Changeset 6104 for trunk/gsdl3
- Timestamp: 2003-12-03T09:42:06+13:00 (20 years ago)
- Location: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier
- Files:
  - 3 added
  - 3 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/ClassifierInterface.java
r5950 r6104 1 1 package org.greenstone.gsdl3.gs3build.classifier; 2 3 import java.util.List; 2 4 3 5 import org.greenstone.gsdl3.gs3build.doctypes.DocumentID; … … 6 8 public interface ClassifierInterface 7 9 { 8 /** 9 * Initialise the classifier as required... 10 */ 11 public void configure(String outputDir); 10 /** 11 * Initialise the classifier as required. The parameters given are specific 12 * to the classifier type, so this interface does not guarantee any interpretations 13 * of any given parameter. 14 * 15 * @param <code>List</code> the parameters with which to configure the classifier. 16 */ 17 public void configure(List parameters); 12 18 13 /** 14 * Classify an individual document - the document should be informed of any 15 * classification nodes which it is assigned to, so that it can obtain knowledge 16 * of any "parents". These are assigned into the "classified" metadata item in 17 * the "gsdl3" namespace... 18 */ 19 public boolean classifyDocument(DocumentID documentID, DocumentInterface document); 19 public void setDatabase(org.greenstone.gsdl3.gs3build.util.GS3SQLConnection connection); 20 20 21 public void startClassifierPass(int pass); 22 23 public int getClassifierPasses(); 24 25 public void endClassifierPass(int pass); 26 27 /** 28 * Called at the end of the classification sequence - the classifier should 29 * save its state at this point... 30 */ 31 public void completeClassification(); 21 /** 22 * Classify an individual document - the document should be informed of any 23 * classification nodes which it is assigned to, so that it can obtain knowledge 24 * of any "parents". These are assigned into the "classified" metadata item in 25 * the "gsdl3" namespace... 
26 */ 27 public boolean classifyDocument(DocumentID documentID, DocumentInterface document); 28 29 public void startClassifierPass(int pass); 30 31 public int getClassifierPasses(); 32 33 public void endClassifierPass(int pass); 34 35 /** 36 * Called at the end of the classification sequence - the classifier should 37 * save its state at this point... 38 */ 39 public void completeClassification(); 32 40 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/ClassifierManager.java
r5950 r6104 1 1 package org.greenstone.gsdl3.gs3build.classifier; 2 2 3 import java.util.List; 3 4 import java.util.Iterator; 4 5 … … 6 7 import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface; 7 8 import org.greenstone.gsdl3.gs3build.doctypes.DocumentList; 9 import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection; 8 10 9 11 public class ClassifierManager 10 12 { 11 DocumentList documents; 12 ClassifierInterface [] list; 13 int size; 14 int used; 13 DocumentList documents; 14 ClassifierInterface [] list; 15 int size; 16 int used; 17 GS3SQLConnection database; 15 18 16 public ClassifierManager(DocumentList documentList) 17 { this.list = new ClassifierInterface[10]; 18 this.size = 10; 19 this.used = 0; 20 this.documents = documentList; 21 } 19 /** 20 * Instantiate a classifier manager that will classify the 21 * documents in a given <code>DocumentList</code> object. 22 * The list of documents should not change during the 23 * <code>classifyDocuments</code> function, and the documents 24 * themselves may be altered at that time. 25 * 26 * @param <code>DocumentList</code> a provider of documents for 27 * classification. 28 */ 29 public ClassifierManager(DocumentList documentList, GS3SQLConnection database) 30 { this.list = new ClassifierInterface[10]; 31 this.size = 10; 32 this.used = 0; 33 this.documents = documentList; 34 this.database = database; 35 } 22 36 23 /** 24 * Add a classifier to the end of the list of classifiers... 25 * 26 * @param <code>ClassifierInterface</code> the classifier. 27 */ 28 public void addClassifier(ClassifierInterface classifier) 29 { this.ensureSize(this.used + 1); 30 this.list[this.used] = classifier; 31 this.used ++; 32 } 37 /** 38 * Add a classifier to the end of the list of classifiers... 39 * 40 * @param <code>ClassifierInterface</code> the classifier. 
41 */ 42 public void addClassifier(ClassifierInterface classifier) 43 { this.ensureSize(this.used + 1); 44 this.list[this.used] = classifier; 45 this.used ++; 33 46 34 public void classifyDocument(DocumentID docId, DocumentInterface document) 35 { for (int i = 0; i < this.used; i ++) 36 { this.list[i].classifyDocument(docId, document); 37 } 38 } 47 classifier.setDatabase(this.database); 48 } 49 50 /** 51 * Classify an individual document - the document itself may be written 52 * to in consequence, in which case the document <strong>must</strong> 53 * be rewritten to disk. This function does <strong>not</strong> do the 54 * writing itself. 55 * 56 * @param <code>DocumentID</code> the document's identifier. 57 * @param <code>DocumentInterface</code> the document itself. 58 */ 59 public void classifyDocument(DocumentID docId, DocumentInterface document) 60 { for (int i = 0; i < this.used; i ++) 61 { this.list[i].classifyDocument(docId, document); 62 } 63 } 39 64 40 65 /** … … 55 80 System.out.println("Ending document"); 56 81 } 82 83 if (document.isModified()) { 84 this.documents.modifiedDocument(document); 85 } 57 86 } 58 87 } … … 71 100 } 72 101 } 102 103 public static ClassifierInterface loadClassifier(String classifierName, List params) 104 { 105 try { 106 ClassifierInterface classifier = 107 (ClassifierInterface) Class.forName("org.greenstone.gsdl3.gs3build.classifier."+classifierName).newInstance(); 108 if (classifier != null) { 109 classifier.configure(params); 110 } 111 return classifier; 112 } 113 catch (ClassNotFoundException ex) { 114 System.err.println("Attempt to load classifier " + classifierName + " which does not exist"); 115 } 116 catch (InstantiationException instEx) { 117 System.err.println("Unable to create classifier " + classifierName); 118 } 119 catch (IllegalAccessException accessEx) { 120 System.err.println("Unable to instantiate classifier " + classifierName + " due to permissions problems " + accessEx.toString()); 121 } 122 return null; 123 
} 73 124 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/HierarchyClassifier.java
r5950 r6104 17 17 import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface; 18 18 import org.greenstone.gsdl3.gs3build.util.XMLTools; 19 import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection; 19 20 20 21 public class HierarchyClassifier implements ClassifierInterface 21 22 { 22 class HierarchyClassifierObserver 23 { List classifications; 24 DocumentInterface document; 25 26 public HierarchyClassifierObserver(DocumentInterface document) 27 { this.document = document; 28 } 29 30 public void recordClassification(String label) 31 { this.document.addDocumentMetadata("gsdl3", "classified", label); 32 } 33 34 public List getClassifications() 35 { return this.classifications; 36 } 23 class HierarchyClassifierObserver 24 { List classifications; 25 DocumentInterface document; 26 27 public HierarchyClassifierObserver(DocumentInterface document) 28 { this.document = document; 29 } 30 31 public void recordClassification(String label) 32 { this.document.addDocumentMetadata("gsdl3", "classified", label); 33 } 34 35 public List getClassifications() 36 { return this.classifications; 37 } 38 } 39 40 class HierarchyNode 41 { String descriptor; // the textual descriptor used on screen or long-hand 42 String name; // the index number, letter assignment or other item 43 // used to identify the position of the item in the 44 // hierarchy 45 String id; // an identifier used by the GLI for maintenance 46 // purposes; this plays no active role in the 47 // rebuilding process (at the moment) 48 List childNodes; // the child classification nodes of this node 49 List childDocs; // the child documents of this node 50 List matches; // the other metadata values that may be matched 51 // against the classifier 52 HierarchyNode parent; // the parent of the node 53 54 /** 55 * Simple node 56 */ 57 public HierarchyNode() 58 { this.descriptor = null; 59 this.name = null; 60 this.id = null; 61 this.childNodes = new ArrayList(); 62 this.childDocs = new ArrayList(); 63 this.parent = null; 64 
} 65 66 public void addChild(HierarchyNode child) 67 { this.childNodes.add(child); 68 child.setParent(this); 69 } 70 71 public void addDocument(DocumentID document) 72 { this.childDocs.add(document); 73 } 74 75 public void setParent(HierarchyNode parent) 76 { this.parent = parent; 77 } 78 79 public HierarchyNode getParent() 80 { return this.parent; 81 } 82 83 public void setDescriptor(String descriptor) 84 { this.descriptor = descriptor; 85 } 86 87 public void setID(String id) 88 { this.id = id; 89 } 90 91 public void setName(String name) 92 { this.name = name; 93 } 94 95 public void addMatch(String match) 96 { this.matches.add(match); 97 } 98 99 public boolean isMatch (String toMatch) 100 { Iterator thisMatch = this.matches.iterator(); 101 102 while (thisMatch.hasNext()) 103 { String thisMatchText = thisMatch.next().toString(); 104 105 if (thisMatchText.equals(toMatch)) 106 { return true; 37 107 } 38 39 class HierarchyNode 40 { String descriptor; // the textual descriptor used on screen or long-hand 41 String name; // the index number, letter assignment or other item 42 // used to identify the position of the item in the 43 // hierarchy 44 String id; // an identifier used by the GLI for maintenance 45 // purposes; this plays no active role in the 46 // rebuilding process (at the moment) 47 List childNodes; // the child classification nodes of this node 48 List childDocs; // the child documents of this node 49 List matches; // the other metadata values that may be matched 50 // against the classifier 51 HierarchyNode parent; // the parent of the node 52 53 /** 54 * Simple node 55 */ 56 public HierarchyNode() 57 { this.descriptor = null; 58 this.name = null; 59 this.id = null; 60 this.childNodes = new ArrayList(); 61 this.childDocs = new ArrayList(); 62 this.parent = null; 63 } 64 65 public void addChild(HierarchyNode child) 66 { this.childNodes.add(child); 67 child.setParent(this); 68 } 69 70 public void addDocument(DocumentID document) 71 { 
this.childDocs.add(document); 72 } 73 74 public void setParent(HierarchyNode parent) 75 { this.parent = parent; 76 } 77 78 public HierarchyNode getParent() 79 { return this.parent; 80 } 81 82 public void setDescriptor(String descriptor) 83 { this.descriptor = descriptor; 84 } 85 86 public void setID(String id) 87 { this.id = id; 88 } 89 90 public void setName(String name) 91 { this.name = name; 92 } 93 94 public void addMatch(String match) 95 { this.matches.add(match); 96 } 97 98 public boolean isMatch (String toMatch) 99 { Iterator thisMatch = this.matches.iterator(); 108 } 109 return false; 110 } 111 112 public void getClassifications(DocumentID documentID, List values, 113 HierarchyClassifierObserver observer) 114 { Iterator valueList = values.iterator(); 115 116 while (valueList.hasNext()) 117 { if (this.isMatch(valueList.next().toString())) 118 { observer.recordClassification(this.name); 119 this.addDocument(documentID); 120 } 121 } 122 Iterator childList = this.childNodes.iterator(); 123 124 while (childList.hasNext()) 125 { ((HierarchyNode) childList.next()).getClassifications(documentID, values, observer); 126 } 127 } 128 } 129 130 /** 131 * An inner class to handle Classifier files 132 */ 133 class HierarchyHandler extends DefaultHandler 134 { StringBuffer description; 135 StringBuffer name; 136 StringBuffer id; 137 StringBuffer match; 138 boolean inElement; 139 HierarchyNode rootNode = null; 140 HierarchyNode currentNode = null; 141 142 HierarchyHandler() 143 { super(); 144 145 this.name = null; 146 this.description = null; 147 this.id = null; 148 this.match = null; 149 } 150 151 public void startElement(String URI, String localName, String qName, Attributes attributes) 152 { if (localName.equals("Classification")) 153 { // create a new node in the hierarchy 154 HierarchyNode node = new HierarchyNode(); 155 156 // Initialise the tree 157 if (currentNode == null) 158 { if (this.rootNode == null) // which it should be 159 { this.rootNode = node; 160 } 161 
else // insert a new root node, and set this as the peer 162 // of the original root... 163 { HierarchyNode newRoot = new HierarchyNode(); 164 newRoot.addChild(this.rootNode); 165 newRoot.addChild(node); 166 this.rootNode = newRoot; 167 } 168 } 169 // Add the node 170 else 171 { this.currentNode.addChild(node); 172 } 173 this.currentNode = node; 174 } 175 else if (localName.equals("Name")) 176 { this.name = new StringBuffer(); 177 } 178 else if (localName.equals("ID")) 179 { this.id = new StringBuffer(); 180 } 181 else if (localName.equals("Description")) 182 { this.description = new StringBuffer(); 183 } 184 else if (localName.equals("Matches")) 185 { this.match = new StringBuffer(); 186 } 187 else if (localName.equals("Children")) 188 { 189 } 190 } 191 192 public void endElement(String URI, String localName, String qName) 193 { if (localName.equals("Classification")) 194 { this.currentNode = this.currentNode.getParent(); 195 } 196 else if (localName.equals("Name")) 197 { this.currentNode.setName(XMLTools.cleanString(this.name.toString())); 198 this.name = null; 199 } 200 else if (localName.equals("ID")) 201 { this.currentNode.setID(XMLTools.cleanString(this.id.toString())); 202 this.id = null; 203 } 204 else if (localName.equals("Description")) 205 { this.currentNode.setDescriptor(XMLTools.cleanString(this.description.toString())); 206 this.description = null; 207 } 208 else if (localName.equals("Matches")) 209 { this.currentNode.addMatch(XMLTools.cleanString(this.match.toString())); 210 this.match = null; 211 } 212 else if (localName.equals("Children")) 213 { 214 } 215 } 216 217 public void characters(char c[], int start, int length) 218 { String string = new String(c, start, length); 219 if (this.id != null) 220 { this.id.append(string); 221 } 222 else if (this.name != null) 223 { this.name.append(string); 224 } 225 else if (this.description != null) 226 { this.description.append(string); 227 } 228 else if (this.match != null) 229 { this.match.append(string); 
230 } 231 } 232 233 public HierarchyNode getHierarchy() 234 { return this.rootNode; 235 } 236 } 237 238 private HierarchyNode hierarchy; 239 private List fields; 240 241 public HierarchyClassifier(File basefile, List fields) 242 { 243 try 244 { XMLReader reader = XMLReaderFactory.createXMLReader(); 245 HierarchyHandler handler = new HierarchyHandler(); 246 reader.setContentHandler(handler); 247 reader.setErrorHandler(handler); 248 249 // A Classifier consists of one file only - get it. 250 FileReader fileReader = new FileReader(basefile); 251 reader.parse(new InputSource(fileReader)); 252 253 this.hierarchy = handler.getHierarchy(); 254 this.fields = fields; 255 } 256 catch (SAXException saxException) 257 { // TODO: log error 258 } 259 catch (java.io.FileNotFoundException fileException) 260 { 261 } 262 catch (java.io.IOException ioException) 263 { 264 } 265 } 266 267 public void configure(List parameters) 268 { 269 } 270 271 public void setDatabase(GS3SQLConnection database) 272 { 273 } 274 275 /** 276 * Classify a particular document - the document id will be used to record its 277 * membership of any classifier nodes, and the document interface will also be 278 * used to assign back knowledge of any classifiers to which the document is 279 * assigned. 280 * 281 * @param <code>DocumentID</code> the document identifier 282 * @param <code>DocumentInterface</code> the document interface 283 */ 284 public boolean classifyDocument(DocumentID documentID, DocumentInterface document) 285 { // the observer records the assignment of documents to classifications 286 HierarchyClassifierObserver classifyObserver = new HierarchyClassifierObserver(document); 287 288 // get the metadata item from the document 289 Iterator thisField = this.fields.iterator(); 290 291 // for every field listed as being a scannable field... 
292 while (thisField.hasNext()) 293 { String fieldName = thisField.next().toString(); 100 294 101 while (thisMatch.hasNext()) 102 { String thisMatchText = thisMatch.next().toString(); 103 104 if (thisMatchText.equals(toMatch)) 105 { return true; 106 } 107 } 108 return false; 109 } 110 111 public void getClassifications(DocumentID documentID, List values, 112 HierarchyClassifierObserver observer) 113 { Iterator valueList = values.iterator(); 114 115 while (valueList.hasNext()) 116 { if (this.isMatch(valueList.next().toString())) 117 { observer.recordClassification(this.name); 118 this.addDocument(documentID); 119 } 120 } 121 Iterator childList = this.childNodes.iterator(); 122 123 while (childList.hasNext()) 124 { ((HierarchyNode) childList.next()).getClassifications(documentID, values, observer); 125 } 126 } 127 } 128 129 /** 130 * An inner class to handle Classifier files 131 */ 132 class HierarchyHandler extends DefaultHandler 133 { StringBuffer description; 134 StringBuffer name; 135 StringBuffer id; 136 StringBuffer match; 137 boolean inElement; 138 HierarchyNode rootNode = null; 139 HierarchyNode currentNode = null; 140 141 HierarchyHandler() 142 { super(); 143 144 this.name = null; 145 this.description = null; 146 this.id = null; 147 this.match = null; 148 } 149 150 public void startElement(String URI, String localName, String qName, Attributes attributes) 151 { if (localName.equals("Classification")) 152 { // create a new node in the hierarchy 153 HierarchyNode node = new HierarchyNode(); 154 155 // Initialise the tree 156 if (currentNode == null) 157 { if (this.rootNode == null) // which it should be 158 { this.rootNode = node; 159 } 160 else // insert a new root node, and set this as the peer 161 // of the original root... 
162 { HierarchyNode newRoot = new HierarchyNode(); 163 newRoot.addChild(this.rootNode); 164 newRoot.addChild(node); 165 this.rootNode = newRoot; 166 } 167 } 168 // Add the node 169 else 170 { this.currentNode.addChild(node); 171 } 172 this.currentNode = node; 173 } 174 else if (localName.equals("Name")) 175 { this.name = new StringBuffer(); 176 } 177 else if (localName.equals("ID")) 178 { this.id = new StringBuffer(); 179 } 180 else if (localName.equals("Description")) 181 { this.description = new StringBuffer(); 182 } 183 else if (localName.equals("Matches")) 184 { this.match = new StringBuffer(); 185 } 186 else if (localName.equals("Children")) 187 { 188 } 189 } 190 191 public void endElement(String URI, String localName, String qName) 192 { if (localName.equals("Classification")) 193 { this.currentNode = this.currentNode.getParent(); 194 } 195 else if (localName.equals("Name")) 196 { this.currentNode.setName(XMLTools.cleanString(this.name.toString())); 197 this.name = null; 198 } 199 else if (localName.equals("ID")) 200 { this.currentNode.setID(XMLTools.cleanString(this.id.toString())); 201 this.id = null; 202 } 203 else if (localName.equals("Description")) 204 { this.currentNode.setDescriptor(XMLTools.cleanString(this.description.toString())); 205 this.description = null; 206 } 207 else if (localName.equals("Matches")) 208 { this.currentNode.addMatch(XMLTools.cleanString(this.match.toString())); 209 this.match = null; 210 } 211 else if (localName.equals("Children")) 212 { 213 } 214 } 215 216 public void characters(char c[], int start, int length) 217 { String string = new String(c, start, length); 218 if (this.id != null) 219 { this.id.append(string); 220 } 221 else if (this.name != null) 222 { this.name.append(string); 223 } 224 else if (this.description != null) 225 { this.description.append(string); 226 } 227 else if (this.match != null) 228 { this.match.append(string); 229 } 230 } 231 232 public HierarchyNode getHierarchy() 233 { return this.rootNode; 234 } 
235 } 236 237 private HierarchyNode hierarchy; 238 private List fields; 239 240 public HierarchyClassifier(File basefile, List fields) 241 { 242 try 243 { XMLReader reader = XMLReaderFactory.createXMLReader(); 244 HierarchyHandler handler = new HierarchyHandler(); 245 reader.setContentHandler(handler); 246 reader.setErrorHandler(handler); 247 248 // A Classifier consists of one file only - get it. 249 FileReader fileReader = new FileReader(basefile); 250 reader.parse(new InputSource(fileReader)); 251 252 this.hierarchy = handler.getHierarchy(); 253 this.fields = fields; 254 } 255 catch (SAXException saxException) 256 { // TODO: log error 257 } 258 catch (java.io.FileNotFoundException fileException) 259 { 260 } 261 catch (java.io.IOException ioException) 262 { 263 } 264 } 265 266 public void configure(String outputDir) 267 { 268 } 269 270 /** 271 * Classify a particular document - the document id will be used to record its 272 * membership of any classifier nodes, and the document interface will also be 273 * used to assign back knowledge of any classifiers to which the document is 274 * assigned. 275 * 276 * @param <code>DocumentID</code> the document identifier 277 * @param <code>DocumentInterface</code> the document interface 278 */ 279 public boolean classifyDocument(DocumentID documentID, DocumentInterface document) 280 { // the observer records the assignment of documents to classifications 281 HierarchyClassifierObserver classifyObserver = new HierarchyClassifierObserver(document); 282 283 // get the metadata item from the document 284 Iterator thisField = this.fields.iterator(); 285 286 // for every field listed as being a scannable field... 287 while (thisField.hasNext()) 288 { String fieldName = thisField.next().toString(); 289 290 // ...get the values for that field... 
291 List values = document.getDocumentMetadataItem(fieldName); 292 293 if (values == null) 294 { continue; 295 } 296 297 // ...and send them to the classifier 298 this.hierarchy.getClassifications(documentID, values, classifyObserver); 299 } 300 return true; 301 } 302 303 public int getClassifierPasses() 304 { return 1; 305 } 306 307 public void startClassifierPass(int pass) 308 { 309 } 310 311 public void endClassifierPass(int pass) 312 { 313 } 314 315 public void completeClassification() 316 { // TODO: store the classifications to file... 317 } 295 // ...get the values for that field... 296 List values = document.getDocumentMetadataItem(fieldName); 297 298 if (values == null) 299 { continue; 300 } 301 302 // ...and send them to the classifier 303 this.hierarchy.getClassifications(documentID, values, classifyObserver); 304 } 305 return true; 306 } 307 308 public int getClassifierPasses() 309 { return 1; 310 } 311 312 public void startClassifierPass(int pass) 313 { 314 } 315 316 public void endClassifierPass(int pass) 317 { 318 } 319 320 public void completeClassification() 321 { // TODO: store the classifications to file... 322 } 318 323 }
Note: See TracChangeset for help on using the changeset viewer.