source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/AZListClassifier.java@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.classifier;
2
3import java.io.*;
4
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Iterator;
8
9import java.sql.ResultSet;
10import java.sql.SQLException;
11import java.sql.Statement;
12
13import org.xml.sax.XMLReader;
14import org.xml.sax.InputSource;
15import org.xml.sax.SAXException;
16import org.xml.sax.Attributes;
17import org.xml.sax.helpers.XMLReaderFactory;
18import org.xml.sax.helpers.DefaultHandler;
19
20import org.w3c.dom.Element;
21
22import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
23import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
24import org.greenstone.gsdl3.gs3build.util.XMLTools;
25import org.greenstone.gsdl3.gs3build.database.*;
26import org.greenstone.gsdl3.gs3build.util.MultiMap;
27import org.greenstone.gsdl3.util.GSXML;
28
29public class AZListClassifier extends AbstractClassifier
30{
31 GS3SQLConnection database;
32 MultiMap alphaMap;
33 List fields;
34 String name;
35
36 class AZDocumentItem
37 { public DocumentID documentId;
38 public String title;
39
40 public AZDocumentItem(DocumentID documentId, String title)
41 { this.documentId = documentId;
42 this.title = title;
43 }
44
45 public String getTitle()
46 { return this.title;
47 }
48
49 public DocumentID getID()
50 { return this.documentId;
51 }
52
53 public String toString()
54 { return this.title;
55 }
56 }
57
58 private void sortList(String key)
59 { List list = this.alphaMap.getAll(key);
60 if (list.size() > 1) {
61 int first, last;
62
63 first = 0;
64 last = list.size() - 1;
65 while (first != last)
66 { int at = (first + last) / 2;
67
68 if (list.get(at).toString().compareTo(list.get(list.size() - 1).toString()) > 0) {
69 last = at;
70 }
71 else {
72 first = at + 1;
73 }
74 }
75
76 Object newObject = list.get(list.size()-1);
77 last = list.size() - 1;
78 while (last > first) {
79 list.set(last, list.get(last-1));
80 last --;
81 }
82 list.set(first, newObject);
83 }
84 }
85
86 public AZListClassifier(String name, List fields)
87 { this.fields = fields;
88 this.alphaMap = new MultiMap();
89 this.name = name;
90 // System.out.println("AZList :"+fields.size());
91 }
92
93 public AZListClassifier()
94 { this.fields = new ArrayList();
95 }
96
97 public void configure(List parameters)
98 {
99 Iterator iterator = parameters.iterator();
100 while (iterator.hasNext()) {
101 String param = iterator.next().toString();
102 if (param.equals("-metadata")) {
103 if (iterator.hasNext()) {
104 String field = iterator.next().toString();
105 if (field != null && field.length() > 0) {
106 this.fields.add(field);
107 }
108 }
109 }
110 }
111 }
112
113 public void setDatabase(GS3SQLConnection connection)
114 { this.database = connection;
115 }
116
117 public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
118 { // get the metadata item from the document
119 Iterator thisField = this.fields.iterator();
120
121 // for every field listed as being a scannable field...
122 while (thisField.hasNext())
123 { String fieldName = thisField.next().toString();
124
125 // ...get the values for that field...
126 List values = document.getDocumentMetadataItem(fieldName);
127
128 if (values == null)
129 { continue;
130 }
131
132 // ...and send them to the classifier
133 Iterator thisValue = values.iterator();
134 while (thisValue.hasNext())
135 { String value = thisValue.next().toString();
136
137 String classText = null;
138
139 int c = 0;
140 while (c < value.length() && classText == null)
141 { if (Character.isDigit(value.charAt(c)))
142 { classText = "0-9";
143 }
144 else if (Character.isLetter(value.charAt(c)))
145 { classText = value.substring(c, c+1).toLowerCase();
146 }
147 c ++;
148 }
149
150 if (classText != null) {
151 // System.out.println(classText + " " + documentID);
152 this.alphaMap.put(classText, new AZDocumentItem(documentID, value));
153 this.sortList(classText);
154 }
155 }
156 }
157
158 return true;
159 }
160
161 public void startClassifierPass(int pass)
162 {
163 }
164
165 public int getClassifierPasses()
166 { return 1;
167 }
168
169 public void endClassifierPass(int pass)
170 {
171 }
172
173 public void completeClassification()
174 { this.writeSQL(this.database);
175 }
176
177 public int writeSQLClassifyNode(GS3SQLConnection connection, String parentId, int orderRef,
178 String label, String name, String description, int noOfLeafDocs)
179 { GS3SQLAction action;
180 GS3SQLSelect select;
181 GS3SQLInsert insert;
182
183 int classifyRef;
184
185 // check for existing node
186 select = new GS3SQLSelect("classifiers");
187 select.addField("ClassifyRef");
188 GS3SQLWhereItem whereItem = new GS3SQLWhereItem("ClassifyID", "=", label);
189 GS3SQLWhere where = new GS3SQLWhere(whereItem);
190 select.setWhere(where);
191
192
193 try {
194 Statement statement = connection.createStatement();
195 ResultSet results = statement.executeQuery(select.toString());
196 if (results.first()) {
197 GS3SQLUpdate update = new GS3SQLUpdate("classifiers");
198 update.setWhere(where);
199 action = update;
200
201 classifyRef = results.getInt("ClassifyRef");
202 }
203 else {
204 insert = new GS3SQLInsert("classifiers");
205
206 insert.addValue("ParentID", parentId);
207
208 action = insert;
209 classifyRef = -1;
210 }
211 action.addValue("ClassifyID", label);
212 action.addValue("Name", name);
213 action.addValue("Description", description);
214 action.addValue("ClassifyOrder", Integer.toString(orderRef), GS3SQLField.INTEGER_TYPE);
215 action.addValue("NumLeafDocs", Integer.toString(noOfLeafDocs), GS3SQLField.INTEGER_TYPE);
216
217 // do the update/insert
218 statement.execute(action.toString());
219
220
221 // get the ClassifyRef if we don't already have it (have done a
222 // insert action above)...
223 if (classifyRef == -1) {
224 results = statement.executeQuery(select.toString());
225 if (!results.first()) {
226 return -1;
227 }
228
229 classifyRef = results.getInt("ClassifyRef");
230 }
231
232 statement.close();
233 } catch (SQLException sqlEx) {
234 System.err.println("AZListClassifier.writeSQLClassifyNode(): "+sqlEx);
235 return -1;
236 }
237
238
239 return classifyRef;
240 }
241
242 public boolean writeSQL(GS3SQLConnection connection)
243 { GS3SQLAction action;
244 GS3SQLSelect select;
245 GS3SQLInsert insert;
246
247 Iterator keys;
248
249 String prefix = this.name;
250 if (prefix==null || prefix.equals("")) {
251 prefix = "CLAZ"+this.fields.get(0).toString();
252 // TODO: write this name back to collectionConfig.xml
253 this.name = prefix;
254 }
255
256 int leafCount = 0;
257 keys = this.alphaMap.keySet().iterator();
258 while (keys.hasNext()) {
259 Object key = keys.next();
260
261 List childDocs = (List) this.alphaMap.getAll(key);
262 if (childDocs != null) {
263 leafCount += childDocs.size();
264 }
265 }
266
267 int parentClassify = this.writeSQLClassifyNode(connection, "", 0, prefix, "", "Classifier", leafCount);
268
269 if (parentClassify < 0)
270 { return false;
271 }
272
273 try {
274 Statement statement = connection.createStatement();
275
276 List children;
277
278 // TODO: cope with change rather than create from scratch...
279 int nodeOrder = 1;
280 keys = this.alphaMap.keySet().iterator();
281 while (keys.hasNext()) {
282 Object key = keys.next();
283
284 List childDocs = (List) this.alphaMap.getAll(key);
285
286 if (childDocs != null) {
287 String className = prefix + "." + key.toString();
288
289 int classifyRef = this.writeSQLClassifyNode(connection, prefix, nodeOrder, className, key.toString(), key.toString(), childDocs.size());
290
291 // note the child documents...
292 Iterator iterator = childDocs.iterator();
293 int childOrder = 1;
294 //St
295 while (iterator.hasNext()) {
296 AZDocumentItem documentItem = (AZDocumentItem) iterator.next();
297 DocumentID docId = documentItem.getID();
298
299 insert = new GS3SQLInsert("classdocuments");
300 insert.addValue("ClassifyRef", Integer.toString(classifyRef), GS3SQLField.INTEGER_TYPE);
301 insert.addValue("DocID", docId.toString());
302 insert.addValue("DocOrder", Integer.toString(childOrder), GS3SQLField.INTEGER_TYPE);
303
304 statement.execute(insert.toString());
305
306 childOrder ++;
307 }
308
309 nodeOrder ++;
310 }
311 }
312
313
314 /*
315 else {
316 // TODO: clear 'dead' child classifications
317
318 // delete child documents
319 GS3SQLDelete delete = new GS3SQLDelete("classdocuments");
320 delete.setWhere(where);
321
322 statement.execute(delete.toString());
323 }
324
325 // post the child nodes...
326 Iterator iterator = this.childNodes.iterator();
327 while (iterator.hasNext()) {
328 AbstractHierarchyNode childNode = (AbstractHierarchyNode) iterator.next();
329
330 if (!childNode.writeSQL(connection)) {
331 return false;
332 }
333 }
334 */
335 statement.close();
336 } catch (SQLException e) {
337 System.err.println("AZListClassifier.writeSQL(): "+e);
338 return false;
339 }
340
341 return true;
342 }
343 public boolean addClassifierDescription(Element classifier_list) {
344 // TODO check that there are some docs in the classification
345 Element classifier = classifier_list.getOwnerDocument().createElement(GSXML.CLASSIFIER_ELEM);
346 classifier.setAttribute(GSXML.NAME_ATT, this.name);
347 classifier_list.appendChild(classifier);
348 return true;
349 }
350
351}
352
353
354
355
356
Note: See TracBrowser for help on using the repository browser.