source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/AZListClassifier.java@ 8742

Last change on this file since 8742 was 8742, checked in by kjdon, 19 years ago

changed the import statements for GS3SQLConnection and GS3SQLConnectionFactory to reflect their move to the database package

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1package org.greenstone.gsdl3.gs3build.classifier;
2
import java.io.*;

import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;

import java.sql.ResultSet;
import java.sql.SQLException;

import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

import org.w3c.dom.Element;

import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
import org.greenstone.gsdl3.gs3build.util.XMLTools;
import org.greenstone.gsdl3.gs3build.database.*;
import org.greenstone.gsdl3.gs3build.util.MultiMap;
import org.greenstone.gsdl3.util.GSXML;
27
28public class AZListClassifier extends AbstractClassifier
29{
30 GS3SQLConnection database;
31 MultiMap alphaMap;
32 List fields;
33 String name;
34
35 class AZDocumentItem
36 { public DocumentID documentId;
37 public String title;
38
39 public AZDocumentItem(DocumentID documentId, String title)
40 { this.documentId = documentId;
41 this.title = title;
42 }
43
44 public String getTitle()
45 { return this.title;
46 }
47
48 public DocumentID getID()
49 { return this.documentId;
50 }
51
52 public String toString()
53 { return this.title;
54 }
55 }
56
57 private void sortList(String key)
58 { List list = this.alphaMap.getAll(key);
59 if (list.size() > 1) {
60 int first, last;
61
62 first = 0;
63 last = list.size() - 1;
64 while (first != last)
65 { int at = (first + last) / 2;
66
67 if (list.get(at).toString().compareTo(list.get(list.size() - 1).toString()) > 0) {
68 last = at;
69 }
70 else {
71 first = at + 1;
72 }
73 }
74
75 Object newObject = list.get(list.size()-1);
76 last = list.size() - 1;
77 while (last > first) {
78 list.set(last, list.get(last-1));
79 last --;
80 }
81 list.set(first, newObject);
82 }
83 }
84
85 public AZListClassifier(String name, List fields)
86 { this.fields = fields;
87 this.alphaMap = new MultiMap();
88 this.name = name;
89 // System.out.println("AZList :"+fields.size());
90 }
91
92 public AZListClassifier()
93 { this.fields = new ArrayList();
94 }
95
96 public void configure(List parameters)
97 {
98 Iterator iterator = parameters.iterator();
99 while (iterator.hasNext()) {
100 String param = iterator.next().toString();
101 if (param.equals("-metadata")) {
102 if (iterator.hasNext()) {
103 String field = iterator.next().toString();
104 if (field != null && field.length() > 0) {
105 this.fields.add(field);
106 }
107 }
108 }
109 }
110 }
111
112 public void setDatabase(GS3SQLConnection connection)
113 { this.database = connection;
114 }
115
116 public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
117 { // get the metadata item from the document
118 Iterator thisField = this.fields.iterator();
119
120 // for every field listed as being a scannable field...
121 while (thisField.hasNext())
122 { String fieldName = thisField.next().toString();
123
124 // ...get the values for that field...
125 List values = document.getDocumentMetadataItem(fieldName);
126
127 if (values == null)
128 { continue;
129 }
130
131 // ...and send them to the classifier
132 Iterator thisValue = values.iterator();
133 while (thisValue.hasNext())
134 { String value = thisValue.next().toString();
135
136 String classText = null;
137
138 int c = 0;
139 while (c < value.length() && classText == null)
140 { if (Character.isDigit(value.charAt(c)))
141 { classText = "0-9";
142 }
143 else if (Character.isLetter(value.charAt(c)))
144 { classText = value.substring(c, c+1).toLowerCase();
145 }
146 c ++;
147 }
148
149 if (classText != null) {
150 // System.out.println(classText + " " + documentID);
151 this.alphaMap.put(classText, new AZDocumentItem(documentID, value));
152 this.sortList(classText);
153 }
154 }
155 }
156
157 return true;
158 }
159
160 public void startClassifierPass(int pass)
161 {
162 }
163
164 public int getClassifierPasses()
165 { return 1;
166 }
167
168 public void endClassifierPass(int pass)
169 {
170 }
171
172 public void completeClassification()
173 { this.writeSQL(this.database);
174 }
175
176 public int writeSQLClassifyNode(GS3SQLConnection connection, String parentId, int orderRef,
177 String label, String name, String description, int noOfLeafDocs)
178 { GS3SQLAction action;
179 GS3SQLSelect select;
180 GS3SQLInsert insert;
181
182 int classifyRef;
183
184 // check for existing node
185 select = new GS3SQLSelect("classifiers");
186 select.addField("ClassifyRef");
187 GS3SQLWhereItem whereItem = new GS3SQLWhereItem("ClassifyID", "=", label);
188 GS3SQLWhere where = new GS3SQLWhere(whereItem);
189 select.setWhere(where);
190
191 connection.execute(select.toString());
192
193 try {
194 ResultSet results = connection.getResultSet();
195 if (results != null && results.first()) {
196 GS3SQLUpdate update = new GS3SQLUpdate("classifiers");
197 update.setWhere(where);
198 action = update;
199
200 classifyRef = results.getInt("ClassifyRef");
201 }
202 else {
203 insert = new GS3SQLInsert("classifiers");
204
205 insert.addValue("ParentID", parentId);
206
207 action = insert;
208 }
209 action.addValue("ClassifyID", label);
210 action.addValue("Name", name);
211 action.addValue("Description", description);
212 action.addValue("ClassifyOrder", Integer.toString(orderRef), GS3SQLField.INTEGER_TYPE);
213 action.addValue("NumLeafDocs", Integer.toString(noOfLeafDocs), GS3SQLField.INTEGER_TYPE);
214
215 connection.execute(action.toString());
216 classifyRef = -1;
217 }
218 catch (SQLException sqlEx) {
219 System.err.println(sqlEx);
220 return -1;
221 }
222
223 // get the ClassifyRef if we don't already have it (have done a
224 // insert action above)...
225 if (classifyRef == -1) {
226 connection.execute(select.toString());
227
228 try {
229 ResultSet results = connection.getResultSet();
230 if (results == null || !results.first()) {
231 return -1;
232 }
233
234 classifyRef = results.getInt("ClassifyRef");
235 }
236 catch (SQLException sqlEx) {
237 System.err.println(sqlEx);
238 return -1;
239 }
240 }
241
242 return classifyRef;
243 }
244
245 public boolean writeSQL(GS3SQLConnection connection)
246 { GS3SQLAction action;
247 GS3SQLSelect select;
248 GS3SQLInsert insert;
249
250 Iterator keys;
251
252 String prefix = this.name;
253 if (prefix==null || prefix.equals("")) {
254 prefix = "CLAZ"+this.fields.get(0).toString();
255 // TODO: write this name back to collectionConfig.xml
256 this.name = prefix;
257 }
258
259 int leafCount = 0;
260 keys = this.alphaMap.keySet().iterator();
261 while (keys.hasNext()) {
262 Object key = keys.next();
263
264 List childDocs = (List) this.alphaMap.getAll(key);
265 if (childDocs != null) {
266 leafCount += childDocs.size();
267 }
268 }
269
270 int parentClassify = this.writeSQLClassifyNode(connection, "", 0, prefix, "", "Classifier", leafCount);
271
272 if (parentClassify < 0)
273 { return false;
274 }
275
276 List children;
277
278 // TODO: cope with change rather than create from scratch...
279 int nodeOrder = 1;
280 keys = this.alphaMap.keySet().iterator();
281 while (keys.hasNext()) {
282 Object key = keys.next();
283
284 List childDocs = (List) this.alphaMap.getAll(key);
285
286 if (childDocs != null) {
287 String className = prefix + "." + key.toString();
288
289 int classifyRef = this.writeSQLClassifyNode(connection, prefix, nodeOrder, className, key.toString(), key.toString(), childDocs.size());
290
291 // note the child documents...
292 Iterator iterator = childDocs.iterator();
293 int childOrder = 1;
294 while (iterator.hasNext()) {
295 AZDocumentItem documentItem = (AZDocumentItem) iterator.next();
296 DocumentID docId = documentItem.getID();
297
298 insert = new GS3SQLInsert("classdocuments");
299 insert.addValue("ClassifyRef", Integer.toString(classifyRef), GS3SQLField.INTEGER_TYPE);
300 insert.addValue("DocID", docId.toString());
301 insert.addValue("DocOrder", Integer.toString(childOrder), GS3SQLField.INTEGER_TYPE);
302
303 connection.execute(insert.toString());
304
305 childOrder ++;
306 }
307
308 nodeOrder ++;
309 }
310 }
311
312 /*
313 else {
314 // TODO: clear 'dead' child classifications
315
316 // delete child documents
317 GS3SQLDelete delete = new GS3SQLDelete("classdocuments");
318 delete.setWhere(where);
319
320 connection.execute(delete.toString());
321 }
322
323 // post the child nodes...
324 Iterator iterator = this.childNodes.iterator();
325 while (iterator.hasNext()) {
326 AbstractHierarchyNode childNode = (AbstractHierarchyNode) iterator.next();
327
328 if (!childNode.writeSQL(connection)) {
329 return false;
330 }
331 }
332 */
333
334 return true;
335 }
336 public boolean addClassifierDescription(Element classifier_list) {
337 // TODO check that there are some docs in the classification
338 Element classifier = classifier_list.getOwnerDocument().createElement(GSXML.CLASSIFIER_ELEM);
339 classifier.setAttribute(GSXML.NAME_ATT, this.name);
340 classifier_list.appendChild(classifier);
341 return true;
342 }
343
344}
345
346
347
348
349
Note: See TracBrowser for help on using the repository browser.