source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/AZListClassifier.java@ 7307

Last change on this file since 7307 was 7303, checked in by kjdon, 20 years ago

AZListClassifier now has a name parameter, and I slightly modified what gets written to the database: the top-level node no longer has a parent and its description is 'Classifier'; lower nodes use the key as their name and description for now.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
Line 
1package org.greenstone.gsdl3.gs3build.classifier;
2
3import java.io.*;
4
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Iterator;
8
9import java.sql.ResultSet;
10import java.sql.SQLException;
11
12import org.xml.sax.XMLReader;
13import org.xml.sax.InputSource;
14import org.xml.sax.SAXException;
15import org.xml.sax.Attributes;
16import org.xml.sax.helpers.XMLReaderFactory;
17import org.xml.sax.helpers.DefaultHandler;
18
19import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
20import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
21import org.greenstone.gsdl3.gs3build.util.XMLTools;
22import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
23import org.greenstone.gsdl3.gs3build.database.*;
24import org.greenstone.gsdl3.gs3build.util.MultiMap;
25
26public class AZListClassifier implements ClassifierInterface
27{
// Connection handed in through setDatabase(); completeClassification() passes it to writeSQL().
28 GS3SQLConnection database;
// Buckets of AZDocumentItem objects filled by classifyDocument(), keyed by "0-9" or a
// single lower-cased letter.
29 MultiMap alphaMap;
// Names of the metadata fields whose values are classified; configure() appends the
// value that follows each "-metadata" parameter.
30 List fields;
// Classifier name; writeSQL() uses it as the ClassifyID prefix when it is non-empty.
31 String name;
32 class AZDocumentItem
33 { public DocumentID documentId;
34 public String title;
35
36 public AZDocumentItem(DocumentID documentId, String title)
37 { this.documentId = documentId;
38 this.title = title;
39 }
40
41 public String getTitle()
42 { return this.title;
43 }
44
45 public DocumentID getID()
46 { return this.documentId;
47 }
48
49 public String toString()
50 { return this.title;
51 }
52 }
53
54 private void sortList(String key)
55 { List list = this.alphaMap.getAll(key);
56 if (list.size() > 1) {
57 int first, last;
58
59 first = 0;
60 last = list.size() - 1;
61 while (first != last)
62 { int at = (first + last) / 2;
63
64 if (list.get(at).toString().compareTo(list.get(list.size() - 1).toString()) > 0) {
65 last = at;
66 }
67 else {
68 first = at + 1;
69 }
70 }
71
72 Object newObject = list.get(list.size()-1);
73 last = list.size() - 1;
74 while (last > first) {
75 list.set(last, list.get(last-1));
76 last --;
77 }
78 list.set(first, newObject);
79 }
80 }
81
/**
 * Creates a classifier with a given name over a given list of metadata fields.
 *
 * @param name   classifier name; writeSQL() derives one from the first field
 *               when this is null or empty
 * @param fields names of the metadata fields to classify on (stored as given,
 *               not copied)
 */
public AZListClassifier(String name, List fields) {
    this.alphaMap = new MultiMap();
    this.fields = fields;
    this.name = name;
}
88
/**
 * Creates an unnamed classifier with no metadata fields; fields are expected
 * to be supplied afterwards through configure().
 *
 * The bucket map is created here as well as in the two-argument constructor:
 * classifyDocument() and writeSQL() both dereference it, so leaving it unset
 * made every instance built this way fail with a NullPointerException.
 */
public AZListClassifier() {
    this.fields = new ArrayList();
    this.alphaMap = new MultiMap();
}
92
93 public void configure(List parameters)
94 {
95 Iterator iterator = parameters.iterator();
96 while (iterator.hasNext()) {
97 String param = iterator.next().toString();
98 if (param.equals("-metadata")) {
99 if (iterator.hasNext()) {
100 String field = iterator.next().toString();
101 if (field != null && field.length() > 0) {
102 this.fields.add(field);
103 }
104 }
105 }
106 }
107 }
108
109 public void setDatabase(GS3SQLConnection connection)
110 { this.database = connection;
111 }
112
113 public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
114 { // get the metadata item from the document
115 Iterator thisField = this.fields.iterator();
116
117 // for every field listed as being a scannable field...
118 while (thisField.hasNext())
119 { String fieldName = thisField.next().toString();
120
121 // ...get the values for that field...
122 List values = document.getDocumentMetadataItem(fieldName);
123
124 if (values == null)
125 { continue;
126 }
127
128 // ...and send them to the classifier
129 Iterator thisValue = values.iterator();
130 while (thisValue.hasNext())
131 { String value = thisValue.next().toString();
132
133 String classText = null;
134
135 int c = 0;
136 while (c < value.length() && classText == null)
137 { if (Character.isDigit(value.charAt(c)))
138 { classText = "0-9";
139 }
140 else if (Character.isLetter(value.charAt(c)))
141 { classText = value.substring(c, c+1).toLowerCase();
142 }
143 c ++;
144 }
145
146 if (classText != null) {
147 // System.out.println(classText + " " + documentID);
148 this.alphaMap.put(classText, new AZDocumentItem(documentID, value));
149 this.sortList(classText);
150 }
151 }
152 }
153
154 return true;
155 }
156
157 public void startClassifierPass(int pass)
158 {
159 }
160
161 public int getClassifierPasses()
162 { return 1;
163 }
164
165 public void endClassifierPass(int pass)
166 {
167 }
168
169 public void completeClassification()
170 { this.writeSQL(this.database);
171 }
172
173 public int writeSQLClassifyNode(GS3SQLConnection connection, String parentId, int orderRef,
174 String label, String name, String description, int noOfLeafDocs)
175 { GS3SQLAction action;
176 GS3SQLSelect select;
177 GS3SQLInsert insert;
178
179 int classifyRef;
180
181 // check for existing node
182 select = new GS3SQLSelect("classifiers");
183 select.addField("ClassifyRef");
184 GS3SQLWhereItem whereItem = new GS3SQLWhereItem("ClassifyID", "=", label);
185 GS3SQLWhere where = new GS3SQLWhere(whereItem);
186 select.setWhere(where);
187
188 connection.execute(select.toString());
189
190 try {
191 ResultSet results = connection.getResultSet();
192 if (results != null && results.first()) {
193 GS3SQLUpdate update = new GS3SQLUpdate("classifiers");
194 update.setWhere(where);
195 action = update;
196
197 classifyRef = results.getInt("ClassifyRef");
198 }
199 else {
200 insert = new GS3SQLInsert("classifiers");
201
202 insert.addValue("ParentID", parentId);
203
204 action = insert;
205 }
206 action.addValue("ClassifyID", label);
207 action.addValue("Name", name);
208 action.addValue("Description", description);
209 action.addValue("ClassifyOrder", Integer.toString(orderRef), GS3SQLField.INTEGER_TYPE);
210 action.addValue("NumLeafDocs", Integer.toString(noOfLeafDocs), GS3SQLField.INTEGER_TYPE);
211
212 connection.execute(action.toString());
213 classifyRef = -1;
214 }
215 catch (SQLException sqlEx) {
216 System.err.println(sqlEx);
217 return -1;
218 }
219
220 // get the ClassifyRef if we don't already have it (have done a
221 // insert action above)...
222 if (classifyRef == -1) {
223 connection.execute(select.toString());
224
225 try {
226 ResultSet results = connection.getResultSet();
227 if (results == null || !results.first()) {
228 return -1;
229 }
230
231 classifyRef = results.getInt("ClassifyRef");
232 }
233 catch (SQLException sqlEx) {
234 System.err.println(sqlEx);
235 return -1;
236 }
237 }
238
239 return classifyRef;
240 }
241
242 public boolean writeSQL(GS3SQLConnection connection)
243 { GS3SQLAction action;
244 GS3SQLSelect select;
245 GS3SQLInsert insert;
246
247 Iterator keys;
248
249 String prefix = this.name;
250 if (prefix==null || prefix.equals("")) {
251 prefix = "CLAZ"+this.fields.get(0).toString();
252 // TODO: write this name back to collectionConfig.xml
253 }
254 int leafCount = 0;
255 keys = this.alphaMap.keySet().iterator();
256 while (keys.hasNext()) {
257 Object key = keys.next();
258
259 List childDocs = (List) this.alphaMap.getAll(key);
260 if (childDocs != null) {
261 leafCount += childDocs.size();
262 }
263 }
264
265 int parentClassify = this.writeSQLClassifyNode(connection, "", 0, prefix, "", "Classifier", leafCount);
266
267 if (parentClassify < 0)
268 { return false;
269 }
270
271 List children;
272
273 // TODO: cope with change rather than create from scratch...
274 int nodeOrder = 1;
275 keys = this.alphaMap.keySet().iterator();
276 while (keys.hasNext()) {
277 Object key = keys.next();
278
279 List childDocs = (List) this.alphaMap.getAll(key);
280
281 if (childDocs != null) {
282 String className = prefix + "." + key.toString();
283
284 int classifyRef = this.writeSQLClassifyNode(connection, prefix, nodeOrder, className, key.toString(), key.toString(), childDocs.size());
285
286 // note the child documents...
287 Iterator iterator = childDocs.iterator();
288 int childOrder = 1;
289 while (iterator.hasNext()) {
290 AZDocumentItem documentItem = (AZDocumentItem) iterator.next();
291 DocumentID docId = documentItem.getID();
292
293 insert = new GS3SQLInsert("classdocuments");
294 insert.addValue("ClassifyRef", Integer.toString(classifyRef), GS3SQLField.INTEGER_TYPE);
295 insert.addValue("DocID", docId.toString());
296 insert.addValue("DocOrder", Integer.toString(childOrder), GS3SQLField.INTEGER_TYPE);
297
298 connection.execute(insert.toString());
299
300 childOrder ++;
301 }
302
303 nodeOrder ++;
304 }
305 }
306
307 /*
308 else {
309 // TODO: clear 'dead' child classifications
310
311 // delete child documents
312 GS3SQLDelete delete = new GS3SQLDelete("classdocuments");
313 delete.setWhere(where);
314
315 connection.execute(delete.toString());
316 }
317
318 // post the child nodes...
319 Iterator iterator = this.childNodes.iterator();
320 while (iterator.hasNext()) {
321 AbstractHierarchyNode childNode = (AbstractHierarchyNode) iterator.next();
322
323 if (!childNode.writeSQL(connection)) {
324 return false;
325 }
326 }
327 */
328
329 return true;
330 }
331}
332
333
334
335
336
Note: See TracBrowser for help on using the repository browser.