source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/AZListClassifier.java@ 6699

Last change on this file since 6699 was 6699, checked in by cs025, 20 years ago

Minor alterations.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.0 KB
Line 
1package org.greenstone.gsdl3.gs3build.classifier;
2
3import java.io.*;
4
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Iterator;
8
9import java.sql.ResultSet;
10import java.sql.SQLException;
11
12import org.xml.sax.XMLReader;
13import org.xml.sax.InputSource;
14import org.xml.sax.SAXException;
15import org.xml.sax.Attributes;
16import org.xml.sax.helpers.XMLReaderFactory;
17import org.xml.sax.helpers.DefaultHandler;
18
19import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
20import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
21import org.greenstone.gsdl3.gs3build.util.XMLTools;
22import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
23import org.greenstone.gsdl3.gs3build.database.*;
24import org.greenstone.gsdl3.gs3build.util.MultiMap;
25
26public class AZListClassifier implements ClassifierInterface
27{
28 GS3SQLConnection database;
29 MultiMap alphaMap;
30 List fields;
31
32 class AZDocumentItem
33 { public DocumentID documentId;
34 public String title;
35
36 public AZDocumentItem(DocumentID documentId, String title)
37 { this.documentId = documentId;
38 this.title = title;
39 }
40
41 public String getTitle()
42 { return this.title;
43 }
44
45 public DocumentID getID()
46 { return this.documentId;
47 }
48
49 public String toString()
50 { return this.title;
51 }
52 }
53
54 private void sortList(String key)
55 { List list = this.alphaMap.getAll(key);
56 if (list.size() > 1) {
57 int first, last;
58
59 first = 0;
60 last = list.size() - 1;
61 while (first != last)
62 { int at = (first + last) / 2;
63
64 if (list.get(at).toString().compareTo(list.get(list.size() - 1).toString()) > 0) {
65 last = at;
66 }
67 else {
68 first = at + 1;
69 }
70 }
71
72 Object newObject = list.get(list.size()-1);
73 last = list.size() - 1;
74 while (last > first) {
75 list.set(last, list.get(last-1));
76 last --;
77 }
78 list.set(first, newObject);
79 }
80 }
81
82 public AZListClassifier(List fields)
83 { this.fields = fields;
84 this.alphaMap = new MultiMap();
85 // System.out.println("AZList :"+fields.size());
86 }
87
88 public AZListClassifier()
89 { this.fields = new ArrayList();
90 }
91
92 public void configure(List parameters)
93 {
94 Iterator iterator = parameters.iterator();
95 while (iterator.hasNext()) {
96 String param = iterator.next().toString();
97 if (param.equals("-metadata")) {
98 if (iterator.hasNext()) {
99 String field = iterator.next().toString();
100 if (field != null && field.length() > 0) {
101 this.fields.add(field);
102 }
103 }
104 }
105 }
106 }
107
108 public void setDatabase(GS3SQLConnection connection)
109 { this.database = connection;
110 }
111
112 public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
113 { // get the metadata item from the document
114 Iterator thisField = this.fields.iterator();
115
116 // for every field listed as being a scannable field...
117 while (thisField.hasNext())
118 { String fieldName = thisField.next().toString();
119
120 // ...get the values for that field...
121 List values = document.getDocumentMetadataItem(fieldName);
122
123 if (values == null)
124 { continue;
125 }
126
127 // ...and send them to the classifier
128 Iterator thisValue = values.iterator();
129 while (thisValue.hasNext())
130 { String value = thisValue.next().toString();
131
132 String classText = null;
133
134 int c = 0;
135 while (c < value.length() && classText == null)
136 { if (Character.isDigit(value.charAt(c)))
137 { classText = "0-9";
138 }
139 else if (Character.isLetter(value.charAt(c)))
140 { classText = value.substring(c, c+1).toLowerCase();
141 }
142 c ++;
143 }
144
145 if (classText != null) {
146 // System.out.println(classText + " " + documentID);
147 this.alphaMap.put(classText, new AZDocumentItem(documentID, value));
148 this.sortList(classText);
149 }
150 }
151 }
152
153 return true;
154 }
155
156 public void startClassifierPass(int pass)
157 {
158 }
159
160 public int getClassifierPasses()
161 { return 1;
162 }
163
164 public void endClassifierPass(int pass)
165 {
166 }
167
168 public void completeClassification()
169 { this.writeSQL(this.database);
170 }
171
172 public int writeSQLClassifyNode(GS3SQLConnection connection, String parentId, String label, String name, String description, int noOfLeafDocs)
173 { GS3SQLAction action;
174 GS3SQLSelect select;
175 GS3SQLInsert insert;
176
177 int classifyRef;
178
179 // check for existing node
180 select = new GS3SQLSelect("classifiers");
181 select.addField("ClassifyRef");
182 GS3SQLWhereItem whereItem = new GS3SQLWhereItem("ClassifyID", "=", label);
183 GS3SQLWhere where = new GS3SQLWhere(whereItem);
184 select.setWhere(where);
185
186 connection.execute(select.toString());
187
188 try {
189 ResultSet results = connection.getResultSet();
190 if (results != null && results.first()) {
191 GS3SQLUpdate update = new GS3SQLUpdate("classifiers");
192 update.setWhere(where);
193 action = update;
194
195 classifyRef = results.getInt("ClassifyRef");
196 }
197 else {
198 insert = new GS3SQLInsert("classifiers");
199
200 if (parentId.length() > 0) {
201 insert.addValue("ParentID", parentId);
202 }
203 else {
204 insert.addValue("ParentID", label);
205 }
206
207 action = insert;
208 }
209 action.addValue("ClassifyID", label);
210 action.addValue("Name", name);
211 action.addValue("Description", description);
212 action.addValue("NumLeafDocs", Integer.toString(noOfLeafDocs), GS3SQLField.INTEGER_TYPE);
213
214 connection.execute(action.toString());
215 classifyRef = -1;
216 }
217 catch (SQLException sqlEx) {
218 System.err.println(sqlEx);
219 return -1;
220 }
221
222 // get the ClassifyRef if we don't already have it (have done a
223 // insert action above)...
224 if (classifyRef == -1) {
225 connection.execute(select.toString());
226
227 try {
228 ResultSet results = connection.getResultSet();
229 if (results == null || !results.first()) {
230 return -1;
231 }
232
233 classifyRef = results.getInt("ClassifyRef");
234 }
235 catch (SQLException sqlEx) {
236 System.err.println(sqlEx);
237 return -1;
238 }
239 }
240
241 return classifyRef;
242 }
243
244 public boolean writeSQL(GS3SQLConnection connection)
245 { GS3SQLAction action;
246 GS3SQLSelect select;
247 GS3SQLInsert insert;
248
249 Iterator keys;
250
251 String prefix = "CLAZ"+this.fields.get(0).toString();
252
253 int leafCount = 0;
254 keys = this.alphaMap.keySet().iterator();
255 while (keys.hasNext()) {
256 Object key = keys.next();
257
258 List childDocs = (List) this.alphaMap.getAll(key);
259 if (childDocs != null) {
260 leafCount += childDocs.size();
261 }
262 }
263
264 int parentClassify = this.writeSQLClassifyNode(connection, "", prefix, "", "", leafCount);
265
266 if (parentClassify < 0)
267 { return false;
268 }
269
270 List children;
271
272 // TODO: cope with change rather than create from scratch...
273
274 keys = this.alphaMap.keySet().iterator();
275 while (keys.hasNext()) {
276 Object key = keys.next();
277
278 List childDocs = (List) this.alphaMap.getAll(key);
279
280 if (childDocs != null) {
281 String className = prefix + "." + key.toString();
282
283 int classifyRef = this.writeSQLClassifyNode(connection, prefix, className, "", "", childDocs.size());
284
285 // note the child documents...
286 Iterator iterator = childDocs.iterator();
287 int order = 1;
288 while (iterator.hasNext()) {
289 AZDocumentItem documentItem = (AZDocumentItem) iterator.next();
290 DocumentID docId = documentItem.getID();
291
292 insert = new GS3SQLInsert("classdocuments");
293 insert.addValue("ClassifyRef", Integer.toString(classifyRef), GS3SQLField.INTEGER_TYPE);
294 insert.addValue("DocID", docId.toString());
295 insert.addValue("DocOrder", Integer.toString(order), GS3SQLField.INTEGER_TYPE);
296
297 connection.execute(insert.toString());
298
299 order ++;
300 }
301 }
302 }
303
304 /*
305 else {
306 // TODO: clear 'dead' child classifications
307
308 // delete child documents
309 GS3SQLDelete delete = new GS3SQLDelete("classdocuments");
310 delete.setWhere(where);
311
312 connection.execute(delete.toString());
313 }
314
315 // post the child nodes...
316 Iterator iterator = this.childNodes.iterator();
317 while (iterator.hasNext()) {
318 AbstractHierarchyNode childNode = (AbstractHierarchyNode) iterator.next();
319
320 if (!childNode.writeSQL(connection)) {
321 return false;
322 }
323 }
324 */
325
326 return true;
327 }
328}
329
330
331
332
333
Note: See TracBrowser for help on using the repository browser.