source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/AZListClassifier.java@ 9858

Last change on this file since 9858 was 9858, checked in by kjdon, 19 years ago

OK, changed my mind about making SQLConnection kill off the previous statement.
To make it more transparent what is happening, you now have to create a Statement (connection.createStatement()), then use the Statement to execute the query. This means that the thing doing the query owns the Statement, and can kill it off when finished with it, and nothing else can kill it off unexpectedly. The previous way this was all implemented meant that there was a large memory leak, and some functionality actually relied on this. A newer version of the mysql connector/J has fixed the bug where the statement wasn't closed on garbage collection, but it still seems better to close it explicitly.
Hopefully I have got it all back to working as well as it was before, and haven't introduced any bugs :-)

  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.classifier;
2
3import java.io.*;
4
5import java.util.List;
6import java.util.ArrayList;
7import java.util.Iterator;
8
9import java.sql.ResultSet;
10import java.sql.SQLException;
11import java.sql.Statement;
12
13import org.xml.sax.XMLReader;
14import org.xml.sax.InputSource;
15import org.xml.sax.SAXException;
16import org.xml.sax.Attributes;
17import org.xml.sax.helpers.XMLReaderFactory;
18import org.xml.sax.helpers.DefaultHandler;
19
20import org.w3c.dom.Element;
21
22import org.greenstone.gsdl3.gs3build.doctypes.DocumentID;
23import org.greenstone.gsdl3.gs3build.doctypes.DocumentInterface;
24import org.greenstone.gsdl3.gs3build.util.XMLTools;
25import org.greenstone.gsdl3.gs3build.database.*;
26import org.greenstone.gsdl3.gs3build.util.MultiMap;
27import org.greenstone.gsdl3.util.GSXML;
28
29public class AZListClassifier extends AbstractClassifier
30{
31 GS3SQLConnection database;
32 MultiMap alphaMap;
33 List fields;
34 String name;
35
36 class AZDocumentItem
37 { public DocumentID documentId;
38 public String title;
39
40 public AZDocumentItem(DocumentID documentId, String title)
41 { this.documentId = documentId;
42 this.title = title;
43 }
44
45 public String getTitle()
46 { return this.title;
47 }
48
49 public DocumentID getID()
50 { return this.documentId;
51 }
52
53 public String toString()
54 { return this.title;
55 }
56 }
57
58 private void sortList(String key)
59 { List list = this.alphaMap.getAll(key);
60 if (list.size() > 1) {
61 int first, last;
62
63 first = 0;
64 last = list.size() - 1;
65 while (first != last)
66 { int at = (first + last) / 2;
67
68 if (list.get(at).toString().compareTo(list.get(list.size() - 1).toString()) > 0) {
69 last = at;
70 }
71 else {
72 first = at + 1;
73 }
74 }
75
76 Object newObject = list.get(list.size()-1);
77 last = list.size() - 1;
78 while (last > first) {
79 list.set(last, list.get(last-1));
80 last --;
81 }
82 list.set(first, newObject);
83 }
84 }
85
86 public AZListClassifier(String name, List fields)
87 { this.fields = fields;
88 this.alphaMap = new MultiMap();
89 this.name = name;
90 // System.out.println("AZList :"+fields.size());
91 }
92
93 public AZListClassifier()
94 { this.fields = new ArrayList();
95 }
96
97 public void configure(List parameters)
98 {
99 Iterator iterator = parameters.iterator();
100 while (iterator.hasNext()) {
101 String param = iterator.next().toString();
102 if (param.equals("-metadata")) {
103 if (iterator.hasNext()) {
104 String field = iterator.next().toString();
105 if (field != null && field.length() > 0) {
106 this.fields.add(field);
107 }
108 }
109 }
110 }
111 }
112
113 public void setDatabase(GS3SQLConnection connection)
114 { this.database = connection;
115 }
116
117 public boolean classifyDocument(DocumentID documentID, DocumentInterface document)
118 { // get the metadata item from the document
119 Iterator thisField = this.fields.iterator();
120
121 // for every field listed as being a scannable field...
122 while (thisField.hasNext())
123 { String fieldName = thisField.next().toString();
124
125 // ...get the values for that field...
126 List values = document.getDocumentMetadataItem(fieldName);
127
128 if (values == null)
129 { continue;
130 }
131
132 // ...and send them to the classifier
133 Iterator thisValue = values.iterator();
134 while (thisValue.hasNext())
135 { String value = thisValue.next().toString();
136
137 String classText = null;
138
139 int c = 0;
140 while (c < value.length() && classText == null)
141 { if (Character.isDigit(value.charAt(c)))
142 { classText = "0-9";
143 }
144 else if (Character.isLetter(value.charAt(c)))
145 { classText = value.substring(c, c+1).toLowerCase();
146 }
147 c ++;
148 }
149
150 if (classText != null) {
151 // System.out.println(classText + " " + documentID);
152 this.alphaMap.put(classText, new AZDocumentItem(documentID, value));
153 this.sortList(classText);
154 }
155 }
156 }
157
158 return true;
159 }
160
161 public void startClassifierPass(int pass)
162 {
163 }
164
165 public int getClassifierPasses()
166 { return 1;
167 }
168
169 public void endClassifierPass(int pass)
170 {
171 }
172
173 public void completeClassification()
174 { this.writeSQL(this.database);
175 }
176
177 public int writeSQLClassifyNode(GS3SQLConnection connection, String parentId, int orderRef,
178 String label, String name, String description, int noOfLeafDocs)
179 { GS3SQLAction action;
180 GS3SQLSelect select;
181 GS3SQLInsert insert;
182
183 int classifyRef;
184
185 // check for existing node
186 select = new GS3SQLSelect("classifiers");
187 select.addField("ClassifyRef");
188 GS3SQLWhereItem whereItem = new GS3SQLWhereItem("ClassifyID", "=", label);
189 GS3SQLWhere where = new GS3SQLWhere(whereItem);
190 select.setWhere(where);
191
192
193 try {
194 Statement statement = connection.createStatement();
195 ResultSet results = statement.executeQuery(select.toString());
196 if (results.first()) {
197 GS3SQLUpdate update = new GS3SQLUpdate("classifiers");
198 update.setWhere(where);
199 action = update;
200
201 classifyRef = results.getInt("ClassifyRef");
202 }
203 else {
204 insert = new GS3SQLInsert("classifiers");
205
206 insert.addValue("ParentID", parentId);
207
208 action = insert;
209 classifyRef = -1;
210 }
211 action.addValue("ClassifyID", label);
212 action.addValue("Name", name);
213 action.addValue("Description", description);
214 action.addValue("ClassifyOrder", Integer.toString(orderRef), GS3SQLField.INTEGER_TYPE);
215 action.addValue("NumLeafDocs", Integer.toString(noOfLeafDocs), GS3SQLField.INTEGER_TYPE);
216
217 // do the update/insert
218 statement.execute(action.toString());
219
220
221 // get the ClassifyRef if we don't already have it (have done a
222 // insert action above)...
223 if (classifyRef == -1) {
224 results = statement.executeQuery(select.toString());
225 if (!results.first()) {
226 return -1;
227 }
228
229 classifyRef = results.getInt("ClassifyRef");
230 }
231
232 statement.close();
233 } catch (SQLException sqlEx) {
234 System.err.println("AZListClassifier.writeSQLClassifyNode(): "+sqlEx);
235 return -1;
236 }
237
238
239 return classifyRef;
240 }
241
242 public boolean writeSQL(GS3SQLConnection connection)
243 { GS3SQLAction action;
244 GS3SQLSelect select;
245 GS3SQLInsert insert;
246
247 Iterator keys;
248
249 String prefix = this.name;
250 if (prefix==null || prefix.equals("")) {
251 prefix = "CLAZ"+this.fields.get(0).toString();
252 // TODO: write this name back to collectionConfig.xml
253 this.name = prefix;
254 }
255
256 int leafCount = 0;
257 keys = this.alphaMap.keySet().iterator();
258 while (keys.hasNext()) {
259 Object key = keys.next();
260
261 List childDocs = (List) this.alphaMap.getAll(key);
262 if (childDocs != null) {
263 leafCount += childDocs.size();
264 }
265 }
266
267 int parentClassify = this.writeSQLClassifyNode(connection, "", 0, prefix, "", "Classifier", leafCount);
268
269 if (parentClassify < 0)
270 { return false;
271 }
272
273 try {
274 Statement statement = connection.createStatement();
275
276 List children;
277
278 // TODO: cope with change rather than create from scratch...
279 int nodeOrder = 1;
280 keys = this.alphaMap.keySet().iterator();
281 while (keys.hasNext()) {
282 Object key = keys.next();
283
284 List childDocs = (List) this.alphaMap.getAll(key);
285
286 if (childDocs != null) {
287 String className = prefix + "." + key.toString();
288
289 int classifyRef = this.writeSQLClassifyNode(connection, prefix, nodeOrder, className, key.toString(), key.toString(), childDocs.size());
290
291 // note the child documents...
292 Iterator iterator = childDocs.iterator();
293 int childOrder = 1;
294 //St
295 while (iterator.hasNext()) {
296 AZDocumentItem documentItem = (AZDocumentItem) iterator.next();
297 DocumentID docId = documentItem.getID();
298
299 insert = new GS3SQLInsert("classdocuments");
300 insert.addValue("ClassifyRef", Integer.toString(classifyRef), GS3SQLField.INTEGER_TYPE);
301 insert.addValue("DocID", docId.toString());
302 insert.addValue("DocOrder", Integer.toString(childOrder), GS3SQLField.INTEGER_TYPE);
303
304 statement.execute(insert.toString());
305
306 childOrder ++;
307 }
308
309 nodeOrder ++;
310 }
311 }
312
313
314 /*
315 else {
316 // TODO: clear 'dead' child classifications
317
318 // delete child documents
319 GS3SQLDelete delete = new GS3SQLDelete("classdocuments");
320 delete.setWhere(where);
321
322 statement.execute(delete.toString());
323 }
324
325 // post the child nodes...
326 Iterator iterator = this.childNodes.iterator();
327 while (iterator.hasNext()) {
328 AbstractHierarchyNode childNode = (AbstractHierarchyNode) iterator.next();
329
330 if (!childNode.writeSQL(connection)) {
331 return false;
332 }
333 }
334 */
335 statement.close();
336 } catch (SQLException e) {
337 System.err.println("AZListClassifier.writeSQL(): "+e);
338 return false;
339 }
340
341 return true;
342 }
343 public boolean addClassifierDescription(Element classifier_list) {
344 // TODO check that there are some docs in the classification
345 Element classifier = classifier_list.getOwnerDocument().createElement(GSXML.CLASSIFIER_ELEM);
346 classifier.setAttribute(GSXML.NAME_ATT, this.name);
347 classifier_list.appendChild(classifier);
348 return true;
349 }
350
351}
352
353
354
355
356
Note: See TracBrowser for help on using the repository browser.