source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java@ 9858

Last change on this file since 9858 was 9858, checked in by kjdon, 19 years ago

OK, changed my mind about making SQLConnection kill off the previous statement.
To make it more transparent what is happening, you now have to create a Statement (connection.createStatement()), then use the Statement to execute the query. This means that the thing doing the query owns the Statement, and can kill it off when finished with it, and nothing else can kill it off unexpectedly. The previous way this was all implemented meant that there was a large memory leak, and some functionality actually relied on this. A newer version of the mysql connector/J has fixed the bug where the statement wasn't closed on garbage collection, but it still seems better to close it explicitly.
Hopefully I have got it all back to working as well as it was before, and haven't introduced any bugs :-)

  • Property svn:keywords set to Author Date Id Revision
File size: 15.3 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.util.Iterator;
4import java.util.List;
5import java.util.ArrayList;
6
7import java.io.PrintWriter;
8import java.io.FileWriter;
9import java.io.File;
10import java.io.IOException;
11
12import java.net.URL;
13
14import java.sql.SQLException;
15import java.sql.Statement;
16import java.sql.ResultSet;
17
18import org.greenstone.gsdl3.gs3build.metadata.METSFileGroup;
19
20import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;
21import org.greenstone.gsdl3.gs3build.database.GS3SQLSelect;
22import org.greenstone.gsdl3.gs3build.database.GS3SQLWhereItem;
23import org.greenstone.gsdl3.gs3build.database.GS3SQLWhere;
24import org.greenstone.gsdl3.gs3build.database.GS3SQLField;
25
26public class DocumentList
27{
28 DocumentInterface [] list; // what is currently cached
29 int size; // the maximum number in the cache
30 int used; // the actual number in the cache
31 int count; // the total number of known documents
32 DocumentIDFactoryInterface idFactory; // A manufacturer of novel document IDs
33 GS3SQLConnection connection; // used to query the SQL database
34
35 private static final int maxSize = 3;
36
/**
 * Create an empty document list that has no document identifier factory.
 *
 * @param connection the connection used to query the SQL database
 */
public DocumentList(GS3SQLConnection connection)
{
    // Identical to the two-argument form with a null factory.
    this(null, connection);
}
45
/**
 * Create an empty document list.
 *
 * @param idFactory  the manufacturer of new document identifiers
 * @param connection the connection used to query the SQL database
 */
public DocumentList(DocumentIDFactoryInterface idFactory, GS3SQLConnection connection)
{
    this.connection = connection;
    this.idFactory = idFactory;

    // The cache starts out empty, at its maximum capacity.
    this.size = maxSize;
    this.list = new DocumentInterface[maxSize];
    this.used = 0;
    this.count = 0;
}
54
55 /**
56 * Obtain the list of <code>DocumentID</code> objects representing the unique
57 * document identifiers of documents that refer to the file given as a parameter.
58 *
59 * @param <code>URL</code> the location of the file to match
60 *
61 * @return <code>List</code> of <code>DocumentID</code> reference identifiers.
62 */
63 public List getDocumentIdsWithFile(URL fileLocation)
64 { List reply = new ArrayList();
65
66 GS3SQLSelect select = new GS3SQLSelect("files");
67 select.addField("*");
68 GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("FileLocation", "=", fileLocation.toString()));
69 select.setWhere(where);
70
71 try {
72 Statement statement = connection.createStatement();
73 ResultSet results = statement.executeQuery(select.toString());
74
75 select = new GS3SQLSelect("filegroups");
76 select.addField("DocID");
77 select.setDistinct(true);
78
79 where = new GS3SQLWhere();
80 where.setCondition(GS3SQLWhere.OR_CONDITION);
81
82 GS3SQLWhereItem whereItem = null;
83
84 results.first();
85 do {
86 int fileGroupRef = results.getInt("FileGroupRef");
87 whereItem = new GS3SQLWhereItem("FileGroupRef", "=", Integer.toString(fileGroupRef), GS3SQLField.INTEGER_TYPE);
88 where.add(whereItem);
89 }
90 while (results.next());
91 select.setWhere(where);
92
93 results = statement.executeQuery(select.toString());
94
95 results.first();
96 do {
97 String docId = results.getString("DocID");
98 reply.add(docId);
99 } while (results.next());
100 statement.close();
101 }
102 catch (SQLException sqlEx) {
103 System.err.println("DocumentList.getDocumentIdsWithFile(): "+sqlEx);
104 }
105
106 return reply;
107 }
108
109 /**
110 * Get a list of documents that match a given set of patterns,
111 * within a given URL node.
112 *
113 * @param <code>List</code> the list of patterns to match
114 * @param <code>String</code> the partial URL of the root node under which o
115 * match files. NB: this is a <code>String</code> as the URL may be
116 * incomplete and not properly match the strict requirements for <code>URL</code>
117 */
118 public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
119 {
120 StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation ");
121
122 Iterator files = fileRefs.iterator();
123 while (files.hasNext()) {
124 String file = files.next().toString();
125
126 if (withinNode != null) {
127 queryBuffer.append("REGEXP \"^");
128 queryBuffer.append(withinNode);
129 queryBuffer.append(".*");
130 }
131 else {
132 queryBuffer.append("REGEXP \"");
133 }
134 queryBuffer.append(file);
135 queryBuffer.append("\"");
136
137 if (files.hasNext()) {
138 queryBuffer.append(" OR ");
139 }
140 }
141 queryBuffer.append(";");
142 return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
143 }
144
145 public List findDocumentIdsUsingFile(String fileRef, String withinNode)
146 { String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";";
147 return this.findDocumentIdsUsingFileQuery(query);
148 }
149
150 /**
151 * Return a list of document identifiers against a simple pattern. No root node is given, so
152 * any file matching the pattern given will be returned. USE WITH CAUTION!!!
153 *
154 * @param <code>String</code> a fragment of file pathname to match against.
155 *
156 * @return <code>List</code> of <code>DocumentID</code> objects.
157 */
158 public List findDocumentIdsUsingFile(String fileRef)
159 {
160 // Get the simple list of file objects & their file group reference
161 String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";";
162
163 return this.findDocumentIdsUsingFileQuery(query);
164 }
165
166 public List findDocumentIdsUsingFileExact(String fileRef)
167 {
168 // Get the simple list of file objects & their file group reference
169 String query = "SELECT FileGroupRef FROM files WHERE FileLocation=\"" + fileRef +"\";";
170
171 return this.findDocumentIdsUsingFileQuery(query);
172 }
173
174 private List findDocumentIdsUsingFileQuery(String query) {
175
176 try {
177
178 Statement statement = connection.createStatement();
179 ResultSet results = statement.executeQuery(query);
180
181 if (!results.first()) {
182 statement.close();
183 return null;
184 }
185
186 // get a list of group ids first and turn it into a query on filegroups
187 StringBuffer queryBuffer = new StringBuffer("SELECT * FROM filegroups WHERE ");
188 boolean first = true;
189
190 do {
191 int groupRef = results.getInt("FileGroupRef");
192
193 if (first) {
194 first = false;
195 }
196 else {
197 queryBuffer.append(" OR ");
198 }
199 queryBuffer.append("FileGroupRef=" + Integer.toString(groupRef));
200 } while (results.next());
201 queryBuffer.append(";");
202
203 // make a holder for the actual file section identifiers
204 List divisions = new ArrayList();
205
206 // expand (or, in fact, contract) through the document
207 // structures...recreating new filegroup queries as necessary
208 while (queryBuffer.length() > 0) {
209 results = statement.executeQuery(queryBuffer.toString());
210
211 if (!results.first()) {
212 statement.close();
213 return null;
214 }
215
216 queryBuffer = new StringBuffer();
217 do {
218 String type = results.getString("ParentType");
219 String parentRef = results.getString("ParentRef");
220 if (type.equals(METSFileGroup.SECTION_PARENT)) {
221 divisions.add(parentRef);
222 }
223 else {
224 if (queryBuffer.length() > 0) {
225 queryBuffer.append(" OR ");
226 }
227 queryBuffer.append("FileGroupRef=" + parentRef);
228 }
229 } while (results.next());
230
231 if (queryBuffer.length() > 0) {
232 queryBuffer.insert(0, "SELECT * FROM filegroups WHERE ");
233 queryBuffer.append(";");
234 }
235 }
236
237 // ok, now find all the sections in which we are interested...
238 queryBuffer.setLength(0);
239 queryBuffer.append("SELECT DISTINCT DocID FROM filesection WHERE ");
240 Iterator iterator = divisions.iterator();
241 first = true;
242
243 while (iterator.hasNext()) {
244 String ref = iterator.next().toString();
245
246 if (first) {
247 first = false;
248 }
249 else {
250 queryBuffer.append(" OR ");
251 }
252
253 queryBuffer.append("FileSectionRef="+ref);
254 }
255 queryBuffer.append(";");
256
257 // execute the division query
258 results = statement.executeQuery(queryBuffer.toString());
259
260 if (!results.first()) {
261 statement.close();
262 return null;
263 }
264
265 List reply = new ArrayList();
266 do {
267 reply.add(results.getString("DocID"));
268 } while (results.next());
269
270 statement.close();
271 return reply;
272 }
273 catch (SQLException ex) {
274 System.err.println("DocumentList.findDocumentIdsUsingFileQuery()"+ ex);
275 }
276 return null;
277 }
278
279 /**
280 * Cache a document into the cache, without writing it to the database.
281 * Used directly by other parts of DocumentList when they know that the
282 * document is in the database already, or they are going to write it
283 * themselves...
284 *
285 * @param <code>DocumentInterface</code> the document to cache
286 */
287 private void cacheDocument(DocumentInterface document)
288 { // increase cache size, etc. as necessary
289 if (this.used == this.size) {
290 if (this.size >= maxSize) {
291 for (int i = 0; i < this.size - 1; i ++) {
292 this.list[i] = this.list[i+1];
293 }
294 this.used --;
295 }
296 else {
297 this.ensureSize((this.size * 2) > maxSize ? maxSize : (this.size * 2));
298 }
299 }
300
301 // insert the document itself
302 this.list[this.used] = document;
303 }
304
305 /**
306 * Write the document into the document list (cache) and the database.
307 *
308 * @param <code>DocumentInterface</code> the document itself
309 */
310 public void addDocument(DocumentInterface document)
311 { // initially, test if the document has a duplicate...
312 String duplicateDocID = document.getDuplicateID(this.connection);
313 if (duplicateDocID.length() > 0) {
314 System.out.println("Found duplicate document ");
315 return;
316 }
317
318 // first cache it...
319 this.cacheDocument(document);
320
321 // set the document identifier, if not already set
322 if (document.getID() == null) {
323 System.out.println("Posting new docuument ID");
324 DocumentID id = this.idFactory.getNewDocumentID(document);
325 document.setID(id);
326 }
327
328 // add to the database as well, if it is modified...
329 if (document.isChanged()) {
330 System.out.println("Document was changed");
331 document.getSQLWriter().writeDocument(document, this.connection);
332 }
333
334 // Remember that we've used one more item from the cache.
335 this.used ++;
336
337 // Note additional document
338 this.count ++;
339 }
340
341 /**
342 * Note that an individual document is modified, and act accordingly
343 *
344 * @param <code>DocumentInterface</code> the document
345 */
346 public void storeChangedDocument(DocumentInterface document)
347 { document.getSQLWriter().writeDocument(document, this.connection);
348 }
349
350 /**
351 * Get an iterator across all the documents, not merely those in
352 * the cache. Note that this <code>Iterator</code> does <b>not</b>
353 * support the <code>remove()</code> function, and will raise an
354 * <code>UnsupportedOperationException</code> if you attempt to do
355 * so.
356 *
357 * @return <code>Iterator</code> the iterator across the documents.
358 */
359 public Iterator iterator()
360 { return new DocumentListIterator(connection);
361 }
362
363 /**
364 * Get the nth member of the <b>cached</b> document list.
365 *
366 * @deprecated
367 */
368 public DocumentInterface getDocument(int index)
369 { if (index < 0 || index >= this.used)
370 { return null;
371 }
372 return this.list[index];
373 }
374
375 /**
376 * Simple "obtain a document" function
377 */
378 public DocumentInterface getDocument(DocumentID documentId)
379 {
380 DocumentInterface document = DocumentFactory.readSQLDocument(connection, documentId);
381 if (document != null) {
382 this.cacheDocument(document);
383 }
384 return document;
385 }
386
387 /**
388 * Update timestamps on an entire document list - done at the beginning of a build cycle
389 *
390 * @param <code>The date of the new build cycle</code>
391 */
392 public void updateTimestamps(long buildTimeStamp)
393 { Iterator documents = this.iterator();
394 int item = 0;
395
396 while (documents.hasNext())
397 { DocumentInterface document = (DocumentInterface) documents.next();
398
399 long thisTimeStamp = document.getFilesDatestamp();
400 long lastTimeStamp = document.getModifiedDatestamp();
401
402 if (thisTimeStamp > lastTimeStamp) {
403 System.out.println("Updating timestamps " + thisTimeStamp + " " + lastTimeStamp);
404
405 DocumentSQLWriter.touchDocument(document.getID(), this.connection, buildTimeStamp, thisTimeStamp);
406 }
407 }
408 }
409
410 /**
411 * A convenience method to map onto the old Vector source code...
412 */
413 protected void ensureSize(int size)
414 { DocumentInterface [] newList = new DocumentInterface[size];
415 System.arraycopy(this.list, 0, newList, 0, this.size);
416 this.list = newList;
417 this.size = size;
418 }
419
420 /**
421 * Write the documents into a directory as METS/XML
422 */
423 public void writeDocuments(File directory)
424 { Iterator documents = this.iterator();
425 int item = 0;
426
427 while (documents.hasNext())
428 { DocumentInterface document = (DocumentInterface) documents.next();
429 try
430 { item ++;
431 File localFile = new File(directory, "Doc"+Integer.toString(item)+".xml");
432 FileWriter fileWriter = new FileWriter(localFile);
433 PrintWriter writer = new PrintWriter(fileWriter);
434 document.getMETSWriter().writeDocument(document, writer);
435 writer.close();
436 fileWriter.close();
437 }
438 catch (IOException io)
439 {
440 }
441 }
442 }
443
444 public void writeSQLDocuments(GS3SQLConnection connection)
445 { for (int i = 0; i < this.used; i ++)
446 { this.list[i].getSQLWriter().writeDocument(this.list[i], connection);
447 }
448 }
449
450 public static DocumentList readSQLDocuments(GS3SQLConnection connection)
451 { DocumentList list = new DocumentList(connection);
452
453 GS3SQLSelect select = new GS3SQLSelect("document");
454 select.addField("*");
455
456 try {
457 Statement statement = connection.createStatement();
458 ResultSet documents = statement.executeQuery(select.toString());
459 if (documents.first()) {
460 do {
461 DocumentInterface document = AbstractDocument.readSQL(connection, documents);
462 list.addDocument(document);
463 }
464 while (documents.next());
465 }
466 statement.close();
467 }
468 catch (java.sql.SQLException ex) {
469 System.out.println("DocumentList.writeSQLDocuments(): "+ex);
470 return null;
471 }
472
473 return list;
474 }
475
476 public int getCount()
477 { return this.count;
478 }
479
480 public int size()
481 { return this.used;
482 }
483}
484
485class DocumentListIterator implements Iterator
486{
487 private boolean hasNext;
488 private Statement statement;
489 private ResultSet resultSet;
490 private GS3SQLConnection connection;
491
492 public DocumentListIterator(GS3SQLConnection connection)
493 {
494 this.connection = connection;
495
496 GS3SQLSelect select = new GS3SQLSelect("document");
497 select.addField("*");
498
499 try {
500 this.statement = connection.createStatement();
501 this.resultSet = statement.executeQuery(select.toString());
502 this.hasNext = this.resultSet.first();
503 } catch (SQLException ex) {
504 System.err.println("DocumentListIterator(): "+ex);
505 this.hasNext = false;
506 }
507 }
508
509 public boolean hasNext()
510 { return this.hasNext;
511 }
512
513 public Object next()
514 {
515 // get the 'next' document first
516 DocumentInterface document = AbstractDocument.readSQL(connection, this.resultSet);
517
518 // now actually step forward to the next item, so that we know if we have one!
519 try {
520 this.hasNext = this.resultSet.next();
521
522 if (!this.hasNext) {
523 this.statement.close(); // be a good citizen & close used statement
524 }
525 } catch (SQLException ex) {
526 System.err.println("DocumentList.iterator.next(): "+ex);
527 this.hasNext = false;
528 }
529 return document;
530 }
531
532 public void remove() throws UnsupportedOperationException
533 { throw new UnsupportedOperationException("DocumentList does not support iterator removal of documents");
534 }
535}
Note: See TracBrowser for help on using the repository browser.