source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java@ 8861

Last change on this file since 8861 was 8861, checked in by cs025, 19 years ago

Changed use of touchDocuments to use a long value as the timestamp

  • Property svn:keywords set to Author Date Id Revision
File size: 15.1 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.util.Iterator;
4import java.util.List;
5import java.util.ArrayList;
6
7import java.io.PrintWriter;
8import java.io.FileWriter;
9import java.io.File;
10import java.io.IOException;
11
12import java.net.URL;
13
14import java.sql.SQLException;
15import java.sql.ResultSet;
16
17import org.greenstone.gsdl3.gs3build.metadata.METSFileGroup;
18
19import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;
20import org.greenstone.gsdl3.gs3build.database.GS3SQLSelect;
21import org.greenstone.gsdl3.gs3build.database.GS3SQLWhereItem;
22import org.greenstone.gsdl3.gs3build.database.GS3SQLWhere;
23import org.greenstone.gsdl3.gs3build.database.GS3SQLField;
24
25public class DocumentList
26{
27 DocumentInterface [] list; // what is currently cached
28 int size; // the maximum number in the cache
29 int used; // the actual number in the cache
30 int count; // the total number of known documents
31 DocumentIDFactoryInterface idFactory; // A manufacturer of novel document IDs
32 GS3SQLConnection connection; // used to query the SQL database
33
34 private static final int maxSize = 3;
35
36 public DocumentList(GS3SQLConnection connection)
37 { this.idFactory = null;
38 this.list = new DocumentInterface[3];
39 this.used = 0;
40 this.size = 3;
41 this.count = 0;
42 this.connection = connection;
43 }
44
45 public DocumentList(DocumentIDFactoryInterface idFactory, GS3SQLConnection connection)
46 { this.idFactory = idFactory;
47 this.list = new DocumentInterface[3];
48 this.used = 0;
49 this.size = 3;
50 this.count = 0;
51 this.connection = connection;
52 }
53
54 /**
55 * Obtain the list of <code>DocumentID</code> objects representing the unique
56 * document identifiers of documents that refer to the file given as a parameter.
57 *
58 * @param <code>URL</code> the location of the file to match
59 *
60 * @return <code>List</code> of <code>DocumentID</code> reference identifiers.
61 */
62 public List getDocumentIdsWithFile(URL fileLocation)
63 { List reply = new ArrayList();
64
65 GS3SQLSelect select = new GS3SQLSelect("files");
66 select.addField("*");
67 GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("FileLocation", "=", fileLocation.toString()));
68 select.setWhere(where);
69
70 this.connection.execute(select.toString());
71
72 ResultSet results = this.connection.getResultSet();
73 if (results != null) {
74 select = new GS3SQLSelect("filegroups");
75 select.addField("DocID");
76 select.setDistinct(true);
77
78 where = new GS3SQLWhere();
79 where.setCondition(GS3SQLWhere.OR_CONDITION);
80
81 GS3SQLWhereItem whereItem = null;
82
83 try {
84 results.first();
85 do {
86 int fileGroupRef = results.getInt("FileGroupRef");
87 whereItem = new GS3SQLWhereItem("FileGroupRef", "=", Integer.toString(fileGroupRef), GS3SQLField.INTEGER_TYPE);
88 where.add(whereItem);
89 }
90 while (results.next());
91 select.setWhere(where);
92 results.close();
93
94 this.connection.execute(select.toString());
95
96 results = this.connection.getResultSet();
97 results.first();
98 do {
99 String docId = results.getString("DocID");
100 reply.add(docId);
101 } while (results.next());
102 }
103 catch (SQLException sqlEx)
104 { System.err.println(sqlEx);
105 }
106 }
107 return reply;
108 }
109
110 /**
111 * Get a list of documents that match a given set of patterns,
112 * within a given URL node.
113 *
114 * @param <code>List</code> the list of patterns to match
115 * @param <code>String</code> the partial URL of the root node under which o
116 * match files. NB: this is a <code>String</code> as the URL may be
117 * incomplete and not properly match the strict requirements for <code>URL</code>
118 */
119 public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
120 {
121 StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation ");
122
123 Iterator files = fileRefs.iterator();
124 while (files.hasNext()) {
125 String file = files.next().toString();
126
127 if (withinNode != null) {
128 queryBuffer.append("REGEXP \"^");
129 queryBuffer.append(withinNode);
130 queryBuffer.append(".*");
131 }
132 else {
133 queryBuffer.append("REGEXP \"");
134 }
135 queryBuffer.append(file);
136 queryBuffer.append("\"");
137
138 if (files.hasNext()) {
139 queryBuffer.append(" OR ");
140 }
141 }
142 queryBuffer.append(";");
143 return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
144 }
145
146 public List findDocumentIdsUsingFile(String fileRef, String withinNode)
147 { String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";";
148 return this.findDocumentIdsUsingFileQuery(query);
149 }
150
151 /**
152 * Return a list of document identifiers against a simple pattern. No root node is given, so
153 * any file matching the pattern given will be returned. USE WITH CAUTION!!!
154 *
155 * @param <code>String</code> a fragment of file pathname to match against.
156 *
157 * @return <code>List</code> of <code>DocumentID</code> objects.
158 */
159 public List findDocumentIdsUsingFile(String fileRef)
160 {
161 // Get the simple list of file objects & their file group reference
162 String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";";
163
164 return this.findDocumentIdsUsingFileQuery(query);
165 }
166
167 public List findDocumentIdsUsingFileExact(String fileRef)
168 {
169 // Get the simple list of file objects & their file group reference
170 String query = "SELECT FileGroupRef FROM files WHERE FileLocation=\"" + fileRef +"\";";
171
172 return this.findDocumentIdsUsingFileQuery(query);
173 }
174
175 private List findDocumentIdsUsingFileQuery(String query)
176 { this.connection.execute(query);
177
178 try {
179
180 ResultSet results = this.connection.getResultSet();
181 if (results == null ||
182 !results.first()) {
183 return null;
184 }
185
186 // get a list of group ids first and turn it into a query on filegroups
187 StringBuffer queryBuffer = new StringBuffer("SELECT * FROM filegroups WHERE ");
188 boolean first = true;
189
190 do {
191 int groupRef = results.getInt("FileGroupRef");
192
193 if (first) {
194 first = false;
195 }
196 else {
197 queryBuffer.append(" OR ");
198 }
199 queryBuffer.append("FileGroupRef=" + Integer.toString(groupRef));
200 } while (results.next());
201 queryBuffer.append(";");
202
203 // make a holder for the actual file section identifiers
204 List divisions = new ArrayList();
205
206 // expand (or, in fact, contract) through the document
207 // structures...recreating new filegroup queries as necessary
208 while (queryBuffer.length() > 0) {
209 connection.execute(queryBuffer.toString());
210
211 results = this.connection.getResultSet();
212 if (results == null || !results.first()) {
213 return null;
214 }
215
216 queryBuffer = new StringBuffer();
217 do {
218 String type = results.getString("ParentType");
219 String parentRef = results.getString("ParentRef");
220 if (type.equals(METSFileGroup.SECTION_PARENT)) {
221 divisions.add(parentRef);
222 }
223 else {
224 if (queryBuffer.length() > 0) {
225 queryBuffer.append(" OR ");
226 }
227 queryBuffer.append("FileGroupRef=" + parentRef);
228 }
229 } while (results.next());
230
231 if (queryBuffer.length() > 0) {
232 queryBuffer.insert(0, "SELECT * FROM filegroups WHERE ");
233 queryBuffer.append(";");
234 }
235 }
236
237 // ok, now find all the sections in which we are interested...
238 queryBuffer.setLength(0);
239 queryBuffer.append("SELECT DISTINCT DocID FROM filesection WHERE ");
240 Iterator iterator = divisions.iterator();
241 first = true;
242
243 while (iterator.hasNext()) {
244 String ref = iterator.next().toString();
245
246 if (first) {
247 first = false;
248 }
249 else {
250 queryBuffer.append(" OR ");
251 }
252
253 queryBuffer.append("FileSectionRef="+ref);
254 }
255 queryBuffer.append(";");
256
257 // execute the division query
258 this.connection.execute(queryBuffer.toString());
259
260 results = this.connection.getResultSet();
261 if (results == null ||
262 !results.first()) {
263 return null;
264 }
265
266 List reply = new ArrayList();
267 do {
268 reply.add(results.getString("DocID"));
269 } while (results.next());
270
271 return reply;
272 }
273 catch (SQLException ex) {
274 System.err.println(ex);
275 }
276 return null;
277 }
278
279 /**
280 * Cache a document into the cache, without writing it to the database.
281 * Used directly by other parts of DocumentList when they know that the
282 * document is in the database already, or they are going to write it
283 * themselves...
284 *
285 * @param <code>DocumentInterface</code> the document to cache
286 */
287 private void cacheDocument(DocumentInterface document)
288 { // increase cache size, etc. as necessary
289 if (this.used == this.size) {
290 if (this.size >= maxSize) {
291 for (int i = 0; i < this.size - 1; i ++) {
292 this.list[i] = this.list[i+1];
293 }
294 this.used --;
295 }
296 else {
297 this.ensureSize((this.size * 2) > maxSize ? maxSize : (this.size * 2));
298 }
299 }
300
301 // insert the document itself
302 this.list[this.used] = document;
303 }
304
305 /**
306 * Write the document into the document list (cache) and the database.
307 *
308 * @param <code>DocumentInterface</code> the document itself
309 */
310 public void addDocument(DocumentInterface document)
311 { // initially, test if the document has a duplicate...
312 String duplicateDocID = document.getDuplicateID(this.connection);
313 if (duplicateDocID.length() > 0) {
314 System.out.println("Found duplicate document ");
315 return;
316 }
317
318 // first cache it...
319 this.cacheDocument(document);
320
321 // set the document identifier, if not already set
322 if (document.getID() == null) {
323 System.out.println("Posting new docuument ID");
324 DocumentID id = this.idFactory.getNewDocumentID(document);
325 document.setID(id);
326 }
327
328 // add to the database as well, if it is modified...
329 if (document.isChanged()) {
330 System.out.println("Document was changed");
331 document.getSQLWriter().writeDocument(document, this.connection);
332 }
333
334 // Remember that we've used one more item from the cache.
335 this.used ++;
336
337 // Note additional document
338 this.count ++;
339 }
340
341 /**
342 * Note that an individual document is modified, and act accordingly
343 *
344 * @param <code>DocumentInterface</code> the document
345 */
346 public void storeChangedDocument(DocumentInterface document)
347 { document.getSQLWriter().writeDocument(document, this.connection);
348 }
349
350 /**
351 * Get an iterator across all the documents, not merely those in
352 * the cache. Note that this <code>Iterator</code> does <b>not</b>
353 * support the <code>remove()</code> function, and will raise an
354 * <code>UnsupportedOperationException</code> if you attempt to do
355 * so.
356 *
357 * @return <code>Iterator</code> the iterator across the documents.
358 */
359 public Iterator iterator()
360 { return new DocumentListIterator(connection);
361 }
362
363 /**
364 * Get the nth member of the <b>cached</b> document list.
365 *
366 * @deprecated
367 */
368 public DocumentInterface getDocument(int index)
369 { if (index < 0 || index >= this.used)
370 { return null;
371 }
372 return this.list[index];
373 }
374
375 /**
376 * Simple "obtain a document" function
377 */
378 public DocumentInterface getDocument(DocumentID documentId)
379 {
380 DocumentInterface document = DocumentFactory.readSQLDocument(connection, documentId);
381 if (document != null) {
382 this.cacheDocument(document);
383 }
384 return document;
385 }
386
387 /**
388 * Update timestamps on an entire document list - done at the beginning of a build cycle
389 *
390 * @param <code>The date of the new build cycle</code>
391 */
392 public void updateTimestamps(long buildTimeStamp)
393 { Iterator documents = this.iterator();
394 int item = 0;
395
396 while (documents.hasNext())
397 { DocumentInterface document = (DocumentInterface) documents.next();
398
399 long thisTimeStamp = document.getFilesDatestamp();
400 long lastTimeStamp = document.getModifiedDatestamp();
401
402 if (thisTimeStamp > lastTimeStamp) {
403 System.out.println("Updating timestamps " + thisTimeStamp + " " + lastTimeStamp);
404
405 DocumentSQLWriter.touchDocument(document.getID(), this.connection, buildTimeStamp, thisTimeStamp);
406 }
407 }
408 }
409
410 /**
411 * A convenience method to map onto the old Vector source code...
412 */
413 protected void ensureSize(int size)
414 { DocumentInterface [] newList = new DocumentInterface[size];
415 System.arraycopy(this.list, 0, newList, 0, this.size);
416 this.list = newList;
417 this.size = size;
418 }
419
420 /**
421 * Write the documents into a directory as METS/XML
422 */
423 public void writeDocuments(File directory)
424 { Iterator documents = this.iterator();
425 int item = 0;
426
427 while (documents.hasNext())
428 { DocumentInterface document = (DocumentInterface) documents.next();
429 try
430 { item ++;
431 File localFile = new File(directory, "Doc"+Integer.toString(item)+".xml");
432 FileWriter fileWriter = new FileWriter(localFile);
433 PrintWriter writer = new PrintWriter(fileWriter);
434 document.getMETSWriter().writeDocument(document, writer);
435 writer.close();
436 fileWriter.close();
437 }
438 catch (IOException io)
439 {
440 }
441 }
442 }
443
444 public void writeSQLDocuments(GS3SQLConnection connection)
445 { for (int i = 0; i < this.used; i ++)
446 { this.list[i].getSQLWriter().writeDocument(this.list[i], connection);
447 }
448 }
449
450 public static DocumentList readSQLDocuments(GS3SQLConnection connection)
451 { DocumentList list = new DocumentList(connection);
452
453 GS3SQLSelect select = new GS3SQLSelect("document");
454 select.addField("*");
455
456 ResultSet documents;
457 try {
458 connection.execute(select.toString());
459 documents = connection.getResultSet();
460
461 if (documents.first())
462 { do
463 { DocumentInterface document = AbstractDocument.readSQL(connection, documents);
464 list.addDocument(document);
465 }
466 while (documents.next());
467 }
468 }
469 catch (java.sql.SQLException ex)
470 { System.out.println(ex);
471 return null;
472 }
473
474 return list;
475 }
476
477 public int getCount()
478 { return this.count;
479 }
480
481 public int size()
482 { return this.used;
483 }
484}
485
486class DocumentListIterator implements Iterator
487{
488 private boolean hasNext;
489 private ResultSet resultSet;
490 private GS3SQLConnection connection;
491
492 public DocumentListIterator(GS3SQLConnection connection)
493 {
494 this.connection = connection;
495
496 GS3SQLSelect select = new GS3SQLSelect("document");
497 select.addField("*");
498
499 try {
500 connection.execute(select.toString());
501 this.resultSet = connection.getResultSet();
502 this.hasNext = this.resultSet.first();
503 } catch (SQLException ex) {
504 this.hasNext = false;
505 }
506 }
507
508 public boolean hasNext()
509 { return this.hasNext;
510 }
511
512 public Object next()
513 {
514 // get the 'next' document first
515 DocumentInterface document = AbstractDocument.readSQL(connection, this.resultSet);
516
517 // now actually step forward to the next item, so that we know if we have one!
518 try {
519 this.hasNext = this.resultSet.next();
520
521 if (!this.hasNext) {
522 this.resultSet.close(); // be a good citizen & close used result sets
523 }
524 } catch (SQLException ex) {
525 this.hasNext = false;
526 }
527 return document;
528 }
529
530 public void remove() throws UnsupportedOperationException
531 { throw new UnsupportedOperationException("DocumentList does not support iterator removal of documents");
532 }
533}
Note: See TracBrowser for help on using the repository browser.