source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java@ 6897

Last change on this file since 6897 was 6697, checked in by cs025, 20 years ago

Additions to cope with handling of date of accession, etc.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.util.Iterator;
4import java.util.List;
5import java.util.ArrayList;
6
7import java.io.PrintWriter;
8import java.io.FileWriter;
9import java.io.File;
10import java.io.IOException;
11
12import java.net.URL;
13
14import java.sql.SQLException;
15import java.sql.ResultSet;
16
17import org.greenstone.gsdl3.gs3build.metadata.METSFileGroup;
18
19import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection;
20import org.greenstone.gsdl3.gs3build.database.GS3SQLSelect;
21import org.greenstone.gsdl3.gs3build.database.GS3SQLWhereItem;
22import org.greenstone.gsdl3.gs3build.database.GS3SQLWhere;
23import org.greenstone.gsdl3.gs3build.database.GS3SQLField;
24
25public class DocumentList
26{
27 DocumentInterface [] list; // what is currently cached
28 int size; // the maximum number in the cache
29 int used; // the actual number in the cache
30 int count; // the total number of known documents
31 DocumentIDFactoryInterface idFactory; // A manufacturer of novel document IDs
32 GS3SQLConnection connection; // used to query the SQL database
33
34 private static final int maxSize = 3;
35
36 public DocumentList(GS3SQLConnection connection)
37 { this.idFactory = null;
38 this.list = new DocumentInterface[3];
39 this.used = 0;
40 this.size = 3;
41 this.count = 0;
42 this.connection = connection;
43 }
44
45 public DocumentList(DocumentIDFactoryInterface idFactory, GS3SQLConnection connection)
46 { this.idFactory = idFactory;
47 this.list = new DocumentInterface[3];
48 this.used = 0;
49 this.size = 3;
50 this.count = 0;
51 this.connection = connection;
52 }
53
54
55 public List getDocumentIdsWithFile(URL fileLocation)
56 { List reply = new ArrayList();
57
58 GS3SQLSelect select = new GS3SQLSelect("files");
59 select.addField("*");
60 GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("FileLocation", "=", fileLocation.toString()));
61 select.setWhere(where);
62
63 this.connection.execute(select.toString());
64
65 ResultSet results = this.connection.getResultSet();
66 if (results != null) {
67 select = new GS3SQLSelect("filegroups");
68 select.addField("DocID");
69 select.setDistinct(true);
70
71 where = new GS3SQLWhere();
72 where.setCondition(GS3SQLWhere.OR_CONDITION);
73
74 GS3SQLWhereItem whereItem = null;
75
76 try {
77 results.first();
78 do {
79 int fileGroupRef = results.getInt("FileGroupRef");
80 whereItem = new GS3SQLWhereItem("FileGroupRef", "=", Integer.toString(fileGroupRef), GS3SQLField.INTEGER_TYPE);
81 where.add(whereItem);
82 }
83 while (results.next());
84 select.setWhere(where);
85 results.close();
86
87 this.connection.execute(select.toString());
88
89 results = this.connection.getResultSet();
90 results.first();
91 do {
92 String docId = results.getString("DocID");
93 reply.add(docId);
94 } while (results.next());
95 }
96 catch (SQLException sqlEx)
97 { System.err.println(sqlEx);
98 }
99 }
100 return reply;
101 }
102
103 /**
104 * Get a list of documents that match a
105 */
106 public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
107 {
108 StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation ");
109
110 Iterator files = fileRefs.iterator();
111 while (files.hasNext()) {
112 String file = files.next().toString();
113
114 if (withinNode != null) {
115 queryBuffer.append("REGEXP \"^");
116 queryBuffer.append(withinNode);
117 queryBuffer.append(".*");
118 }
119 else {
120 queryBuffer.append("REGEXP \"");
121 }
122 queryBuffer.append(file);
123 queryBuffer.append("\"");
124
125 if (files.hasNext()) {
126 queryBuffer.append(" OR ");
127 }
128 }
129 queryBuffer.append(";");
130 return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
131 }
132
133 public List findDocumentIdsUsingFile(String fileRef, String withinNode)
134 { String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";";
135 return this.findDocumentIdsUsingFileQuery(query);
136 }
137
138 public List findDocumentIdsUsingFile(String fileRef)
139 {
140 // Get the simple list of file objects & their file group reference
141 String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";";
142
143 return this.findDocumentIdsUsingFileQuery(query);
144 }
145
146 private List findDocumentIdsUsingFileQuery(String query)
147 { this.connection.execute(query);
148
149 try {
150
151 ResultSet results = this.connection.getResultSet();
152 if (results == null ||
153 !results.first()) {
154 return null;
155 }
156
157 // get a list of group ids first and turn it into a query on filegroups
158 StringBuffer queryBuffer = new StringBuffer("SELECT * FROM filegroups WHERE ");
159 boolean first = true;
160
161 do {
162 int groupRef = results.getInt("FileGroupRef");
163
164 if (first) {
165 first = false;
166 }
167 else {
168 queryBuffer.append(" OR ");
169 }
170 queryBuffer.append("FileGroupRef=" + Integer.toString(groupRef));
171 } while (results.next());
172 queryBuffer.append(";");
173
174 // make a holder for the actual file section identifiers
175 List divisions = new ArrayList();
176
177 // expand (or, in fact, contract) through the document
178 // structures...recreating new filegroup queries as necessary
179 while (queryBuffer.length() > 0) {
180 connection.execute(queryBuffer.toString());
181
182 results = this.connection.getResultSet();
183 if (results == null || !results.first()) {
184 return null;
185 }
186
187 queryBuffer = new StringBuffer();
188 do {
189 String type = results.getString("ParentType");
190 String parentRef = results.getString("ParentRef");
191 if (type.equals(METSFileGroup.SECTION_PARENT)) {
192 divisions.add(parentRef);
193 }
194 else {
195 if (queryBuffer.length() > 0) {
196 queryBuffer.append(" OR ");
197 }
198 queryBuffer.append("FileGroupRef=" + parentRef);
199 }
200 } while (results.next());
201
202 if (queryBuffer.length() > 0) {
203 queryBuffer.insert(0, "SELECT * FROM filegroups WHERE ");
204 queryBuffer.append(";");
205 }
206 }
207
208 // ok, now find all the sections in which we are interested...
209 queryBuffer.setLength(0);
210 queryBuffer.append("SELECT DISTINCT DocID FROM filesection WHERE ");
211 Iterator iterator = divisions.iterator();
212 first = true;
213
214 while (iterator.hasNext()) {
215 String ref = iterator.next().toString();
216
217 if (first) {
218 first = false;
219 }
220 else {
221 queryBuffer.append(" OR ");
222 }
223
224 queryBuffer.append("FileSectionRef="+ref);
225 }
226 queryBuffer.append(";");
227
228 // execute the division query
229 this.connection.execute(queryBuffer.toString());
230
231 results = this.connection.getResultSet();
232 if (results == null ||
233 !results.first()) {
234 return null;
235 }
236
237 List reply = new ArrayList();
238 do {
239 reply.add(results.getString("DocID"));
240 } while (results.next());
241
242 return reply;
243 }
244 catch (SQLException ex) {
245 System.err.println(ex);
246 }
247 return null;
248 }
249
250 /**
251 * Cache a document into the cache, without writing it to the database.
252 * Used directly by other parts of DocumentList when they know that the
253 * document is in the database already, or they are going to write it
254 * themselves...
255 *
256 * @param <code>DocumentInterface</code> the document to cache
257 */
258 private void cacheDocument(DocumentInterface document)
259 { // increase cache size, etc. as necessary
260 if (this.used == this.size) {
261 if (this.size >= maxSize) {
262 for (int i = 0; i < this.size - 1; i ++) {
263 this.list[i] = this.list[i+1];
264 }
265 this.used --;
266 }
267 else {
268 this.ensureSize((this.size * 2) > maxSize ? maxSize : (this.size * 2));
269 }
270 }
271
272 // insert the document itself
273 this.list[this.used] = document;
274 }
275
276 /**
277 * Write the document into the document list (cache) and the database.
278 *
279 * @param <code>DocumentInterface</code> the document itself
280 */
281 public void addDocument(DocumentInterface document)
282 { // initially, test if the document has a duplicate...
283 if (document.hasDuplicate(this.connection)) {
284 System.out.println("Found duplicate document ");
285 return;
286 }
287
288 // first cache it...
289 this.cacheDocument(document);
290
291 // set the document identifier, if not already set
292 if (document.getID() == null) {
293 DocumentID id = this.idFactory.getNewDocumentID(document);
294 document.setID(id);
295 }
296
297 // add to the database as well, if it is modified...
298 if (document.isModified()) {
299 document.getSQLWriter().writeDocument(document, this.connection);
300 }
301
302 // Remember that we've used one more item from the cache.
303 this.used ++;
304
305 // Note additional document
306 this.count ++;
307 }
308
309 /**
310 * Note that an individual document is modified, and act accordingly
311 *
312 * @param <code>DocumentInterface</code> the document
313 */
314 public void modifiedDocument(DocumentInterface document)
315 { document.getSQLWriter().writeDocument(document, this.connection);
316 }
317
318 /**
319 * Get an iterator across all the documents, not merely those in
320 * the cache. Note that this <code>Iterator</code> does <b>not</b>
321 * support the <code>remove()</code> function, and will raise an
322 * <code>UnsupportedOperationException</code> if you attempt to do
323 * so.
324 *
325 * @return <code>Iterator</code> the iterator across the documents.
326 */
327 public Iterator iterator()
328 { return new DocumentListIterator(connection);
329 }
330
331 /**
332 * Get the nth member of the <b>cached</b> document list.
333 *
334 * @deprecated
335 */
336 public DocumentInterface getDocument(int index)
337 { if (index < 0 || index >= this.used)
338 { return null;
339 }
340 return this.list[index];
341 }
342
343 public DocumentInterface getDocument(DocumentID documentId)
344 {
345 DocumentInterface document = DocumentFactory.readSQLDocument(connection, documentId);
346 if (document != null) {
347 this.cacheDocument(document);
348 }
349 return document;
350 }
351
352 /**
353 public DocumentID getDocumentID(int index)
354 { if (index < 0 || index >= this.used)
355 { return null;
356 }
357 return this.list[index].getID();
358 }
359 */
360
361 protected void ensureSize(int size)
362 { DocumentInterface [] newList = new DocumentInterface[size];
363 System.arraycopy(this.list, 0, newList, 0, this.size);
364 this.list = newList;
365 this.size = size;
366 }
367
368 public void writeDocuments(File directory)
369 { Iterator documents = this.iterator();
370 int item = 0;
371
372 while (documents.hasNext())
373 { DocumentInterface document = (DocumentInterface) documents.next();
374 try
375 { item ++;
376 File localFile = new File(directory, "Doc"+Integer.toString(item)+".xml");
377 FileWriter fileWriter = new FileWriter(localFile);
378 PrintWriter writer = new PrintWriter(fileWriter);
379 document.getMETSWriter().writeDocument(document, writer);
380 writer.close();
381 fileWriter.close();
382 }
383 catch (IOException io)
384 {
385 }
386 }
387 }
388
389 public void writeSQLDocuments(GS3SQLConnection connection)
390 { for (int i = 0; i < this.used; i ++)
391 { this.list[i].getSQLWriter().writeDocument(this.list[i], connection);
392 }
393 }
394
395 public static DocumentList readSQLDocuments(GS3SQLConnection connection)
396 { DocumentList list = new DocumentList(connection);
397
398 GS3SQLSelect select = new GS3SQLSelect("document");
399 select.addField("*");
400
401 ResultSet documents;
402 try {
403 connection.execute(select.toString());
404 documents = connection.getResultSet();
405
406 if (documents.first())
407 { do
408 { DocumentInterface document = AbstractDocument.readSQL(connection, documents);
409 list.addDocument(document);
410 }
411 while (documents.next());
412 }
413 }
414 catch (java.sql.SQLException ex)
415 { System.out.println(ex);
416 return null;
417 }
418
419 return list;
420 }
421
422 public int getCount()
423 { return this.count;
424 }
425
426 public int size()
427 { return this.used;
428 }
429}
430
431class DocumentListIterator implements Iterator
432{
433 private boolean hasNext;
434 private ResultSet resultSet;
435 private GS3SQLConnection connection;
436
437 public DocumentListIterator(GS3SQLConnection connection)
438 {
439 this.connection = connection;
440
441 GS3SQLSelect select = new GS3SQLSelect("document");
442 select.addField("*");
443
444 try {
445 connection.execute(select.toString());
446 this.resultSet = connection.getResultSet();
447 this.hasNext = this.resultSet.first();
448 } catch (SQLException ex) {
449 this.hasNext = false;
450 }
451 }
452
453 public boolean hasNext()
454 { return this.hasNext;
455 }
456
457 public Object next()
458 {
459 // get the 'next' document first
460 DocumentInterface document = AbstractDocument.readSQL(connection, this.resultSet);
461
462 // now actually step forward to the next item, so that we know if we have one!
463 try {
464 this.hasNext = this.resultSet.next();
465
466 if (!this.hasNext) {
467 this.resultSet.close(); // be a good citizen & close used result sets
468 }
469 } catch (SQLException ex) {
470 this.hasNext = false;
471 }
472 return document;
473 }
474
475 public void remove() throws UnsupportedOperationException
476 { throw new UnsupportedOperationException("DocumentList does not support iterator removal of documents");
477 }
478}
Note: See TracBrowser for help on using the repository browser.