Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java@ 8861

Last change on this file since 8861 was 8861, checked in by cs025, 19 years ago
Changed use of touchDocuments to use a long value as the timestamp
Property svn:keywords set to `Author Date Id Revision`
File size: 15.1 KB

Line
1	package org.greenstone.gsdl3.gs3build.doctypes;
2
3	import java.util.Iterator;
4	import java.util.List;
5	import java.util.ArrayList;
6
7	import java.io.PrintWriter;
8	import java.io.FileWriter;
9	import java.io.File;
10	import java.io.IOException;
11
12	import java.net.URL;
13
14	import java.sql.SQLException;
15	import java.sql.ResultSet;
16
17	import org.greenstone.gsdl3.gs3build.metadata.METSFileGroup;
18
19	import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;
20	import org.greenstone.gsdl3.gs3build.database.GS3SQLSelect;
21	import org.greenstone.gsdl3.gs3build.database.GS3SQLWhereItem;
22	import org.greenstone.gsdl3.gs3build.database.GS3SQLWhere;
23	import org.greenstone.gsdl3.gs3build.database.GS3SQLField;
24
25	public class DocumentList
26	{
27	DocumentInterface [] list; // what is currently cached
28	int size; // the maximum number in the cache
29	int used; // the actual number in the cache
30	int count; // the total number of known documents
31	DocumentIDFactoryInterface idFactory; // A manufacturer of novel document IDs
32	GS3SQLConnection connection; // used to query the SQL database
33
34	private static final int maxSize = 3;
35
/**
 * Create an empty document list that has no document ID factory.
 *
 * @param connection the connection used to query the SQL database
 */
public DocumentList(GS3SQLConnection connection)
{
    this.connection = connection;
    this.idFactory = null;

    // start with an empty three-slot cache
    this.size = 3;
    this.list = new DocumentInterface[3];
    this.used = 0;
    this.count = 0;
}
44
/**
 * Create an empty document list that obtains new document IDs from the
 * given factory.
 *
 * @param idFactory  the manufacturer of novel document IDs
 * @param connection the connection used to query the SQL database
 */
public DocumentList(DocumentIDFactoryInterface idFactory, GS3SQLConnection connection)
{
    this.connection = connection;
    this.idFactory = idFactory;

    // start with an empty three-slot cache
    this.size = 3;
    this.list = new DocumentInterface[3];
    this.used = 0;
    this.count = 0;
}
53
54	/**
55	* Obtain the list of <code>DocumentID</code> objects representing the unique
56	* document identifiers of documents that refer to the file given as a parameter.
57	*
58	* @param <code>URL</code> the location of the file to match
59	*
60	* @return <code>List</code> of <code>DocumentID</code> reference identifiers.
61	*/
62	public List getDocumentIdsWithFile(URL fileLocation)
63	{ List reply = new ArrayList();
64
65	GS3SQLSelect select = new GS3SQLSelect("files");
66	select.addField("*");
67	GS3SQLWhere where = new GS3SQLWhere(new GS3SQLWhereItem("FileLocation", "=", fileLocation.toString()));
68	select.setWhere(where);
69
70	this.connection.execute(select.toString());
71
72	ResultSet results = this.connection.getResultSet();
73	if (results != null) {
74	select = new GS3SQLSelect("filegroups");
75	select.addField("DocID");
76	select.setDistinct(true);
77
78	where = new GS3SQLWhere();
79	where.setCondition(GS3SQLWhere.OR_CONDITION);
80
81	GS3SQLWhereItem whereItem = null;
82
83	try {
84	results.first();
85	do {
86	int fileGroupRef = results.getInt("FileGroupRef");
87	whereItem = new GS3SQLWhereItem("FileGroupRef", "=", Integer.toString(fileGroupRef), GS3SQLField.INTEGER_TYPE);
88	where.add(whereItem);
89	}
90	while (results.next());
91	select.setWhere(where);
92	results.close();
93
94	this.connection.execute(select.toString());
95
96	results = this.connection.getResultSet();
97	results.first();
98	do {
99	String docId = results.getString("DocID");
100	reply.add(docId);
101	} while (results.next());
102	}
103	catch (SQLException sqlEx)
104	{ System.err.println(sqlEx);
105	}
106	}
107	return reply;
108	}
109
110	/**
111	* Get a list of documents that match a given set of patterns,
112	* within a given URL node.
113	*
114	* @param <code>List</code> the list of patterns to match
115	* @param <code>String</code> the partial URL of the root node under which o
116	* match files. NB: this is a <code>String</code> as the URL may be
117	* incomplete and not properly match the strict requirements for <code>URL</code>
118	*/
119	public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
120	{
121	StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation ");
122
123	Iterator files = fileRefs.iterator();
124	while (files.hasNext()) {
125	String file = files.next().toString();
126
127	if (withinNode != null) {
128	queryBuffer.append("REGEXP \"^");
129	queryBuffer.append(withinNode);
130	queryBuffer.append(".*");
131	}
132	else {
133	queryBuffer.append("REGEXP \"");
134	}
135	queryBuffer.append(file);
136	queryBuffer.append("\"");
137
138	if (files.hasNext()) {
139	queryBuffer.append(" OR ");
140	}
141	}
142	queryBuffer.append(";");
143	return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
144	}
145
146	public List findDocumentIdsUsingFile(String fileRef, String withinNode)
147	{ String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"^"+withinNode+".*"+fileRef+"\";";
148	return this.findDocumentIdsUsingFileQuery(query);
149	}
150
151	/**
152	* Return a list of document identifiers against a simple pattern. No root node is given, so
153	* any file matching the pattern given will be returned. USE WITH CAUTION!!!
154	*
155	* @param <code>String</code> a fragment of file pathname to match against.
156	*
157	* @return <code>List</code> of <code>DocumentID</code> objects.
158	*/
159	public List findDocumentIdsUsingFile(String fileRef)
160	{
161	// Get the simple list of file objects & their file group reference
162	String query = "SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"" + fileRef +"\";";
163
164	return this.findDocumentIdsUsingFileQuery(query);
165	}
166
167	public List findDocumentIdsUsingFileExact(String fileRef)
168	{
169	// Get the simple list of file objects & their file group reference
170	String query = "SELECT FileGroupRef FROM files WHERE FileLocation=\"" + fileRef +"\";";
171
172	return this.findDocumentIdsUsingFileQuery(query);
173	}
174
175	private List findDocumentIdsUsingFileQuery(String query)
176	{ this.connection.execute(query);
177
178	try {
179
180	ResultSet results = this.connection.getResultSet();
181	if (results == null \|\|
182	!results.first()) {
183	return null;
184	}
185
186	// get a list of group ids first and turn it into a query on filegroups
187	StringBuffer queryBuffer = new StringBuffer("SELECT * FROM filegroups WHERE ");
188	boolean first = true;
189
190	do {
191	int groupRef = results.getInt("FileGroupRef");
192
193	if (first) {
194	first = false;
195	}
196	else {
197	queryBuffer.append(" OR ");
198	}
199	queryBuffer.append("FileGroupRef=" + Integer.toString(groupRef));
200	} while (results.next());
201	queryBuffer.append(";");
202
203	// make a holder for the actual file section identifiers
204	List divisions = new ArrayList();
205
206	// expand (or, in fact, contract) through the document
207	// structures...recreating new filegroup queries as necessary
208	while (queryBuffer.length() > 0) {
209	connection.execute(queryBuffer.toString());
210
211	results = this.connection.getResultSet();
212	if (results == null \|\| !results.first()) {
213	return null;
214	}
215
216	queryBuffer = new StringBuffer();
217	do {
218	String type = results.getString("ParentType");
219	String parentRef = results.getString("ParentRef");
220	if (type.equals(METSFileGroup.SECTION_PARENT)) {
221	divisions.add(parentRef);
222	}
223	else {
224	if (queryBuffer.length() > 0) {
225	queryBuffer.append(" OR ");
226	}
227	queryBuffer.append("FileGroupRef=" + parentRef);
228	}
229	} while (results.next());
230
231	if (queryBuffer.length() > 0) {
232	queryBuffer.insert(0, "SELECT * FROM filegroups WHERE ");
233	queryBuffer.append(";");
234	}
235	}
236
237	// ok, now find all the sections in which we are interested...
238	queryBuffer.setLength(0);
239	queryBuffer.append("SELECT DISTINCT DocID FROM filesection WHERE ");
240	Iterator iterator = divisions.iterator();
241	first = true;
242
243	while (iterator.hasNext()) {
244	String ref = iterator.next().toString();
245
246	if (first) {
247	first = false;
248	}
249	else {
250	queryBuffer.append(" OR ");
251	}
252
253	queryBuffer.append("FileSectionRef="+ref);
254	}
255	queryBuffer.append(";");
256
257	// execute the division query
258	this.connection.execute(queryBuffer.toString());
259
260	results = this.connection.getResultSet();
261	if (results == null \|\|
262	!results.first()) {
263	return null;
264	}
265
266	List reply = new ArrayList();
267	do {
268	reply.add(results.getString("DocID"));
269	} while (results.next());
270
271	return reply;
272	}
273	catch (SQLException ex) {
274	System.err.println(ex);
275	}
276	return null;
277	}
278
279	/**
280	* Cache a document into the cache, without writing it to the database.
281	* Used directly by other parts of DocumentList when they know that the
282	* document is in the database already, or they are going to write it
283	* themselves...
284	*
285	* @param <code>DocumentInterface</code> the document to cache
286	*/
287	private void cacheDocument(DocumentInterface document)
288	{ // increase cache size, etc. as necessary
289	if (this.used == this.size) {
290	if (this.size >= maxSize) {
291	for (int i = 0; i < this.size - 1; i ++) {
292	this.list[i] = this.list[i+1];
293	}
294	this.used --;
295	}
296	else {
297	this.ensureSize((this.size * 2) > maxSize ? maxSize : (this.size * 2));
298	}
299	}
300
301	// insert the document itself
302	this.list[this.used] = document;
303	}
304
305	/**
306	* Write the document into the document list (cache) and the database.
307	*
308	* @param <code>DocumentInterface</code> the document itself
309	*/
310	public void addDocument(DocumentInterface document)
311	{ // initially, test if the document has a duplicate...
312	String duplicateDocID = document.getDuplicateID(this.connection);
313	if (duplicateDocID.length() > 0) {
314	System.out.println("Found duplicate document ");
315	return;
316	}
317
318	// first cache it...
319	this.cacheDocument(document);
320
321	// set the document identifier, if not already set
322	if (document.getID() == null) {
323	System.out.println("Posting new docuument ID");
324	DocumentID id = this.idFactory.getNewDocumentID(document);
325	document.setID(id);
326	}
327
328	// add to the database as well, if it is modified...
329	if (document.isChanged()) {
330	System.out.println("Document was changed");
331	document.getSQLWriter().writeDocument(document, this.connection);
332	}
333
334	// Remember that we've used one more item from the cache.
335	this.used ++;
336
337	// Note additional document
338	this.count ++;
339	}
340
341	/**
342	* Note that an individual document is modified, and act accordingly
343	*
344	* @param <code>DocumentInterface</code> the document
345	*/
346	public void storeChangedDocument(DocumentInterface document)
347	{ document.getSQLWriter().writeDocument(document, this.connection);
348	}
349
350	/**
351	* Get an iterator across all the documents, not merely those in
352	* the cache. Note that this <code>Iterator</code> does <b>not</b>
353	* support the <code>remove()</code> function, and will raise an
354	* <code>UnsupportedOperationException</code> if you attempt to do
355	* so.
356	*
357	* @return <code>Iterator</code> the iterator across the documents.
358	*/
359	public Iterator iterator()
360	{ return new DocumentListIterator(connection);
361	}
362
363	/**
364	* Get the nth member of the <b>cached</b> document list.
365	*
366	* @deprecated
367	*/
368	public DocumentInterface getDocument(int index)
369	{ if (index < 0 \|\| index >= this.used)
370	{ return null;
371	}
372	return this.list[index];
373	}
374
375	/**
376	* Simple "obtain a document" function
377	*/
378	public DocumentInterface getDocument(DocumentID documentId)
379	{
380	DocumentInterface document = DocumentFactory.readSQLDocument(connection, documentId);
381	if (document != null) {
382	this.cacheDocument(document);
383	}
384	return document;
385	}
386
387	/**
388	* Update timestamps on an entire document list - done at the beginning of a build cycle
389	*
390	* @param <code>The date of the new build cycle</code>
391	*/
392	public void updateTimestamps(long buildTimeStamp)
393	{ Iterator documents = this.iterator();
394	int item = 0;
395
396	while (documents.hasNext())
397	{ DocumentInterface document = (DocumentInterface) documents.next();
398
399	long thisTimeStamp = document.getFilesDatestamp();
400	long lastTimeStamp = document.getModifiedDatestamp();
401
402	if (thisTimeStamp > lastTimeStamp) {
403	System.out.println("Updating timestamps " + thisTimeStamp + " " + lastTimeStamp);
404
405	DocumentSQLWriter.touchDocument(document.getID(), this.connection, buildTimeStamp, thisTimeStamp);
406	}
407	}
408	}
409
410	/**
411	* A convenience method to map onto the old Vector source code...
412	*/
413	protected void ensureSize(int size)
414	{ DocumentInterface [] newList = new DocumentInterface[size];
415	System.arraycopy(this.list, 0, newList, 0, this.size);
416	this.list = newList;
417	this.size = size;
418	}
419
420	/**
421	* Write the documents into a directory as METS/XML
422	*/
423	public void writeDocuments(File directory)
424	{ Iterator documents = this.iterator();
425	int item = 0;
426
427	while (documents.hasNext())
428	{ DocumentInterface document = (DocumentInterface) documents.next();
429	try
430	{ item ++;
431	File localFile = new File(directory, "Doc"+Integer.toString(item)+".xml");
432	FileWriter fileWriter = new FileWriter(localFile);
433	PrintWriter writer = new PrintWriter(fileWriter);
434	document.getMETSWriter().writeDocument(document, writer);
435	writer.close();
436	fileWriter.close();
437	}
438	catch (IOException io)
439	{
440	}
441	}
442	}
443
444	public void writeSQLDocuments(GS3SQLConnection connection)
445	{ for (int i = 0; i < this.used; i ++)
446	{ this.list[i].getSQLWriter().writeDocument(this.list[i], connection);
447	}
448	}
449
450	public static DocumentList readSQLDocuments(GS3SQLConnection connection)
451	{ DocumentList list = new DocumentList(connection);
452
453	GS3SQLSelect select = new GS3SQLSelect("document");
454	select.addField("*");
455
456	ResultSet documents;
457	try {
458	connection.execute(select.toString());
459	documents = connection.getResultSet();
460
461	if (documents.first())
462	{ do
463	{ DocumentInterface document = AbstractDocument.readSQL(connection, documents);
464	list.addDocument(document);
465	}
466	while (documents.next());
467	}
468	}
469	catch (java.sql.SQLException ex)
470	{ System.out.println(ex);
471	return null;
472	}
473
474	return list;
475	}
476
477	public int getCount()
478	{ return this.count;
479	}
480
481	public int size()
482	{ return this.used;
483	}
484	}
485
486	class DocumentListIterator implements Iterator
487	{
488	private boolean hasNext;
489	private ResultSet resultSet;
490	private GS3SQLConnection connection;
491
492	public DocumentListIterator(GS3SQLConnection connection)
493	{
494	this.connection = connection;
495
496	GS3SQLSelect select = new GS3SQLSelect("document");
497	select.addField("*");
498
499	try {
500	connection.execute(select.toString());
501	this.resultSet = connection.getResultSet();
502	this.hasNext = this.resultSet.first();
503	} catch (SQLException ex) {
504	this.hasNext = false;
505	}
506	}
507
508	public boolean hasNext()
509	{ return this.hasNext;
510	}
511
512	public Object next()
513	{
514	// get the 'next' document first
515	DocumentInterface document = AbstractDocument.readSQL(connection, this.resultSet);
516
517	// now actually step forward to the next item, so that we know if we have one!
518	try {
519	this.hasNext = this.resultSet.next();
520
521	if (!this.hasNext) {
522	this.resultSet.close(); // be a good citizen & close used result sets
523	}
524	} catch (SQLException ex) {
525	this.hasNext = false;
526	}
527	return document;
528	}
529
530	public void remove() throws UnsupportedOperationException
531	{ throw new UnsupportedOperationException("DocumentList does not support iterator removal of documents");
532	}
533	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: