package org.greenstone.gsdl3.gs3build.doctypes;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.io.PrintWriter;
import java.io.FileWriter;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;
import org.greenstone.gsdl3.gs3build.metadata.METSFileGroup;
import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;
import org.greenstone.gsdl3.gs3build.database.GS3SQLSelect;
import org.greenstone.gsdl3.gs3build.database.GS3SQLWhereItem;
import org.greenstone.gsdl3.gs3build.database.GS3SQLWhere;
import org.greenstone.gsdl3.gs3build.database.GS3SQLField;
public class DocumentList
{
// In-memory window of documents; only the first 'used' slots are exposed
// through size(), getDocument(int) and writeSQLDocuments().
DocumentInterface [] list; // what is currently cached
int size; // the current capacity of the cache array (kept in step with the length of 'list')
int used; // the actual number in the cache
int count; // the number of documents accepted by addDocument() so far
DocumentIDFactoryInterface idFactory; // A manufacturer of novel document IDs; null when the connection-only constructor was used
GS3SQLConnection connection; // used to query the SQL database
// Upper bound on the cache capacity: once the cache is full at this size,
// cacheDocument() drops the entry at index 0 to make room for a new one.
private static final int maxSize = 3;
/**
 * Create an empty document list that has no document ID factory.
 *
 * @param connection the database connection used for all queries
 */
public DocumentList(GS3SQLConnection connection)
{
    // Same initial state as the two-argument constructor, with no factory.
    this((DocumentIDFactoryInterface) null, connection);
}
public DocumentList(DocumentIDFactoryInterface idFactory, GS3SQLConnection connection)
{ this.idFactory = idFactory;
this.list = new DocumentInterface[3];
this.used = 0;
this.size = 3;
this.count = 0;
this.connection = connection;
}
/**
 * Obtain the identifiers of the documents that refer to the file given
 * as a parameter.
 *
 * The lookup runs in two steps: the "files" table is searched for rows
 * whose FileLocation equals the given location, and the FileGroupRef of
 * each row found is then used to select the distinct DocID values from
 * the "filegroups" table.
 *
 * @param fileLocation the location of the file to match
 *
 * @return List of the matching DocID values, each held as a String. The
 *         list is empty when no file or file group matches; if an SQL
 *         error occurs it is reported on System.err and whatever was
 *         collected so far is returned.
 */
public List getDocumentIdsWithFile(URL fileLocation)
{
    List reply = new ArrayList();

    GS3SQLSelect select = new GS3SQLSelect("files");
    select.addField("*");
    select.setWhere(new GS3SQLWhere(new GS3SQLWhereItem("FileLocation", "=", fileLocation.toString())));

    Statement statement = null;
    try {
        statement = connection.createStatement();
        ResultSet results = statement.executeQuery(select.toString());

        // Turn every file group reference found into one term of an OR-ed
        // where clause for the follow-up query on filegroups.
        GS3SQLWhere where = new GS3SQLWhere();
        where.setCondition(GS3SQLWhere.OR_CONDITION);
        boolean foundGroup = false;
        while (results.next()) {
            int fileGroupRef = results.getInt("FileGroupRef");
            where.add(new GS3SQLWhereItem("FileGroupRef", "=", Integer.toString(fileGroupRef), GS3SQLField.INTEGER_TYPE));
            foundGroup = true;
        }

        // No file matched: there is nothing to look up, and running the
        // second query with an empty where clause would be meaningless.
        if (!foundGroup) {
            return reply;
        }

        select = new GS3SQLSelect("filegroups");
        select.addField("DocID");
        select.setDistinct(true);
        select.setWhere(where);

        results = statement.executeQuery(select.toString());
        while (results.next()) {
            reply.add(results.getString("DocID"));
        }
    }
    catch (SQLException sqlEx) {
        System.err.println("DocumentList.getDocumentIdsWithFile(): "+sqlEx);
    }
    finally {
        // Always release the statement, including on the error paths.
        if (statement != null) {
            try {
                statement.close();
            }
            catch (SQLException closeEx) {
                System.err.println("DocumentList.getDocumentIdsWithFile(): "+closeEx);
            }
        }
    }
    return reply;
}
/**
 * Get a list of documents that match any of a given set of patterns,
 * optionally restricted to files below a given URL node.
 *
 * Each pattern becomes one "FileLocation REGEXP ..." term and the terms
 * are joined with OR. The patterns and the node are inserted into the
 * SQL text verbatim, so they must come from a trusted source.
 *
 * @param fileRefs   the list of patterns to match; each element is
 *                   converted with toString()
 * @param withinNode the partial URL of the root node under which to match
 *                   files, or null to match anywhere. NB: this is a String
 *                   as the URL may be incomplete and not properly match
 *                   the strict requirements for URL
 *
 * @return List of document identifiers, or null if nothing matched,
 *         the pattern list was null or empty, or the query failed
 */
public List findDocumentIdsUsingFiles(List fileRefs, String withinNode)
{
    // Without any pattern the where clause would be empty (invalid SQL).
    if (fileRefs == null || fileRefs.isEmpty()) {
        return null;
    }

    StringBuffer queryBuffer = new StringBuffer("SELECT FileGroupRef FROM files WHERE ");

    Iterator files = fileRefs.iterator();
    while (files.hasNext()) {
        String file = files.next().toString();

        // The column name must be repeated for every term; "a OR REGEXP b"
        // is not valid SQL.
        queryBuffer.append("FileLocation REGEXP \"");
        if (withinNode != null) {
            queryBuffer.append("^");
            queryBuffer.append(withinNode);
            queryBuffer.append(".*");
        }
        queryBuffer.append(file);
        queryBuffer.append("\"");

        if (files.hasNext()) {
            queryBuffer.append(" OR ");
        }
    }
    queryBuffer.append(";");

    return this.findDocumentIdsUsingFileQuery(queryBuffer.toString());
}
/**
 * Return a list of document identifiers for files that match a pattern
 * below a given root node.
 *
 * The pattern and the node are inserted into the SQL text verbatim, so
 * they must come from a trusted source.
 *
 * @param fileRef    a fragment of file pathname to match against
 * @param withinNode the partial URL of the root node under which to match
 *                   files, or null to match anywhere (as in
 *                   findDocumentIdsUsingFiles)
 *
 * @return List of document identifiers, or null if nothing matched or
 *         the query failed
 */
public List findDocumentIdsUsingFile(String fileRef, String withinNode)
{
    StringBuffer query = new StringBuffer("SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"");
    // Only anchor the pattern when a root node was actually supplied;
    // otherwise the literal text "null" would end up in the pattern.
    if (withinNode != null) {
        query.append("^");
        query.append(withinNode);
        query.append(".*");
    }
    query.append(fileRef);
    query.append("\";");
    return this.findDocumentIdsUsingFileQuery(query.toString());
}
/**
 * Return a list of document identifiers against a simple pattern. No root
 * node is given, so any file matching the pattern given will be returned.
 * USE WITH CAUTION!!!
 *
 * @param fileRef a fragment of file pathname to match against; it is used
 *                as the REGEXP operand of the query
 *
 * @return List of document identifiers, or null if nothing matched or
 *         the query failed
 */
public List findDocumentIdsUsingFile(String fileRef)
{
    // Select the file group references of every file whose location
    // matches the pattern.
    StringBuffer sql = new StringBuffer();
    sql.append("SELECT FileGroupRef FROM files WHERE FileLocation REGEXP \"");
    sql.append(fileRef);
    sql.append("\";");
    return this.findDocumentIdsUsingFileQuery(sql.toString());
}
/**
 * Return a list of document identifiers for the files whose location is
 * exactly equal to the given reference.
 *
 * @param fileRef the file location to compare against
 *
 * @return List of document identifiers, or null if nothing matched or
 *         the query failed
 */
public List findDocumentIdsUsingFileExact(String fileRef)
{
    // Select the file group references of every file stored at exactly
    // this location.
    StringBuffer sql = new StringBuffer();
    sql.append("SELECT FileGroupRef FROM files WHERE FileLocation=\"");
    sql.append(fileRef);
    sql.append("\";");
    return this.findDocumentIdsUsingFileQuery(sql.toString());
}
/**
 * Resolve a query on the "files" table into the identifiers of the
 * documents that own the matching files.
 *
 * The method works in three stages: it runs the given query to collect
 * FileGroupRef values; it then looks those groups up in "filegroups",
 * following ParentRef upwards until every branch ends in a parent of
 * type METSFileGroup.SECTION_PARENT; finally it selects the distinct
 * DocID values from "filesection" for the sections reached.
 *
 * @param query a complete SQL query on the files table that yields a
 *              FileGroupRef column
 *
 * @return List of DocID values (as String), or null if any stage yields
 *         no rows or an SQL error occurs (the error is reported on
 *         System.err)
 */
private List findDocumentIdsUsingFileQuery(String query)
{
    Statement statement = null;
    try {
        statement = connection.createStatement();
        ResultSet results = statement.executeQuery(query);
        if (!results.next()) {
            return null;
        }

        // get a list of group ids first and turn it into a query on filegroups
        StringBuffer queryBuffer = new StringBuffer("SELECT * FROM filegroups WHERE ");
        boolean first = true;
        do {
            if (!first) {
                queryBuffer.append(" OR ");
            }
            first = false;
            queryBuffer.append("FileGroupRef=").append(results.getInt("FileGroupRef"));
        } while (results.next());
        queryBuffer.append(";");

        // make a holder for the actual file section identifiers
        List divisions = new ArrayList();

        // expand (or, in fact, contract) through the document
        // structures...recreating new filegroup queries as necessary.
        // The buffer is left empty once no group has a group as parent.
        while (queryBuffer.length() > 0) {
            results = statement.executeQuery(queryBuffer.toString());
            if (!results.next()) {
                return null;
            }

            queryBuffer = new StringBuffer();
            do {
                String type = results.getString("ParentType");
                String parentRef = results.getString("ParentRef");

                if (type.equals(METSFileGroup.SECTION_PARENT)) {
                    // reached a file section: remember it
                    divisions.add(parentRef);
                }
                else {
                    // parent is another file group: look it up next round
                    if (queryBuffer.length() > 0) {
                        queryBuffer.append(" OR ");
                    }
                    queryBuffer.append("FileGroupRef=").append(parentRef);
                }
            } while (results.next());

            if (queryBuffer.length() > 0) {
                queryBuffer.insert(0, "SELECT * FROM filegroups WHERE ");
                queryBuffer.append(";");
            }
        }

        // ok, now find all the sections in which we are interested...
        queryBuffer.setLength(0);
        queryBuffer.append("SELECT DISTINCT DocID FROM filesection WHERE ");
        Iterator iterator = divisions.iterator();
        while (iterator.hasNext()) {
            queryBuffer.append("FileSectionRef=").append(iterator.next().toString());
            if (iterator.hasNext()) {
                queryBuffer.append(" OR ");
            }
        }
        queryBuffer.append(";");

        // execute the division query
        results = statement.executeQuery(queryBuffer.toString());
        if (!results.next()) {
            return null;
        }

        List reply = new ArrayList();
        do {
            reply.add(results.getString("DocID"));
        } while (results.next());
        return reply;
    }
    catch (SQLException ex) {
        System.err.println("DocumentList.findDocumentIdsUsingFileQuery()"+ ex);
        return null;
    }
    finally {
        // Single cleanup point: covers every return above as well as
        // SQL and runtime exceptions.
        if (statement != null) {
            try {
                statement.close();
            }
            catch (SQLException closeEx) {
                System.err.println("DocumentList.findDocumentIdsUsingFileQuery()"+ closeEx);
            }
        }
    }
}
/**
 * Place a document into the cache, without writing it to the database.
 * Used directly by other parts of DocumentList when they know that the
 * document is in the database already, or they are going to write it
 * themselves...
 *
 * The document is stored in the slot at index 'used'; this method does
 * not advance 'used' itself.
 *
 * @param document the document to cache
 */
private void cacheDocument(DocumentInterface document)
{
    boolean cacheIsFull = (this.used == this.size);
    if (cacheIsFull) {
        if (this.size < maxSize) {
            // still allowed to grow: double the capacity, capped at maxSize
            int doubled = this.size * 2;
            this.ensureSize(Math.min(doubled, maxSize));
        }
        else {
            // at the cap: slide every entry down one slot, dropping the
            // entry at index 0, which frees the last slot
            System.arraycopy(this.list, 1, this.list, 0, this.size - 1);
            this.used --;
        }
    }

    // insert the document itself
    this.list[this.used] = document;
}
/**
 * Write the document into the document list (cache) and the database.
 *
 * A document for which getDuplicateID() reports a non-empty identifier is
 * ignored. Otherwise the document is cached, given a new identifier if it
 * has none, and written to the database if it reports itself as changed.
 *
 * @param document the document itself
 *
 * @throws IllegalStateException if the document has no identifier and
 *         this list was created without a document ID factory
 */
public void addDocument(DocumentInterface document)
{
    // initially, test if the document has a duplicate...
    // (a null reply is treated the same as "no duplicate")
    String duplicateDocID = document.getDuplicateID(this.connection);
    if (duplicateDocID != null && duplicateDocID.length() > 0) {
        System.out.println("Found duplicate document ");
        return;
    }

    // An identifier will have to be issued below; fail with a clear
    // message, and before the cache is touched, if that is impossible.
    if (document.getID() == null && this.idFactory == null) {
        throw new IllegalStateException("DocumentList.addDocument(): document has no ID and no document ID factory is available");
    }

    // first cache it...
    this.cacheDocument(document);

    // set the document identifier, if not already set
    if (document.getID() == null) {
        System.out.println("Posting new docuument ID");
        DocumentID id = this.idFactory.getNewDocumentID(document);
        document.setID(id);
    }

    // add to the database as well, if it is modified...
    if (document.isChanged()) {
        System.out.println("Document was changed");
        document.getSQLWriter().writeDocument(document, this.connection);
    }

    // Remember that we've used one more item from the cache.
    this.used ++;

    // Note additional document
    this.count ++;
}
/**
 * Note that an individual document is modified, and act accordingly:
 * the document is handed to its own SQL writer together with this
 * list's database connection.
 *
 * @param document the document
 */
public void storeChangedDocument(DocumentInterface document)
{
    document.getSQLWriter().writeDocument(document, connection);
}
/**
 * Get an iterator across all the documents, not merely those in the
 * cache. Note that this Iterator does not support the remove() function,
 * and will raise an UnsupportedOperationException if you attempt to do so.
 *
 * @return the iterator across the documents
 */
public Iterator iterator()
{
    Iterator allDocuments = new DocumentListIterator(this.connection);
    return allDocuments;
}
/**
 * Get the nth member of the cached document list.
 *
 * @param index zero-based position within the cached documents
 *
 * @return the cached document at that position, or null when the index
 *         is negative or not below the number of cached documents
 *
 * @deprecated
 */
public DocumentInterface getDocument(int index)
{
    boolean withinCache = (index >= 0 && index < this.used);
    if (withinCache) {
        return this.list[index];
    }
    return null;
}
/**
 * Simple "obtain a document" function: the document is read from the
 * database through DocumentFactory and, if found, passed to
 * cacheDocument().
 *
 * NOTE(review): unlike addDocument(), this method does not advance
 * 'used' after caching, so the cached slot is not visible through
 * size() or getDocument(int) - confirm whether that is intended.
 *
 * @param documentId the identifier of the document to read
 *
 * @return the document, or null if DocumentFactory returned none
 */
public DocumentInterface getDocument(DocumentID documentId)
{
    DocumentInterface loaded = DocumentFactory.readSQLDocument(this.connection, documentId);
    if (loaded == null) {
        return null;
    }
    this.cacheDocument(loaded);
    return loaded;
}
/**
 * Update timestamps on an entire document list - done at the beginning
 * of a build cycle. Every document whose files datestamp is later than
 * its modified datestamp is touched in the database.
 *
 * @param buildTimeStamp the date of the new build cycle
 */
public void updateTimestamps(long buildTimeStamp)
{
    for (Iterator all = this.iterator(); all.hasNext(); ) {
        DocumentInterface current = (DocumentInterface) all.next();

        long filesStamp = current.getFilesDatestamp();
        long modifiedStamp = current.getModifiedDatestamp();

        // files are not newer than the last recorded modification
        if (filesStamp <= modifiedStamp) {
            continue;
        }

        System.out.println("Updating timestamps " + filesStamp + " " + modifiedStamp);
        DocumentSQLWriter.touchDocument(current.getID(), this.connection, buildTimeStamp, filesStamp);
    }
}
/**
 * A convenience method to map onto the old Vector source code...
 *
 * Makes sure the cache array can hold at least the given number of
 * documents. The existing entries are preserved. A request that does not
 * exceed the current capacity leaves the cache untouched, so the cache
 * never shrinks.
 *
 * @param size the minimum capacity required
 */
protected void ensureSize(int size)
{
    // Nothing to do if we are already big enough. Copying 'this.size'
    // entries into a smaller array would overrun it.
    if (size <= this.size) {
        return;
    }

    DocumentInterface [] newList = new DocumentInterface[size];
    System.arraycopy(this.list, 0, newList, 0, this.size);
    this.list = newList;
    this.size = size;
}
/**
 * Write the documents into a directory as METS/XML.
 *
 * Every document delivered by iterator() is written to its own file,
 * named "Doc" + n + ".xml" where n counts from 1 in iteration order.
 * A failure on one document is reported on System.err and does not stop
 * the remaining documents from being written.
 *
 * @param directory the directory in which the files are created
 */
public void writeDocuments(File directory)
{
    Iterator documents = this.iterator();
    int item = 0;

    while (documents.hasNext()) {
        DocumentInterface document = (DocumentInterface) documents.next();

        item ++;
        File localFile = new File(directory, "Doc"+Integer.toString(item)+".xml");

        PrintWriter writer = null;
        try {
            writer = new PrintWriter(new FileWriter(localFile));
            document.getMETSWriter().writeDocument(document, writer);

            // PrintWriter never throws on write errors; ask it explicitly.
            if (writer.checkError()) {
                System.err.println("DocumentList.writeDocuments(): error while writing " + localFile);
            }
        }
        catch (IOException io) {
            System.err.println("DocumentList.writeDocuments(): unable to write " + localFile + ": " + io);
        }
        finally {
            // closing the PrintWriter also closes the underlying FileWriter
            if (writer != null) {
                writer.close();
            }
        }
    }
}
/**
 * Write every document currently held in the cache to the database,
 * each through its own SQL writer.
 *
 * @param connection the database connection to write to
 */
public void writeSQLDocuments(GS3SQLConnection connection)
{
    int slot = 0;
    while (slot < this.used) {
        DocumentInterface cached = this.list[slot];
        cached.getSQLWriter().writeDocument(cached, connection);
        slot ++;
    }
}
/**
 * Build a document list from every row of the "document" table.
 *
 * Each row is turned into a document with AbstractDocument.readSQL() and
 * passed to addDocument() of a new list that has no document ID factory.
 *
 * @param connection the database connection to read from
 *
 * @return the populated list, or null if an SQL error occurred (the
 *         error is reported on System.err)
 */
public static DocumentList readSQLDocuments(GS3SQLConnection connection)
{
    DocumentList list = new DocumentList(connection);

    GS3SQLSelect select = new GS3SQLSelect("document");
    select.addField("*");

    Statement statement = null;
    try {
        statement = connection.createStatement();
        ResultSet documents = statement.executeQuery(select.toString());
        while (documents.next()) {
            DocumentInterface document = AbstractDocument.readSQL(connection, documents);
            // defensive: skip rows that could not be turned into a document
            if (document != null) {
                list.addDocument(document);
            }
        }
    }
    catch (java.sql.SQLException ex) {
        System.err.println("DocumentList.readSQLDocuments(): "+ex);
        return null;
    }
    finally {
        // release the statement on the success and the error path alike
        if (statement != null) {
            try {
                statement.close();
            }
            catch (java.sql.SQLException closeEx) {
                System.err.println("DocumentList.readSQLDocuments(): "+closeEx);
            }
        }
    }
    return list;
}
/**
 * Get the number of documents accepted through addDocument() so far.
 *
 * @return the document count
 */
public int getCount()
{
    return count;
}
/**
 * Get the number of documents currently held in the cache.
 *
 * @return the number of cached documents
 */
public int size()
{
    return used;
}
}
/**
 * Iterator over every row of the "document" table, delivering each row
 * as a document built by AbstractDocument.readSQL().
 *
 * The iterator keeps one statement and result set open while rows
 * remain. The statement is closed as soon as the last row has been
 * delivered, when the table turns out to be empty, or when an SQL error
 * occurs. Removal is not supported.
 */
class DocumentListIterator implements Iterator
{
    private boolean hasNext;      // true while the result set is positioned on an undelivered row
    private Statement statement;  // null once closed
    private ResultSet resultSet;
    private GS3SQLConnection connection;

    /**
     * Open the query on the document table and position on the first row.
     * SQL errors are reported on System.err and leave the iterator empty.
     *
     * @param connection the database connection to read from
     */
    public DocumentListIterator(GS3SQLConnection connection)
    {
        this.connection = connection;

        GS3SQLSelect select = new GS3SQLSelect("document");
        select.addField("*");

        try {
            this.statement = connection.createStatement();
            this.resultSet = this.statement.executeQuery(select.toString());
            // a new result set starts before the first row
            this.hasNext = this.resultSet.next();
        } catch (SQLException ex) {
            System.err.println("DocumentListIterator(): "+ex);
            this.hasNext = false;
        }

        // nothing to iterate over: do not hold on to the statement
        if (!this.hasNext) {
            this.closeStatement();
        }
    }

    /**
     * @return true if a further document can be obtained with next()
     */
    public boolean hasNext()
    {
        return this.hasNext;
    }

    /**
     * Deliver the document of the current row and step on to the next row.
     *
     * @return the document, as a DocumentInterface
     *
     * @throws java.util.NoSuchElementException if no document remains
     */
    public Object next()
    {
        if (!this.hasNext) {
            throw new java.util.NoSuchElementException("DocumentList iterator has no more documents");
        }

        // get the 'next' document first
        DocumentInterface document = AbstractDocument.readSQL(connection, this.resultSet);

        // now actually step forward to the next item, so that we know if we have one!
        try {
            this.hasNext = this.resultSet.next();
        } catch (SQLException ex) {
            System.err.println("DocumentList.iterator.next(): "+ex);
            this.hasNext = false;
        }

        if (!this.hasNext) {
            this.closeStatement(); // be a good citizen & close used statement
        }
        return document;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() throws UnsupportedOperationException
    {
        throw new UnsupportedOperationException("DocumentList does not support iterator removal of documents");
    }

    /**
     * Close the statement if it is still open; safe to call repeatedly.
     */
    private void closeStatement()
    {
        if (this.statement == null) {
            return;
        }
        try {
            this.statement.close();
        } catch (SQLException ex) {
            System.err.println("DocumentList.iterator.close(): "+ex);
        }
        this.statement = null;
        this.resultSet = null;
    }
}