package org.greenstone.gsdl3.gs3build.doctypes;

import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.HashMap;
import java.util.Map;
import java.util.Date;

import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;
import java.sql.Timestamp;

import java.net.URL;

import org.greenstone.gsdl3.gs3build.metadata.NamespaceFactory;
import org.greenstone.gsdl3.gs3build.metadata.StructureIdentifierFactory;
import org.greenstone.gsdl3.gs3build.metadata.GSDL3Namespace;
import org.greenstone.gsdl3.gs3build.metadata.METSDescriptiveSet;
import org.greenstone.gsdl3.gs3build.metadata.METSFile;
import org.greenstone.gsdl3.gs3build.metadata.METSFileSet;
import org.greenstone.gsdl3.gs3build.metadata.METSHeader;
import org.greenstone.gsdl3.gs3build.metadata.METSStructure;
import org.greenstone.gsdl3.gs3build.metadata.METSStructureSet;
import org.greenstone.gsdl3.gs3build.metadata.METSDivision;
import org.greenstone.gsdl3.gs3build.metadata.METSNamespace;
import org.greenstone.gsdl3.gs3build.metadata.MetadataLabel;

import org.greenstone.gsdl3.gs3build.util.MultiMap;

import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;

/**
 * Provide a base-line functionality for the DocumentInterface
 * class.
 *
 * <p>Holds the METS components of a document (files, descriptive metadata,
 * structure and header), its identifier, three datestamps and a "modified"
 * flag, and supplies simple accessors for all of them.  Subclasses provide
 * the document type and the text extraction.</p>
 */
public abstract class AbstractDocument implements DocumentInterface {
    /** The files that make up this document. */
    METSFileSet fileSet;
    /** The descriptive metadata attached to this document. */
    METSDescriptiveSet metadata;
    /** The structures (divisions) of this document. */
    METSStructureSet structureSet;
    /** The METS header of this document. */
    METSHeader header;
    /** The document identifier; may be null until setID is called. */
    DocumentID id;
    /** Set whenever identifier or metadata are changed through this class. */
    boolean isModified;
    StructureIdentifierFactory structureIdFactory;
    /** Accession date - see getAccessionDate. */
    java.sql.Timestamp firstDate;
    /** Last indexed date - see getLastIndexedDate. */
    java.sql.Timestamp indexDate;
    /** Modification date; null means "not yet looked up" - see getModifiedDatestamp. */
    java.sql.Timestamp modifiedDate;

    /**
     * Create a very vanilla document with a given document identifier.
     *
     * <p>Most commonly used in dealing with loading files using DocumentFactory
     * or similar.</p>
     *
     * <p>The accession and indexed dates are both set to the current time; the
     * modified date is left unset.</p>
     *
     * @param id the document identifier
     */
    public AbstractDocument(DocumentID id) {
        this.fileSet = new METSFileSet();
        this.metadata = new METSDescriptiveSet();
        this.header = new METSHeader();
        this.structureSet = new METSStructureSet();
        this.id = id;
        this.structureIdFactory = new StructureIdentifierFactory();

        java.util.Date thisDate = new java.util.Date();
        this.firstDate = new java.sql.Timestamp(thisDate.getTime());
        this.indexDate = new java.sql.Timestamp(thisDate.getTime());
        // as a signature that the modified date needs finding...
        this.modifiedDate = null;
    }

    /**
     * Create a basic document from a given URL.
     *
     * <p>The file is added to the file set, and a single "All" structure with
     * one "All" division is created which references that file and the
     * metadata group "DM1".  The identifier is left null.</p>
     *
     * @param url the URL of the first file in the document package
     */
    public AbstractDocument(URL url) {
        this.fileSet = new METSFileSet();
        METSFile metsFile = this.fileSet.addFile(url);
        this.metadata = new METSDescriptiveSet();
        this.header = new METSHeader();
        this.structureSet = new METSStructureSet();
        this.id = null;
        // Initialise the factory exactly as the DocumentID constructor does, so
        // that it is never null whichever constructor was used.
        this.structureIdFactory = new StructureIdentifierFactory();

        java.util.Date thisDate = new java.util.Date();
        this.firstDate = new java.sql.Timestamp(thisDate.getTime());
        this.indexDate = new java.sql.Timestamp(thisDate.getTime());
        // as a signature that the modified date needs finding...
        this.modifiedDate = null;

        METSStructure structure = new METSStructure("All", "All", "Whole Document");
        METSDivision documentBody =
            new METSDivision("All", "All", "All", "Whole Document", "Document");
        structure.addDivision(documentBody);
        this.structureSet.addStructure(structure);

        documentBody.addFileReference(metsFile.getID());
        documentBody.addMetadataReference("DM1");
    }

    /**
     * Set the identifier for the document.  Every document should have
     * a document number set on its accession, either through metadata
     * placed upon it internally or externally, or by assignment through
     * a DocumentIDFactory.  Each identifier should be unique.
     *
     * <p>Marks the document as modified.</p>
     *
     * @param id the document identifier - in XML terms, the gsdl3:id element.
     */
    public void setID(DocumentID id) {
        this.id = id;
        this.isModified = true;
    }

    /**
     * Get the document identifier - this should be unique to the document,
     * but care must be taken in the configuration of the collection to
     * ensure that this is the case.
     *
     * @return the identifier
     */
    public DocumentID getID() {
        return this.id;
    }

    /**
     * Indicate whether this document is indexed.  This base implementation
     * always answers true.
     *
     * @see DocumentInterface#isIndexed
     */
    public boolean isIndexed() {
        return true;
    }

    /**
     * Get the modification datestamp reported by this document's file set.
     *
     * @return the value of the file set's modified datestamp
     */
    public long getFilesDatestamp() {
        return this.fileSet.getModifiedDatestamp();
    }

    /**
     * Get the date that this document was modified.  If no modified date is
     * held yet, it is first taken from the file set.
     *
     * @return the modified date in milliseconds
     */
    public long getModifiedDatestamp() {
        if (this.modifiedDate == null) {
            this.setModifiedDatestamp();
        }
        return this.modifiedDate.getTime();
    }

    /**
     * Update/set the date of the most recent file modification from the
     * file set.
     */
    public void setModifiedDatestamp() {
        this.modifiedDate = new java.sql.Timestamp(this.fileSet.getModifiedDatestamp());
    }

    /**
     * Get the date that this document was first indexed.
     *
     * @return the accession date in milliseconds
     */
    public long getAccessionDate() {
        return this.firstDate.getTime();
    }

    /**
     * Get the date that this document was last indexed.
     *
     * @return the last indexed date in milliseconds
     */
    public long getLastIndexedDate() {
        return this.indexDate.getTime();
    }

    /**
     * Set the last indexed date for this document.
     *
     * @param timestamp the new last indexed date in milliseconds
     */
    public void setLastIndexedDate(long timestamp) {
        this.indexDate = new java.sql.Timestamp(timestamp);
    }

    /**
     * Check if this document is in the database already.
     *
     * <p>In this simple implementation, the first file in the document's "default"
     * filegroup is taken to be the canonical file for this document - any document
     * of the same type with the same canonical file is taken to be a match.</p>
     *
     * @param connection the database connection to query
     * @return the DocID of the first matching document found in the database,
     *         or the empty string if there is no match or a SQL error occurred
     */
    public String getDuplicateID(GS3SQLConnection connection) {
        // Query for documents using the same file...
        // NOTE(review): the file location and the document ids are concatenated
        // straight into the SQL text; a location containing a double quote will
        // break (or subvert) the query.  Switch to a parameterised statement if
        // GS3SQLConnection offers one - not visible from this file.
        String query = "SELECT DocID FROM files INNER JOIN filegroups ON files.FileGroupRef=filegroups.FileGroupRef WHERE (filegroups.FileGroupId=\"default\" AND files.FileLocation=\"" + this.fileSet.getFile(0).getLocation().toString() + "\")";

        Statement statement = null;
        try {
            statement = connection.createStatement();
            ResultSet results = statement.executeQuery(query);

            // Collect every candidate id first: the statement is reused for the
            // per-document lookups below, which invalidates this result set.
            List docs = new ArrayList();
            if (results.first()) {
                do {
                    docs.add(results.getString("DocID"));
                } while (results.next());
            }

            Iterator docIterator = docs.iterator();
            while (docIterator.hasNext()) {
                String docId = docIterator.next().toString();

                String innerQuery = "SELECT * FROM document WHERE DocID=\"" + docId + "\"";
                ResultSet docRow = statement.executeQuery(innerQuery);
                if (docRow.first()) {
                    String docType = docRow.getString("DocType");
                    // A null DocType column cannot match any document type.
                    if (docType != null && docType.equals(this.getDocumentType())) {
                        return docId;
                    }
                }
            }
        } catch (java.sql.SQLException sqlEx) {
            System.err.println("AbstractDocument.getDuplicateID(): " + sqlEx);
        } finally {
            // Release the statement on every path, including the early return
            // above and the error path.
            if (statement != null) {
                try {
                    statement.close();
                } catch (java.sql.SQLException closeEx) {
                    System.err.println("AbstractDocument.getDuplicateID(): " + closeEx);
                }
            }
        }
        return "";
    }

    /**
     * Obtain the METS header of this document.
     *
     * @return the header
     */
    public METSHeader getHeader() {
        return this.header;
    }

    /**
     * Set the METS header for this document.
     *
     * @param header the header
     */
    public void setHeader(METSHeader header) {
        this.header = header;
    }

    /**
     * A simple implementation of the isDocumentType function that does not consider
     * inheritance - it must be extended as required.
     *
     * @param type the document type to test against; null never matches
     * @return true if <code>type</code> equals this document's type
     */
    public boolean isDocumentType(String type) {
        return type != null && type.equals(this.getDocumentType());
    }

    /**
     * @return the type name of this kind of document
     */
    public abstract String getDocumentType();

    /**
     * @return the text of the document
     */
    public abstract String getDocumentText();

    /**
     * @param sectionId the identifier of the section wanted
     * @return the text of the given section
     */
    public abstract String getSectionText(String sectionId);

    /**
     * @return the METS type of this object, always "document" at this level
     */
    public String getMETSType() {
        return "document";
    }

    /**
     * Add a metadata value, taking namespace and label from a MetadataLabel.
     *
     * @param label the namespace/label pair
     * @param value the value to add
     * @see DocumentInterface#addDocumentMetadata
     */
    public void addDocumentMetadata(MetadataLabel label, String value) {
        // no need to set isModified, as the following call will do it anyway!
        this.addDocumentMetadata(label.getNamespace(), label.getLabel(), value);
    }

    /**
     * Set a metadata value, taking namespace and label from a MetadataLabel.
     *
     * @param label the namespace/label pair
     * @param value the value to set
     */
    public void setDocumentMetadata(MetadataLabel label, String value) {
        // no need to set isModified, as the following call will do it anyway!
        this.setDocumentMetadata(label.getNamespace(), label.getLabel(), value);
    }

    /**
     * Add a metadata value to the "default" metadata group and mark the
     * document as modified.
     *
     * @param namespace the namespace of the metadata
     * @param label the label of the metadata
     * @param value the value to add
     * @see DocumentInterface#addDocumentMetadata
     */
    public void addDocumentMetadata(String namespace, String label, String value) {
        this.metadata.addMetadata("default", namespace, label, value);
        this.isModified = true;
    }

    /**
     * Set a metadata value in the "default" metadata group and mark the
     * document as modified.
     *
     * @param namespace the namespace of the metadata
     * @param label the label of the metadata
     * @param value the value to set
     */
    public void setDocumentMetadata(String namespace, String label, String value) {
        this.metadata.setMetadata("default", namespace, label, value);
        this.isModified = true;
    }

    /**
     * Remove metadata from the "default" metadata group and mark the
     * document as modified.
     *
     * @param namespace the namespace of the metadata
     * @param label the label of the metadata
     * @see DocumentInterface#removeDocumentMetadata
     */
    public void removeDocumentMetadata(String namespace, String label) {
        this.metadata.removeMetadata("default", namespace, label);
        this.isModified = true;
    }

    /**
     * Remove metadata through the descriptive set's removeAllMetadata call
     * (no metadata group is named) and mark the document as modified.
     *
     * @param namespace the namespace of the metadata
     * @param label the label of the metadata
     */
    public void removeAllMetadata(String namespace, String label) {
        this.metadata.removeAllMetadata(namespace, label);
        this.isModified = true;
    }

    /**
     * Post metadata to a file in this document - the appropriate changes
     * should be made...
     *
     * @param fileLocation the location of the file the metadata belongs to
     * @param namespace the namespace of the metadata
     * @param label the label of the metadata
     * @param value the value to add
     */
    public void postFileMetadata(URL fileLocation, String namespace, String label, String value) {
        // NOTE(review): unlike the other metadata mutators this does not set
        // isModified - confirm whether that is intentional.

        // First get the list of file groups, etc. that this file is associated with...
        List fileGroups = this.fileSet.findGroups(fileLocation);

        // Next, get the METS divisions associated with each file group...
        List divisions = this.structureSet.findDivisionsForFiles(fileGroups);

        // Finally, post the metadata to the metadata group associated with each structure
        Iterator divisionIter = divisions.iterator();
        while (divisionIter.hasNext()) {
            METSDivision division = (METSDivision) divisionIter.next();

            // get the open namespace for this division
            METSNamespace namespaceMetadata = division.findNamespace(namespace, true, this.metadata);

            // then post the metadata to it...
            namespaceMetadata.addMetadata(label, value);
        }
    }

    /**
     * Get the metadata structure of the document.
     *
     * @return the metadata holder for the document.
     */
    public METSDescriptiveSet getDocumentMetadata() {
        return this.metadata;
    }

    /**
     * Set the metadata structure for this document and mark the document
     * as modified.
     *
     * @param metadata the new metadata holder for the document.
     */
    public void setDocumentMetadata(METSDescriptiveSet metadata) {
        this.metadata = metadata;
        this.isModified = true;
    }

    /**
     * Get the structure set of the document.
     *
     * @return the structure holder for the document.
     */
    public METSStructureSet getDocumentStructure() {
        return this.structureSet;
    }

    /**
     * Replace the structure set of the document.  The modified flag is
     * left untouched.
     *
     * @param structureSet the new structure holder for the document.
     */
    public void setDocumentStructure(METSStructureSet structureSet) {
        this.structureSet = structureSet;
    }

    /**
     * Get the values associated with a particular metadata item in the
     * "default" metadata group.
     *
     * @param namespace the namespace to find the values in.
     * @param label the label to match to find the values.
     *
     * @return the values.
     */
    public List getDocumentMetadataItem(String namespace, String label) {
        return this.metadata.getMetadata("default", namespace, label);
    }

    /**
     * Get the values associated with a particular metadata item in the
     * "default" metadata group.
     *
     * @param namespaceLabel the namespace and label separated by a colon;
     *        if there is no colon the whole string is used as the label
     *        in the GSDL3 namespace.
     *
     * @return the values.
     */
    public List getDocumentMetadataItem(String namespaceLabel) {
        String namespace, label;

        // Split on the first colon only, so the label itself may contain colons.
        int colonAt = namespaceLabel.indexOf(':');
        if (colonAt < 0) {
            namespace = GSDL3Namespace.GSDL3_NAMESPACE_ID;
            label = namespaceLabel;
        } else {
            namespace = namespaceLabel.substring(0, colonAt);
            label = namespaceLabel.substring(colonAt + 1);
        }
        return this.metadata.getMetadata("default", namespace, label);
    }

    /**
     * Get the files of this document.
     *
     * @return the file set of the document
     * @see DocumentInterface#getDocumentFiles
     */
    public METSFileSet getDocumentFiles() {
        return this.fileSet;
    }

    /**
     * Replace the file set of the document.  The modified flag is left
     * untouched.
     *
     * @param fileSet the new file set
     */
    public void setDocumentFiles(METSFileSet fileSet) {
        this.fileSet = fileSet;
    }

    /**
     * This is just a dummy function that does nothing at this level...
     *
     * @return always null
     */
    public org.w3c.dom.Document getDOMDocument() {
        return null;
    }

    /**
     * @return always true at this level
     * @see DocumentInterface#isMETSCompatible
     */
    public boolean isMETSCompatible() {
        return true;
    }

    /**
     * Use a default document writer - this may be overridden for subclasses...
     *
     * @return a new DocumentWriter
     */
    public DocumentWriter getMETSWriter() {
        return new DocumentWriter();
    }

    /**
     * Use a default SQL document writer - this may be overridden for subclasses...
     *
     * @return a new DocumentSQLWriter
     */
    public DocumentSQLWriter getSQLWriter() {
        return new DocumentSQLWriter();
    }

    /**
     * Obtain a document from the SQL database.
     *
     * <p>The identifier, type and the three dates are read from the current
     * row of <code>sqlResult</code>; the file set, descriptive metadata and
     * structure set are then loaded through their own readSQL methods.  The
     * returned document is flagged as unchanged.</p>
     *
     * @param connection the database connection used to load the components
     * @param sqlResult a result set positioned on the document's row
     * @return the loaded document, or null if it could not be loaded
     */
    public static AbstractDocument readSQL(GS3SQLConnection connection, ResultSet sqlResult) {
        try {
            DocumentID id = new DocumentID(sqlResult.getString("DocID"));
            String type = sqlResult.getString("docType");

            // Use a factory method to create the correct subtype...
            AbstractDocument document = DocumentFactory.createDocument(type, id);
            if (document == null) {
                // Guard in case the factory cannot build this type (its contract
                // is not visible here); fail the same way as a SQL error does.
                System.err.println("AbstractDocument.readSQL(): Failure to load document: no document created for type " + type);
                return null;
            }

            // Append the document date information
            document.indexDate = sqlResult.getTimestamp("IndexedDate");
            document.firstDate = sqlResult.getTimestamp("AccessionDate");
            document.modifiedDate = sqlResult.getTimestamp("ModifiedDate");

            // Get the individual components of the document
            METSFileSet fileSet = METSFileSet.readSQL(document, connection);
            document.setDocumentFiles(fileSet);

            METSDescriptiveSet descriptiveSet = METSDescriptiveSet.readSQL(document, connection);
            document.setDocumentMetadata(descriptiveSet);

            METSStructureSet structureSet = METSStructureSet.readSQL(document, connection);
            document.setDocumentStructure(structureSet);

            // indicate that the document is not currently modified
            // (setDocumentMetadata above will have raised the flag)
            document.setChanged(false);

            return document;
        } catch (SQLException sqlEx) {
            System.err.println("AbstractDocument.readSQL(): Failure to load document: " + sqlEx);
        }
        return null;
    }

    /**
     * @return true if the document has been flagged as modified
     */
    public boolean isChanged() {
        return this.isModified;
    }

    /**
     * Explicitly set or clear the modified flag.
     *
     * @param isModified the new state of the flag
     */
    public void setChanged(boolean isModified) {
        this.isModified = isModified;
    }
}