Changeset 6697
- Timestamp: 2004-02-02T16:07:09+13:00 (20 years ago)
- Location: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/AbstractDocument.java
r6502 r6697 6 6 import java.util.HashMap; 7 7 import java.util.Map; 8 import java.util.Date; 8 9 9 10 import java.sql.SQLException; 10 11 import java.sql.ResultSet; 12 //import java.sql.Date; 11 13 12 14 import java.net.URL; … … 35 37 public abstract class AbstractDocument implements DocumentInterface 36 38 { 37 METSFileSet fileSet;39 METSFileSet fileSet; 38 40 METSDescriptiveSet metadata; 39 METSStructureSet structureSet;40 METSHeader header;41 DocumentID id;42 boolean isModified;41 METSStructureSet structureSet; 42 METSHeader header; 43 DocumentID id; 44 boolean isModified; 43 45 StructureIdentifierFactory structureIdFactory; 46 java.sql.Date firstDate; 47 java.sql.Date indexDate; 44 48 45 49 /** … … 57 61 this.id = id; 58 62 this.structureIdFactory = new StructureIdentifierFactory(); 63 64 java.util.Date thisDate = new java.util.Date(); 65 this.firstDate = new java.sql.Date(thisDate.getTime()); 66 this.indexDate = new java.sql.Date(thisDate.getTime()); 59 67 } 60 68 … … 73 81 this.id = null; 74 82 83 java.util.Date thisDate = new java.util.Date(); 84 this.firstDate = new java.sql.Date(thisDate.getTime()); 85 this.indexDate = new java.sql.Date(thisDate.getTime()); 86 75 87 METSStructure structure = new METSStructure("All", "All", "Whole Document"); 76 88 METSDivision documentBody = new METSDivision("All", "All", "All", "Whole Document", "Document"); … … 113 125 */ 114 126 public boolean isIndexed() 115 { 116 return true; 127 { return true; 128 } 129 130 /** 131 * Check if this document is in the database already. 132 * 133 * In this simple implementation, the first file in the document's "default" 134 * filegroup is taken to be the canonical file for this document - any document 135 * of the same type with the same canonical file is taken to be a match. 136 * 137 * @return <code>boolean</code> - if a matching document is found in the 138 * database. 
139 */ 140 public boolean hasDuplicate(GS3SQLConnection connection) 141 { //String query = "SELECT * FROM document INNER JOIN filegroups ON document.docId=filegroups.docId WHERE DocType=\"" + HTML_DOCUMENT_TYPE + "\""; 142 143 // Query for documents using the same file... 144 String query = "SELECT DocID FROM files INNER JOIN filegroups ON files.FileGroupRef=filegroups.FileGroupRef WHERE (filegroups.FileGroupId=\"default\" AND files.FileLocation=\"" + this.fileSet.getFile(0).getLocation().toString() + "\")"; 145 connection.execute(query); 146 147 List docs = new ArrayList(); 148 ResultSet results = connection.getResultSet(); 149 150 try { 151 if (results != null && 152 results.first()) 153 { do { 154 String value = results.getString("DocID"); 155 156 docs.add(value); 157 } while (results.next()); 158 159 Iterator docIterator = docs.iterator(); 160 while (docIterator.hasNext()) { 161 String docId = docIterator.next().toString(); 162 String innerQuery = "SELECT * FROM document WHERE DocID=\"" + docId + "\""; 163 connection.execute(innerQuery); 164 ResultSet innerSet = connection.getResultSet(); 165 if (innerSet != null && innerSet.first()) { 166 String docType = innerSet.getString("DocType"); 167 if (docType.equals(this.getDocumentType())) { 168 return true; 169 } 170 } 171 } 172 } 173 } 174 catch (java.sql.SQLException sqlEx) { 175 System.err.println(sqlEx); 176 } 177 178 return false; 117 179 } 118 180 … … 330 392 AbstractDocument document = DocumentFactory.createDocument(type, id); 331 393 394 // Append the document date information 395 document.indexDate = sqlResult.getDate("IndexedDate"); 396 document.firstDate = sqlResult.getDate("AccessionDate"); 397 332 398 // Get the individual components of the document 333 399 METSFileSet fileSet = METSFileSet.readSQL(document, connection); … … 343 409 } 344 410 catch (SQLException sqlEx) { 411 System.out.println("Failure to load document: " + sqlEx); 345 412 } 346 413 return null; -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentInterface.java
r6502 r6697 9 9 10 10 import org.greenstone.gsdl3.gs3build.util.MultiMap; 11 import org.greenstone.gsdl3.gs3build.util.GS3SQLConnection; 11 12 12 13 public interface DocumentInterface … … 49 50 public boolean isIndexed(); 50 51 52 /** 53 * Check if the document matches another in the database 54 */ 55 public boolean hasDuplicate(GS3SQLConnection connection); 56 51 57 /** 52 58 * The plain text of the document. -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java
r6346 r6697 280 280 */ 281 281 public void addDocument(DocumentInterface document) 282 { // first cache it... 282 { // initially, test if the document has a duplicate... 283 if (document.hasDuplicate(this.connection)) { 284 System.out.println("Found duplicate document "); 285 return; 286 } 287 288 // first cache it... 283 289 this.cacheDocument(document); 284 290 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentSQLWriter.java
r6010 r6697 39 39 insert.addValue("DocID", document.getID().toString()); 40 40 insert.addValue("DocType", document.getDocumentType()); 41 // TODO: avoid this terrible cast 42 insert.addDate("AccessionDate", ((AbstractDocument) document).firstDate); 41 43 42 44 connection.execute(insert.toString()); -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLDocument.java
r6353 r6697 5 5 import java.net.MalformedURLException; 6 6 7 import java.util.Iterator; 7 8 import java.util.List; 8 9 import java.util.Map;
Note: See TracChangeset for help on using the changeset viewer.