Changeset 8408
- Timestamp:
- 2004-10-22T13:56:15+13:00 (20 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build
- Files:
-
- 1 added
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/BuildManager.java
r7210 r8408 111 111 // TODO: add expansion (e.g. Zip files) 112 112 113 // Crawl the file tree - will recognise documents 113 114 for (int i = 0; i < this.inputRoots.size(); i ++) 114 115 { FileCrawler fileCrawler = new FileCrawler(new File((String) this.inputRoots.get(i)), recogniserManager); … … 116 117 fileCrawler.crawl(); 117 118 } 119 120 // Extract phase, etc. 118 121 this.extractorManager.extractDocuments(); 119 122 this.classifierManager.classifyDocuments(); 120 123 this.indexerManager.indexDocuments(); 124 125 // Timestamp management - update all timestamps on modified dates... 126 // 127 // This should only occur at the end of building in case the build is cancelled... 128 docList.updateTimestamps(this.collectionManager.getBuildDate()); 121 129 122 130 // TODO: validation phase -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/CollectionManager.java
r7478 r8408 155 155 this.siteHome = GSFile.siteHome(gsdl3Root, site); 156 156 File site_dir = new File(this.siteHome); 157 System.out.println(site_dir); 157 158 if (!site_dir.exists()) { 158 159 System.out.println("Error: Non-existant site ("+site+") specified"); … … 378 379 { 379 380 return this.database; 381 } 382 383 public Date getBuildDate() 384 { return this.lastBuildDate.getTime(); 380 385 } 381 386 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/ClassifierManager.java
r7470 r8408 85 85 } 86 86 87 if (document.is Modified()) {87 if (document.isChanged()) { 88 88 // System.out.println("Writing document " + document.getID()); 89 this.documents. modifiedDocument(document);89 this.documents.storeChangedDocument(document); 90 90 } 91 91 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/classifier/GS2HierarchyClassifier.java
r7470 r8408 37 37 public void recordClassification(String label) 38 38 { this.document.addDocumentMetadata("gsdl3", "classified", label); 39 this.document.set Modified(true);39 this.document.setChanged(true); 40 40 // System.out.println("Assigned document " + this.document.getID().toString() + " to " + label); 41 41 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/AbstractDocument.java
r7466 r8408 46 46 java.sql.Timestamp firstDate; 47 47 java.sql.Timestamp indexDate; 48 java.sql.Timestamp modifiedDate; 48 49 49 50 /** … … 65 66 this.firstDate = new java.sql.Timestamp(thisDate.getTime()); 66 67 this.indexDate = new java.sql.Timestamp(thisDate.getTime()); 68 this.modifiedDate = null; // as a signature that the modified date needs finding... 67 69 } 68 70 … … 131 133 * Get the date that this file was modified 132 134 */ 135 public long getFilesDatestamp() 136 { return this.fileSet.getModifiedDatestamp(); 137 } 138 139 /** 140 * Get the date that this file was modified 141 */ 133 142 public long getModifiedDatestamp() 134 { return this.fileSet.getModifiedDatestamp(); 143 { if (this.modifiedDate == null) { 144 this.setModifiedDatestamp(); 145 } 146 return this.modifiedDate.getTime(); 147 } 148 149 /** 150 * Update/set the date of the most recent file modification 151 */ 152 public void setModifiedDatestamp() 153 { this.modifiedDate = new java.sql.Timestamp(this.fileSet.getModifiedDatestamp()); 154 } 155 156 /** 157 * Get the date that this document was first indexed 158 */ 159 public long getAccessionDate() 160 { return this.firstDate.getTime(); 161 } 162 163 /** 164 * Get the date that this document was last indexed 165 */ 166 public long getLastIndexedDate() 167 { return this.indexDate.getTime(); 168 } 169 170 /** 171 * Set the last indexed date for this document; 172 */ 173 public void setLastIndexedDate(long timestamp) 174 { this.indexDate = new java.sql.Timestamp(timestamp); 135 175 } 136 176 … … 145 185 * database. 
146 186 */ 147 public boolean hasDuplicate(GS3SQLConnection connection)187 public String getDuplicateID(GS3SQLConnection connection) 148 188 { //String query = "SELECT * FROM document INNER JOIN filegroups ON document.docId=filegroups.docId WHERE DocType=\"" + HTML_DOCUMENT_TYPE + "\""; 149 189 … … 173 213 String docType = innerSet.getString("DocType"); 174 214 if (docType.equals(this.getDocumentType())) { 175 return true;215 return docId; 176 216 } 177 217 } … … 183 223 } 184 224 185 return false;225 return ""; 186 226 } 187 227 … … 416 456 417 457 // Append the document date information 418 document.indexDate = sqlResult.getTimestamp("IndexedDate"); 419 document.firstDate = sqlResult.getTimestamp("AccessionDate"); 458 document.indexDate = sqlResult.getTimestamp("IndexedDate"); 459 document.firstDate = sqlResult.getTimestamp("AccessionDate"); 460 document.modifiedDate = sqlResult.getTimestamp("ModifiedDate"); 420 461 421 462 // Get the individual components of the document … … 428 469 429 470 // indicate that the document is not currently modified 430 document.set Modified(false);471 document.setChanged(false); 431 472 return document; 432 473 } … … 440 481 * 441 482 */ 442 public boolean is Modified()483 public boolean isChanged() 443 484 { return this.isModified; 444 485 } 445 486 446 public void set Modified(boolean isModified)487 public void setChanged(boolean isModified) 447 488 { this.isModified = isModified; 448 489 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentInterface.java
r7466 r8408 51 51 52 52 /** 53 * Get the date that this file was modified 53 * Get the latest date that a component file was modified... 54 */ 55 public long getFilesDatestamp(); 56 57 /** 58 * Get the stored modified date for the files datestamp... 54 59 */ 55 60 public long getModifiedDatestamp(); 56 61 57 62 /** 63 * 64 */ 65 public void setModifiedDatestamp(); 66 67 /** 68 * Get the date that this document was first indexed 69 */ 70 public long getAccessionDate(); 71 72 /** 73 * Get the date that this document was last indexed 74 */ 75 public long getLastIndexedDate(); 76 77 /** 78 * Set the last indexed date for this document; 79 */ 80 public void setLastIndexedDate(long date); 81 82 /** 58 83 * Check if the document matches another in the database 59 84 */ 60 public boolean hasDuplicate(GS3SQLConnection connection);85 public String getDuplicateID(GS3SQLConnection connection); 61 86 62 87 /** … … 249 274 * Check if the document is changed or not 250 275 */ 251 public boolean is Modified();276 public boolean isChanged(); 252 277 253 278 /** 254 279 * Set the document modified state 255 280 */ 256 public void set Modified(boolean isModified);281 public void setChanged(boolean isModified); 257 282 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentList.java
r7190 r8408 52 52 } 53 53 54 54 /** 55 * Obtain the list of <code>DocumentID</code> objects representing the unique 56 * document identifiers of documents that refer to the file given as a parameter. 57 * 58 * @param <code>URL</code> the location of the file to match 59 * 60 * @return <code>List</code> of <code>DocumentID</code> reference identifiers. 61 */ 55 62 public List getDocumentIdsWithFile(URL fileLocation) 56 63 { List reply = new ArrayList(); … … 104 111 * Get a list of documents that match a given set of patterns, 105 112 * within a given URL node. 113 * 114 * @param <code>List</code> the list of patterns to match 115 * @param <code>String</code> the partial URL of the root node under which to 116 * match files. NB: this is a <code>String</code> as the URL may be 117 * incomplete and not properly match the strict requirements for <code>URL</code> 106 118 */ 107 119 public List findDocumentIdsUsingFiles(List fileRefs, String withinNode) … … 137 149 } 138 150 151 /** 152 * Return a list of document identifiers against a simple pattern. No root node is given, so 153 * any file matching the pattern given will be returned. USE WITH CAUTION!!! 154 * 155 * @param <code>String</code> a fragment of file pathname to match against. 156 * 157 * @return <code>List</code> of <code>DocumentID</code> objects. 158 */ 139 159 public List findDocumentIdsUsingFile(String fileRef) 140 160 { … … 290 310 public void addDocument(DocumentInterface document) 291 311 { // initially, test if the document has a duplicate... 292 if (document.hasDuplicate(this.connection)) { 312 String duplicateDocID = document.getDuplicateID(this.connection); 313 if (duplicateDocID.length() > 0) { 293 314 System.out.println("Found duplicate document "); 294 315 return; … … 305 326 306 327 // add to the database as well, if it is modified... 
307 if (document.is Modified()) {328 if (document.isChanged()) { 308 329 document.getSQLWriter().writeDocument(document, this.connection); 309 330 } … … 321 342 * @param <code>DocumentInterface</code> the document 322 343 */ 323 public void modifiedDocument(DocumentInterface document)344 public void storeChangedDocument(DocumentInterface document) 324 345 { document.getSQLWriter().writeDocument(document, this.connection); 325 346 } … … 350 371 } 351 372 373 /** 374 * Simple "obtain a document" function 375 */ 352 376 public DocumentInterface getDocument(DocumentID documentId) 353 377 { … … 360 384 361 385 /** 362 public DocumentID getDocumentID(int index) 363 { if (index < 0 || index >= this.used) 364 { return null; 365 } 366 return this.list[index].getID(); 367 } 368 */ 369 386 * Update timestamps on an entire document list - done at the end of a build cycle 387 * 388 * @param <code>The date of the new build cycle</code> 389 */ 390 public void updateTimestamps(java.util.Date time) 391 { Iterator documents = this.iterator(); 392 int item = 0; 393 394 while (documents.hasNext()) 395 { DocumentInterface document = (DocumentInterface) documents.next(); 396 397 long thisTimeStamp = document.getFilesDatestamp(); 398 long lastTimeStamp = document.getModifiedDatestamp(); 399 400 if (thisTimeStamp > lastTimeStamp) { 401 System.out.println("Updating timestamps " + thisTimeStamp + " " + lastTimeStamp); 402 403 DocumentSQLWriter.touchDocument(document.getID(), this.connection, time.getTime()); 404 } 405 } 406 } 407 408 /** 409 * A convenience method to map onto the old Vector source code... 410 */ 370 411 protected void ensureSize(int size) 371 412 { DocumentInterface [] newList = new DocumentInterface[size]; … … 375 416 } 376 417 418 /** 419 * Write the documents into a directory as METS/XML 420 */ 377 421 public void writeDocuments(File directory) 378 422 { Iterator documents = this.iterator(); -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/DocumentSQLWriter.java
r6697 r8408 13 13 public DocumentSQLWriter() 14 14 { 15 } 16 17 public static boolean touchDocument(DocumentID docID, GS3SQLConnection connection, long touchTime) 18 { GS3SQLUpdate update = new GS3SQLUpdate("document"); 19 update.setWhere(new GS3SQLWhere(new GS3SQLWhereItem("DocID", "=", docID.toString()))); 20 update.addDate("IndexedDate", new java.sql.Timestamp(touchTime)); 21 connection.execute(update.toString()); 22 System.out.println(update.toString()); 23 return true; 15 24 } 16 25 … … 39 48 insert.addValue("DocID", document.getID().toString()); 40 49 insert.addValue("DocType", document.getDocumentType()); 41 // TODO: avoid this terrible cast 42 insert.addDate("AccessionDate", ((AbstractDocument) document).firstDate); 50 51 insert.addDate("AccessionDate", new java.sql.Timestamp(document.getAccessionDate())); 52 insert.addDate("IndexedDate", new java.sql.Timestamp(document.getLastIndexedDate())); 53 insert.addDate("ModifiedDate", new java.sql.Timestamp(document.getModifiedDatestamp())); 43 54 44 55 connection.execute(insert.toString()); 56 } 57 else { 58 /* redundant code - not used... */ 59 GS3SQLUpdate update = new GS3SQLUpdate("document"); 60 update.setWhere(new GS3SQLWhere(new GS3SQLWhereItem("DocID", "=", document.getID().toString()))); 61 connection.execute(update.toString()); 45 62 } 46 63 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/IndexExtractor.java
r6379 r8408 171 171 172 172 System.out.println("Writing modified document " + document.getID()); 173 documentList. modifiedDocument(document);173 documentList.storeChangedDocument(document); 174 174 } 175 175 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/extractor/MetaXMLExtractor.java
r6503 r8408 103 103 104 104 // System.out.println("Writing modified document " + document.getID()); 105 documentList. modifiedDocument(document);105 documentList.storeChangedDocument(document); 106 106 } 107 107 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/IndexerManager.java
r6897 r8408 14 14 int size; 15 15 int used; 16 DocumentList document s;16 DocumentList documentList; 17 17 18 18 public static final String outputDir = "outputDir"; … … 24 24 this.size = 10; 25 25 this.used = 0; 26 this.document s= documentList;26 this.documentList = documentList; 27 27 } 28 28 … … 50 50 continue; 51 51 } 52 Iterator iterator = this.document s.iterator();52 Iterator iterator = this.documentList.iterator(); 53 53 54 54 while (iterator.hasNext()) { … … 61 61 62 62 // note any changes made to this document... 63 if (document.is Modified()) {64 this.document s.modifiedDocument(document);63 if (document.isChanged()) { 64 this.documentList.storeChangedDocument(document); 65 65 // System.out.println("Writing document "+document.getID()); 66 66 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/indexers/MGIndexer.java
r7583 r8408 229 229 230 230 descriptive.addMetadata("gsdl3", "mgseqno", this.overallName + "." + Integer.toString(this.sectionSeqNo)); 231 metsDoc.set Modified(true);231 metsDoc.setChanged(true); 232 232 // System.out.println("Assigning " + this.sectionSeqNo + " to " + metsDoc.getID() + " " + division.getLabel()); 233 233 } // section level -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/metadata/METSFile.java
r7465 r8408 296 296 297 297 /** 298 * 298 * Get modified file date 299 299 */ 300 300 public long getModifiedDatestamp() -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/GS3SQLConnection.java
r7306 r8408 201 201 docTable.addProperty("AccessionDate", GS3SQLField.DATETIME_TYPE); 202 202 docTable.addProperty("IndexedDate", GS3SQLField.DATETIME_TYPE); 203 docTable.addProperty("ModifiedDate", GS3SQLField.DATETIME_TYPE); 203 204 statement = this.connection.createStatement(); 204 205 statement.execute(docTable.toString());
Note:
See TracChangeset
for help on using the changeset viewer.