source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/AbstractDocument.java@ 8742

Last change on this file since 8742 was 8742, checked in by kjdon, 19 years ago

changed the import statements for GS3SQLConnection and GS3SQLConnectionFactory to reflect their move to the database package

  • Property svn:keywords set to Author Date Id Revision
File size: 14.7 KB
Line 
1package org.greenstone.gsdl3.gs3build.doctypes;
2
3import java.util.List;
4import java.util.ArrayList;
5import java.util.Iterator;
6import java.util.HashMap;
7import java.util.Map;
8import java.util.Date;
9
10import java.sql.SQLException;
11import java.sql.ResultSet;
12import java.sql.Timestamp;
13
14import java.net.URL;
15
16import org.greenstone.gsdl3.gs3build.metadata.NamespaceFactory;
17import org.greenstone.gsdl3.gs3build.metadata.StructureIdentifierFactory;
18import org.greenstone.gsdl3.gs3build.metadata.GSDL3Namespace;
19import org.greenstone.gsdl3.gs3build.metadata.METSDescriptiveSet;
20import org.greenstone.gsdl3.gs3build.metadata.METSFile;
21import org.greenstone.gsdl3.gs3build.metadata.METSFileSet;
22import org.greenstone.gsdl3.gs3build.metadata.METSHeader;
23import org.greenstone.gsdl3.gs3build.metadata.METSStructure;
24import org.greenstone.gsdl3.gs3build.metadata.METSStructureSet;
25import org.greenstone.gsdl3.gs3build.metadata.METSDivision;
26import org.greenstone.gsdl3.gs3build.metadata.METSNamespace;
27import org.greenstone.gsdl3.gs3build.metadata.MetadataLabel;
28
29import org.greenstone.gsdl3.gs3build.util.MultiMap;
30import org.greenstone.gsdl3.gs3build.database.GS3SQLConnection;
31
32/**
33 * Provide a base-line functionality for the <code>DocumentInterface</code>
34 * class.
35 */
36
37public abstract class AbstractDocument implements DocumentInterface
38{
39 METSFileSet fileSet;
40 METSDescriptiveSet metadata;
41 METSStructureSet structureSet;
42 METSHeader header;
43 DocumentID id;
44 boolean isModified;
45 StructureIdentifierFactory structureIdFactory;
46 java.sql.Timestamp firstDate;
47 java.sql.Timestamp indexDate;
48 java.sql.Timestamp modifiedDate;
49
50 /**
51 * <p>Create a very vanilla document with a given document identifier.</p>
52 * <p>Most commonly used in dealing with loading files using DocumentFactory
53 * or similar.</p>
54 *
55 * @param <code>DocumentID</code> the document identifier
56 */
57 public AbstractDocument(DocumentID id)
58 { this.fileSet = new METSFileSet();
59 this.metadata = new METSDescriptiveSet();
60 this.header = new METSHeader();
61 this.structureSet = new METSStructureSet();
62 this.id = id;
63 this.structureIdFactory = new StructureIdentifierFactory();
64
65 java.util.Date thisDate = new java.util.Date();
66 this.firstDate = new java.sql.Timestamp(thisDate.getTime());
67 this.indexDate = new java.sql.Timestamp(thisDate.getTime());
68 this.modifiedDate = null; // as a signature that the modified date needs finding...
69 }
70
71 /**
72 * Create a basic document from a given <code>URL</code. This is usually the form
73 * called through the recognisers.
74 *
75 * @param <code>URL</code> the URL of the first file in the document package
76 */
77 public AbstractDocument(URL url)
78 { this.fileSet = new METSFileSet();
79 METSFile metsFile = this.fileSet.addFile(url);
80 this.metadata = new METSDescriptiveSet();
81 this.header = new METSHeader();
82 this.structureSet = new METSStructureSet();
83 this.id = null;
84
85 java.util.Date thisDate = new java.util.Date();
86 this.firstDate = new java.sql.Timestamp(thisDate.getTime());
87 this.indexDate = new java.sql.Timestamp(thisDate.getTime());
88
89 METSStructure structure = new METSStructure("All", "All", "Whole Document");
90 METSDivision documentBody = new METSDivision("All", "All", "All", "Whole Document", "Document");
91 structure.addDivision(documentBody);
92 this.structureSet.addStructure(structure);
93 documentBody.addFileReference(metsFile.getID());
94 documentBody.addMetadataReference("DM1");
95 }
96
97 /**
98 * Set the identified for the document. Every document should have
99 * a document number set on its accession, either through metadata
100 * placed upon it internally or externally, or by assignment through
101 * a <code>DocumentIDFactory</code>. Each identifier should be
102 * unique.
103 *
104 * @param <code>DocumentID</code> the document identifier - in XML
105 * terms, the gsdl3:id element.
106 */
107 public void setID(DocumentID id)
108 { this.id = id;
109 this.isModified = true;
110 }
111
112 /**
113 * Get the document identifier - this should be unique to the document,
114 * but care must be taken in the configuration of the collection to
115 * ensure that this is the case.
116 *
117 * @return <code>DocumentID</code> the identifer
118 */
119 public DocumentID getID()
120 { return this.id;
121 }
122
123 /**
124 * Indicate whether this document is indexed.
125 *
126 * @see: DocumentInterface.isIndexed
127 */
128 public boolean isIndexed()
129 { return true;
130 }
131
132 /**
133 * Get the date that this file was modified
134 */
135 public long getFilesDatestamp()
136 { return this.fileSet.getModifiedDatestamp();
137 }
138
139 /**
140 * Get the date that this file was modified
141 */
142 public long getModifiedDatestamp()
143 { if (this.modifiedDate == null) {
144 this.setModifiedDatestamp();
145 }
146 return this.modifiedDate.getTime();
147 }
148
149 /**
150 * Update/set the date of the most recent file modification
151 */
152 public void setModifiedDatestamp()
153 { this.modifiedDate = new java.sql.Timestamp(this.fileSet.getModifiedDatestamp());
154 }
155
156 /**
157 * Get the date that this document was first indexed
158 */
159 public long getAccessionDate()
160 { return this.firstDate.getTime();
161 }
162
163 /**
164 * Get the date that this document was last indexed
165 */
166 public long getLastIndexedDate()
167 { return this.indexDate.getTime();
168 }
169
170 /**
171 * Set the last indexed date for this document;
172 */
173 public void setLastIndexedDate(long timestamp)
174 { this.indexDate = new java.sql.Timestamp(timestamp);
175 }
176
177 /**
178 * Check if this document is in the database already.
179 *
180 * In this simple implementation, the first file in the document's "default"
181 * filegroup is taken to be the canonical file for this document - any document
182 * of the same type with the same canonical file is taken to be a match.
183 *
184 * @return <code>boolean</code> - if a matching document is found in the
185 * database.
186 */
187 public String getDuplicateID(GS3SQLConnection connection)
188 { //String query = "SELECT * FROM document INNER JOIN filegroups ON document.docId=filegroups.docId WHERE DocType=\"" + HTML_DOCUMENT_TYPE + "\"";
189
190 // Query for documents using the same file...
191 String query = "SELECT DocID FROM files INNER JOIN filegroups ON files.FileGroupRef=filegroups.FileGroupRef WHERE (filegroups.FileGroupId=\"default\" AND files.FileLocation=\"" + this.fileSet.getFile(0).getLocation().toString() + "\")";
192 connection.execute(query);
193
194 List docs = new ArrayList();
195 ResultSet results = connection.getResultSet();
196
197 try {
198 if (results != null &&
199 results.first())
200 { do {
201 String value = results.getString("DocID");
202
203 docs.add(value);
204 } while (results.next());
205
206 Iterator docIterator = docs.iterator();
207 while (docIterator.hasNext()) {
208 String docId = docIterator.next().toString();
209 String innerQuery = "SELECT * FROM document WHERE DocID=\"" + docId + "\"";
210 connection.execute(innerQuery);
211 ResultSet innerSet = connection.getResultSet();
212 if (innerSet != null && innerSet.first()) {
213 String docType = innerSet.getString("DocType");
214 if (docType.equals(this.getDocumentType())) {
215 return docId;
216 }
217 }
218 }
219 }
220 }
221 catch (java.sql.SQLException sqlEx) {
222 System.err.println(sqlEx);
223 }
224
225 return "";
226 }
227
228 /**
229 * Obtain the METS header of this document
230 *
231 * @return <code>METSHeader</code> the header
232 */
233 public METSHeader getHeader()
234 { return this.header;
235 }
236
237 /**
238 * Set the METS header for this document.
239 *
240 * @param <code>METSHeader</code> the header
241 */
242 public void setHeader(METSHeader header)
243 { this.header = header;
244 }
245
246 /**
247 * A simple implementation of the isDocumentType function that does <b>not</b> consider
248 * inheritance - it <code>must</code> be extended as required.
249 */
250 public boolean isDocumentType(String type)
251 { return type.equals(this.getDocumentType());
252 }
253
254 public abstract String getDocumentType();
255
256 public abstract String getDocumentText();
257
258 public abstract String getSectionText(String sectionId);
259
260 public String getMETSType()
261 { return "document";
262 }
263
264 /**
265 * @see DocumentInterface:addDocumentMetadata
266 */
267 public void addDocumentMetadata(MetadataLabel label, String value)
268 { // no need to set isModified, as the following call will do it anyway!
269 this.addDocumentMetadata(label.getNamespace(), label.getLabel(), value);
270 }
271
272 /**
273 * @see DocumentInterface:addDocumentMetadata
274 */
275 public void setDocumentMetadata(MetadataLabel label, String value)
276 { // no need to set isModified, as the following call will do it anyway!
277 this.setDocumentMetadata(label.getNamespace(), label.getLabel(), value);
278 }
279
280 /**
281 * @see DocumentInterface:addDocumentMetadata
282 */
283 public void addDocumentMetadata(String namespace, String label, String value)
284 { this.metadata.addMetadata("default", namespace, label, value);
285 this.isModified = true;
286 }
287
288 /**
289 * @see DocumentInterface:addDocumentMetadata
290 */
291 public void setDocumentMetadata(String namespace, String label, String value)
292 { this.metadata.setMetadata("default", namespace, label, value);
293 this.isModified = true;
294 }
295
296 /**
297 * @see DocumentInterface:removeDocumentMetadata
298 */
299 public void removeDocumentMetadata(String namespace, String label)
300 { this.metadata.removeMetadata("default", namespace, label);
301 this.isModified = true;
302 }
303
304 /**
305 *
306 */
307 public void removeAllMetadata(String namespace, String label)
308 { this.metadata.removeAllMetadata(namespace, label);
309 this.isModified = true;
310 }
311
312 /**
313 * Post metadata to a file in this document - the appropriate changes
314 * should be made...
315 */
316 public void postFileMetadata(URL fileLocation, String namespace, String label, String value)
317 {
318 // First get the list of file groups, etc. that this file is associated with...
319 List fileGroups = this.fileSet.findGroups(fileLocation);
320
321 // Next, get the METS divisions associated with each file group...
322 List divisions = this.structureSet.findDivisionsForFiles(fileGroups);
323
324 // Finally, post the metadata to the metadata group associated with each structure
325 Iterator divisionIter = divisions.iterator();
326 while (divisionIter.hasNext())
327 { METSDivision division = (METSDivision) divisionIter.next();
328
329 // get the open namespace for this division
330 METSNamespace namespaceMetadata = division.findNamespace(namespace, true, this.metadata);
331
332 // then post the metadata to it...
333 namespaceMetadata.addMetadata(label, value);
334 }
335 }
336
337 /**
338 * Get the metadata structure of the document
339 *
340 * @return <code>METSDescriptive</code> the metadata holder for the document.
341 */
342 public METSDescriptiveSet getDocumentMetadata()
343 { return this.metadata;
344 }
345
346 /**
347 * Set the metadata structure for this document
348 *
349 * @param <code>METSDescriptive</code> the new metadata holder for the document.
350 */
351 public void setDocumentMetadata(METSDescriptiveSet metadata)
352 { this.metadata = metadata;
353 this.isModified = true;
354 }
355
356 /**
357 * Get the metadata structure of the document
358 *
359 * @return <code>METSStructureSet</code> the metadata holder for the document.
360 */
361 public METSStructureSet getDocumentStructure()
362 { return this.structureSet;
363 }
364
365 public void setDocumentStructure(METSStructureSet structureSet)
366 { this.structureSet = structureSet;
367 }
368
369 /**
370 * Get the values associated with a particular metadata value.
371 *
372 * @param <code>String</code> the namespace to find the values in.
373 * @param <code>String</code> the label to match to find the values.
374 *
375 * @return <code>List</code> the values.
376 */
377 public List getDocumentMetadataItem(String namespace, String label)
378 { return this.metadata.getMetadata("default", namespace, label);
379 }
380
381 /**
382 * Get the values associated with a particular metadata value.
383 *
384 * @param <code>String</code> the namespace and label separated by a
385 * colon.
386 *
387 * @return <code>List</code> the values.
388 */
389 public List getDocumentMetadataItem(String namespaceLabel)
390 { String namespace, label;
391
392 int colonAt = namespaceLabel.indexOf(':');
393 if (colonAt < 0)
394 { namespace = GSDL3Namespace.GSDL3_NAMESPACE_ID;
395 label = namespaceLabel;
396 }
397 else
398 { namespace = namespaceLabel.substring(0, colonAt);
399 label = namespaceLabel.substring(colonAt+1);
400 }
401 return this.metadata.getMetadata("default", namespace, label);
402 }
403
404 /**
405 * @see DocumentInterface:getDocumentFiles
406 */
407 public METSFileSet getDocumentFiles()
408 { return this.fileSet;
409 }
410
411 public void setDocumentFiles(METSFileSet fileSet)
412 { this.fileSet = fileSet;
413 }
414
415 /**
416 * This is just a dummy function that does nothing at this level...
417 */
418 public org.w3c.dom.Document getDOMDocument()
419 { return null;
420 }
421
422 /**
423 * @see DocumentInterface:isMETSCompatible
424 */
425 public boolean isMETSCompatible()
426 { return true;
427 }
428
429 /**
430 * Use a default document writer - this may be overridden for subclasses...
431 *
432 * @see DocumentInterface:writeMETSObject
433 */
434 public DocumentWriter getMETSWriter()
435 { return new DocumentWriter();
436 }
437
438 /**
439 * Use a default SQL document writer - this may be overridden for subclasses...
440 *
441 */
442 public DocumentSQLWriter getSQLWriter()
443 { return new DocumentSQLWriter();
444 }
445
446 /**
447 * Obtain a document from the SQL database
448 */
449 public static AbstractDocument readSQL(GS3SQLConnection connection, ResultSet sqlResult)
450 { try {
451 DocumentID id = new DocumentID(sqlResult.getString("DocID"));
452 String type = sqlResult.getString("docType");
453
454 // Use a factory method to create the correct subtype...
455 AbstractDocument document = DocumentFactory.createDocument(type, id);
456
457 // Append the document date information
458 document.indexDate = sqlResult.getTimestamp("IndexedDate");
459 document.firstDate = sqlResult.getTimestamp("AccessionDate");
460 document.modifiedDate = sqlResult.getTimestamp("ModifiedDate");
461
462 // Get the individual components of the document
463 METSFileSet fileSet = METSFileSet.readSQL(document, connection);
464 document.setDocumentFiles(fileSet);
465 METSDescriptiveSet descriptiveSet = METSDescriptiveSet.readSQL(document, connection);
466 document.setDocumentMetadata(descriptiveSet);
467 METSStructureSet structureSet = METSStructureSet.readSQL(document, connection);
468 document.setDocumentStructure(structureSet);
469
470 // indicate that the document is not currently modified
471 document.setChanged(false);
472 return document;
473 }
474 catch (SQLException sqlEx) {
475 System.out.println("Failure to load document: " + sqlEx);
476 }
477 return null;
478 }
479
480 /**
481 *
482 */
483 public boolean isChanged()
484 { return this.isModified;
485 }
486
487 public void setChanged(boolean isModified)
488 { this.isModified = isModified;
489 }
490}
Note: See TracBrowser for help on using the repository browser.