source: trunk/gli/src/org/greenstone/gatherer/msm/GDMDocument.java@ 4527

Last change on this file since 4527 was 4515, checked in by kjdon, 21 years ago

now doesn't try to add metadata if there are no metadata sets

  • Property svn:keywords set to Author Date Id Revision
File size: 22.1 KB
Line 
1package org.greenstone.gatherer.msm;
2/**
3 *#########################################################################
4 *
5 * A component of the Gatherer application, part of the Greenstone digital
6 * library suite from the New Zealand Digital Library Project at the
7 * University of Waikato, New Zealand.
8 *
9 * Author: John Thompson, Greenstone Digital Library, University of Waikato
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28import java.io.*;
29import java.util.*;
30import org.greenstone.gatherer.Gatherer;
31import org.greenstone.gatherer.msm.GDMManager;
32import org.greenstone.gatherer.msm.MSMUtils;
33import org.greenstone.gatherer.util.HashMap3D;
34import org.greenstone.gatherer.util.Utility;
35import org.greenstone.gatherer.valuetree.GValueModel;
36import org.greenstone.gatherer.valuetree.GValueNode;
37import org.w3c.dom.*;
38/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
39 * @author John Thompson, Greenstone Digital Library, University of Waikato
40 * @version 2.3b
41 */
42public class GDMDocument {
43 /** Record if the document this object is based on is up to date. */
44 private boolean up_to_date = true;
45 /** The document this class sources its data from. */
46 private Document base_document;
47 static final private String ACCUMULATE = "accumulate";
48 /** The pattern to match when searching for directory level assignments. */
49 static final private String DIRECTORY_FILENAME = ".*";
50 static final private String DESCRIPTION_ELEMENT = "Description";
51 static final private String FILENAME_ELEMENT = "FileName";
52 static final private String FILESET_ELEMENT = "FileSet";
53 static final private String HVALUE_ATTRIBUTE = "hvalue";
54 static final private String METADATA_ELEMENT = "Metadata";
55 static final private String MODE_ATTRIBUTE = "mode";
56 static final private String NAME_ATTRIBUTE = "name";
57 static final private String OVERWRITE = "overwrite";
/** Constructor which creates a brand new metadata.xml document. */
public GDMDocument() {
    // A new document is obtained by parsing the Greenstone directory metadata template.
    base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
}
/** Constructor which parses an existing metadata.xml document.
 * If parsing throws, the failure is reported and the wrapped document is left null.
 * @param file the metadata.xml file to parse
 */
public GDMDocument(File file) {
    try {
        this.base_document = Utility.parse(file.getAbsolutePath(), false);
    }
    catch (Exception error) {
        // Poorly formed, or completely invalid metadata.xml file! Report it instead of
        // silently discarding the reason; base_document stays null.
        Gatherer.printStackTrace(error);
    }
}
/** Constructor which wraps around an already parsed metadata.xml document.
 * @param base_document the DOM document to use as the source of data
 */
public GDMDocument(Document base_document) {
    this.base_document = base_document;
}
76 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! */
77 public void addMetadata(String filename, Metadata metadata) {
78 ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
79 try {
80 // Retrieve the document element.
81 Element directorymetadata_element = base_document.getDocumentElement();
82 // Iterate through the filesets looking for one that matches the given filename.
83 Element fileset_element = null;
84 boolean found = false;
85 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
86 for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
87 fileset_element = (Element) fileset_elements.item(i);
88 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
89 for(int j = 0; !found && j < filename_elements.getLength(); j++) {
90 Element filename_element = (Element) filename_elements.item(j);
91 String filename_pattern = MSMUtils.getValue(filename_element);
92 // Have we found a match. If so break out of for loop.
93 if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) {
94 ///ystem.err.println("Adding to existing file fileset!");
95 found = true;
96 }
97 else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
98 ///ystem.err.println("Adding to existing folder fileset!");
99 ///ystem.err.println("filename_pattern = '" + filename_pattern + "'");
100 found = true;
101 }
102 // No match. On to the next one.
103 else {
104 fileset_element = null;
105 }
106 filename_pattern = null;
107 filename_element = null;
108 }
109 }
110 fileset_elements = null;
111 // If we still haven't found an existing fileset, then its time to create one.
112 if(fileset_element == null) {
113 ///ystem.err.println("Creating a new fileset.");
114 fileset_element = base_document.createElement(FILESET_ELEMENT);
115 Element filename_element = base_document.createElement(FILENAME_ELEMENT);
116 Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
117 fileset_element.appendChild(filename_element);
118 fileset_element.appendChild(description_element);
119 Text filename_text = null;
120 // If the filename is null then we add a directory metadata set as directorymetadata_element's first child
121 if(filename == null) {
122 filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
123 if(directorymetadata_element.hasChildNodes()) {
124 directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild());
125 }
126 else {
127 directorymetadata_element.appendChild(fileset_element);
128 }
129 }
130 // Otherwise we just append the new fileset to directorymetadata_element's children.
131 else {
132 filename_text = base_document.createTextNode(filename);
133 directorymetadata_element.appendChild(fileset_element);
134 }
135 filename_element.appendChild(filename_text);
136 filename_text = null;
137 description_element = null;
138 filename_element = null;
139 }
140 // Now, finally, we can add the metadata.
141 Element metadata_element = base_document.createElement(METADATA_ELEMENT);
142 metadata_element.setAttribute(NAME_ATTRIBUTE, metadata.getElement().getName());
143
144 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
145 boolean will_accumulate = false;
146 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
147 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
148 Element sibling_description_element = (Element) sibling_description_elements.item(k);
149 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(METADATA_ELEMENT);
150 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
151 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
152 will_accumulate = sibling_metadata_element.getAttribute(NAME_ATTRIBUTE).equals(metadata_element.getAttribute(NAME_ATTRIBUTE));
153 sibling_metadata_element = null;
154 }
155 sibling_metadata_elements = null;
156 sibling_description_element = null;
157 }
158 sibling_description_elements = null;
159 if(will_accumulate) { //mode.equals(ACCUMULATE)) {
160 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
161 }
162 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm adding a new attribute called hvalue which indicates the hierarchy value path as a '\' separated string.
163 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
164 if(model != null && model.isHierarchy()) {
165 metadata_element.setAttribute(HVALUE_ATTRIBUTE, metadata.getValueNode().getFullPath());
166 }
167 metadata_element.appendChild(base_document.createTextNode(metadata.getAbsoluteValue()));
168 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
169 NodeList description_elements = fileset_element.getElementsByTagName("Description");
170 Element description_element = (Element) description_elements.item(0);
171 description_element.appendChild(metadata_element);
172 description_element = null;
173 metadata_element = null;
174 //mode = null;
175 fileset_element = null;
176 directorymetadata_element = null;
177 up_to_date = false;
178 }
179 catch (Exception error) {
180 Gatherer.printStackTrace(error);
181 }
182 }
183
184 /** Retrieve the document this class is wrapping. */
185 public Document getDocument() {
186 return base_document;
187 }
188 /** Get all of the metadata, including directory level, associated with this file. */
189 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file) {
190 return getMetadata(filename, remove, metadatum_so_far, file, false);
191 }
192 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */
193 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean purge) {
194 ///ystem.err.println("Get metadata for " + filename);
195 ArrayList metadatum = null;
196 if(metadatum_so_far == null) {
197 metadatum = new ArrayList();
198 }
199 else {
200 metadatum = metadatum_so_far;
201 }
202 try {
203 // Retrieve the document element.
204 Element directorymetadata_element = base_document.getDocumentElement();
205 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
206 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
207 for(int i = 0; i < fileset_elements.getLength(); i++) {
208 Element fileset_element = (Element) fileset_elements.item(i);
209 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
210 for(int j = 0; j < filename_elements.getLength(); j++) {
211 Element filename_element = (Element) filename_elements.item(j);
212 String filename_text = MSMUtils.getValue(filename_element);
213 if((filename != null && filename.matches(filename_text)) || filename_text.equals(DIRECTORY_FILENAME) || purge) {
214 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
215 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
216 for(int k = 0; k < description_elements.getLength(); k++) {
217 Element description_element = (Element) description_elements.item(k);
218 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT);
219 for(int l = 0; l < metadata_elements.getLength(); l++) {
220 Element metadata_element = (Element) metadata_elements.item(l);
221 String raw_element = metadata_element.getAttribute(NAME_ATTRIBUTE);
222 //String language = metadata_element.getAttribute("language");
223 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
224 String raw_value = metadata_element.getAttribute(HVALUE_ATTRIBUTE);
225 if(raw_value == null || raw_value.length() == 0) {
226 raw_value = MSMUtils.getValue(metadata_element);
227 }
228 // Using the element string and value, retrieve a matching Metadata object from the cache
229 Metadata metadata = null;
230 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
231 if(GDMManager.metadata_cache.contains(raw_element, raw_value)) {
232 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
233 metadata = (Metadata) GDMManager.metadata_cache.get(raw_element, raw_value);
234 }
235 else {
236 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
237 if (element != null) {
238
239 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
240 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
241 metadata = new Metadata(element, value);
242 GDMManager.metadata_cache.put(raw_element, raw_value, metadata);
243 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
244 value = null;
245 element = null;
246 }
247 }
248 // check whether the metadata is null
249 if (metadata != null) {
250 // We determine whether this metadata is file or folder level
251 if(filename != null) {
252 ///ystem.err.println("Filename = " + filename);
253 ///ystem.err.println("filename_text = " + filename_text);
254 // If can only be file level if there is no folder path details in filename and if the filename matched the filename text node (it may have matched .* instead)!
255 if(filename.indexOf(File.separator) == -1 && filename.equals(filename_text)) {
256 metadata.setFileLevel(true);
257 ///ystem.err.println("File level!!!");
258 }
259 else {
260 metadata.setFileLevel(false);
261 ///ystem.err.println("Inherited!!!");
262 }
263 }
264 else {
265 ///ystem.err.println("Filename is null therefore this is file level metadata.");
266 metadata.setFileLevel(true);
267 }
268 metadata.setFile(file);
269
270 // If mode is overwrite, then remove any previous values for this metadata element.
271 if(mode.equals("accumulate")) {
272 metadata.setAccumulate(true);
273 }
274 else {
275 metadata.setAccumulate(false);
276 ///ystem.err.println("Metadata overwrites: " + metadata);
277 for(int m = metadatum.size() - 1; m >= 0; m--) {
278 Metadata old_metadata = (Metadata) metadatum.get(m);
279 if(old_metadata.getElement().equals(metadata.getElement())) {
280 metadatum.remove(m);
281 ///ystem.err.println("Removing overridden metadata: " + old_metadata);
282 }
283 old_metadata = null;
284 }
285 }
286 mode = null;
287
288 // Add the completed metadata and clean up
289 ///ystem.err.println("Adding metadata: " + metadata);
290 metadatum.add(metadata);
291
292 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
293 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
294 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
295 ///ystem.err.println("Removing " + metadata + " from " + file);
296 description_element.removeChild(metadata_element);
297 // Remove the description element if empty.
298 if(!description_element.hasChildNodes()) {
299 fileset_element.removeChild(description_element);
300 }
301 }
302 else {
303 String current_value = metadata.getAbsoluteValue();
304 if(!raw_value.equals(current_value)) {
305 // Remove old text
306 while(metadata_element.hasChildNodes()) {
307 metadata_element.removeChild(metadata_element.getFirstChild());
308 }
309 // Add new.
310 metadata_element.appendChild(base_document.createTextNode(current_value));
311 }
312 }
313 }
314 metadata = null;
315 raw_value = null;
316 raw_element = null;
317 metadata_element = null;
318 }
319 metadata_elements = null;
320 description_element = null;
321 }
322 description_elements = null;
323 }
324 filename_text = null;
325 filename_element = null;
326 }
327 // If the file set no longer has any description entries, remove it entirely
328 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
329 if(description_elements.getLength() == 0) {
330 directorymetadata_element.removeChild(fileset_element);
331 }
332 description_elements = null;
333 filename_elements = null;
334 fileset_element = null;
335 }
336 fileset_elements = null;
337 directorymetadata_element = null;
338 }
339 catch (Exception error) {
340 Gatherer.self.printStackTrace(error);
341 }
342 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
343 return metadatum;
344 }
345
346 /** Determine if this document has been saved recently, and thus xml file version is up to date. */
347 public boolean isUpToDate() {
348 return false;
349 }
350
351 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
352 public boolean isValid() {
353 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
354 String doctype_name = base_document.getDoctype().getName();
355 String root_name = base_document.getDocumentElement().getTagName();
356 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
357 }
358 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
359 public void removeMetadata(String filename, Metadata metadata) {
360 try {
361 boolean found = false;
362 boolean first_metadata_element_found = true;
363 boolean make_next_metadata_element_overwrite = false;
364 // Retrieve the document element.
365 Element directorymetadata_element = base_document.getDocumentElement();
366 // Iterate through the filesets looking for the directory level one.
367 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
368 for(int i = 0; i < fileset_elements.getLength(); i++) {
369 Element fileset_element = (Element) fileset_elements.item(i);
370 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
371 for(int j = 0; j < filename_elements.getLength(); j++) {
372 Element filename_element = (Element) filename_elements.item(j);
373 String filename_text = MSMUtils.getValue(filename_element);
374 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
375 // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove.
376 NodeList description_elements = fileset_element.getElementsByTagName("Description");
377 for(int k = 0; k < description_elements.getLength(); k++) {
378 Element description_element = (Element) description_elements.item(k);
379 NodeList metadata_elements = description_element.getElementsByTagName("Metadata");
380 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
381 Element metadata_element = (Element) metadata_elements.item(l);
382 String element = metadata_element.getAttribute("name");
383 String value = MSMUtils.getValue(metadata_element);
384 // See if this is the metadata we wish to remove
385 if(element.equals(metadata.getElement().getName())) {
386 if(value.equals(metadata.getAbsoluteValue())) {
387 // Remove it
388 ///ystem.err.println("Remove " + element + "-" + value);
389 description_element.removeChild(metadata_element);
390 found = true;
391 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
392 if(first_metadata_element_found && !metadata.accumulates()) {
393 ///ystem.err.println("First of this element found!");
394 make_next_metadata_element_overwrite = true;
395 }
396 }
397 // If this was the first metadata we've found with the element of the one to be removed set first found to false.
398 else if(first_metadata_element_found) {
399 ///ystem.err.println("Found a matching element: " + element + "=" + value);
400 first_metadata_element_found = false;
401 }
402 // Otherwise we should make this metadata overwrite as requested.
403 else if(make_next_metadata_element_overwrite) {
404 ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
405 metadata_element.setAttribute(MODE_ATTRIBUTE, "");
406 }
407 }
408 value = null;
409 element = null;
410 metadata_element = null;
411 }
412 metadata_elements = null;
413 description_element = null;
414 }
415 description_elements = null;
416 }
417 filename_text = null;
418 filename_element = null;
419 }
420 filename_elements = null;
421 fileset_element = null;
422 }
423 fileset_elements = null;
424 directorymetadata_element = null;
425 up_to_date = false;
426 }
427 catch (Exception error) {
428 Gatherer.printStackTrace(error);
429 }
430 }
431
432 /** Change the up to date flag. */
433 public void setUpToDate(boolean up_to_date) {
434 this.up_to_date = up_to_date;
435 }
436
437 /** Decode a string that was previously made Perl safe. */
438 private String decode(String safe) {
439 return safe.replaceAll("\\\\.",".");
440 }
441
442 /** Encodes unsafe filename characters (such as the . before the file extension) into Perl safe ones. */
443 private String encode(String dangerous) {
444 return dangerous.replaceAll("\\.", "\\\\.");
445 }
446}
Note: See TracBrowser for help on using the repository browser.