source: trunk/gli/src/org/greenstone/gatherer/msm/GDMDocument.java@ 4399

Last change on this file since 4399 was 4399, checked in by jmt12, 21 years ago

2030087: Fixed two problems. (1) When adding metadata, an existing matching fileset element was found but not used. (2) When the first (and hence overwriting) entry of several metadata entries with the same element was removed, inherited metadata was picked up again, although the second entry should have taken over as the overwriting entry.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1package org.greenstone.gatherer.msm;
2/**
3 *#########################################################################
4 *
5 * A component of the Gatherer application, part of the Greenstone digital
6 * library suite from the New Zealand Digital Library Project at the
7 * University of Waikato, New Zealand.
8 *
9 * Author: John Thompson, Greenstone Digital Library, University of Waikato
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28import java.io.*;
29import java.util.*;
30import org.greenstone.gatherer.Gatherer;
31import org.greenstone.gatherer.msm.GDMManager;
32import org.greenstone.gatherer.msm.MSMUtils;
33import org.greenstone.gatherer.util.HashMap3D;
34import org.greenstone.gatherer.util.Utility;
35import org.greenstone.gatherer.valuetree.GValueModel;
36import org.greenstone.gatherer.valuetree.GValueNode;
37import org.w3c.dom.*;
38/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
39 * @author John Thompson, Greenstone Digital Library, University of Waikato
40 * @version 2.3b
41 */
42public class GDMDocument {
43 /** Record if the document this object is based on is up to date. */
44 private boolean up_to_date = true;
45 /** The document this class sources its data from. */
46 private Document base_document;
47 static final private String ACCUMULATE = "accumulate";
48 /** The pattern to match when searching for directory level assignments. */
49 static final private String DIRECTORY_FILENAME = ".*";
50 static final private String DESCRIPTION_ELEMENT = "Description";
51 static final private String FILENAME_ELEMENT = "FileName";
52 static final private String FILESET_ELEMENT = "FileSet";
53 static final private String HVALUE_ATTRIBUTE = "hvalue";
54 static final private String METADATA_ELEMENT = "Metadata";
55 static final private String MODE_ATTRIBUTE = "mode";
56 static final private String NAME_ATTRIBUTE = "name";
57 static final private String OVERWRITE = "overwrite";
58 /** Constructor which creates a brand new metadata.xml document. */
59 public GDMDocument() {
60 // Create new document. We do this by loading a copy of the template. */
61 this.base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
62 }
63 /** Constructor which parses an existing metadata.xml document. */
64 public GDMDocument(File file) {
65 try {
66 this.base_document = Utility.parse(file.getAbsolutePath(), false);
67 }
68 catch (Exception error) {
69 // Poorly formed, or completely invalid metadata.xml file!
70 }
71 }
72 /** Constructor which wraps around an existing metadata.xml document. */
73 public GDMDocument(Document base_document) {
74 this.base_document = base_document;
75 }
76 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! */
77 public void addMetadata(String filename, Metadata metadata) {
78 ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
79 try {
80 // Retrieve the document element.
81 Element directorymetadata_element = base_document.getDocumentElement();
82 // Iterate through the filesets looking for one that matches the given filename.
83 Element fileset_element = null;
84 boolean found = false;
85 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
86 for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
87 fileset_element = (Element) fileset_elements.item(i);
88 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
89 for(int j = 0; !found && j < filename_elements.getLength(); j++) {
90 Element filename_element = (Element) filename_elements.item(j);
91 String filename_pattern = MSMUtils.getValue(filename_element);
92 // Have we found a match. If so break out of for loop.
93 if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) {
94 ///ystem.err.println("Adding to existing file fileset!");
95 found = true;
96 }
97 else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
98 ///ystem.err.println("Adding to existing folder fileset!");
99 ///ystem.err.println("filename_pattern = '" + filename_pattern + "'");
100 found = true;
101 }
102 // No match. On to the next one.
103 else {
104 fileset_element = null;
105 }
106 filename_pattern = null;
107 filename_element = null;
108 }
109 }
110 fileset_elements = null;
111 // If we still haven't found an existing fileset, then its time to create one.
112 if(fileset_element == null) {
113 ///ystem.err.println("Creating a new fileset.");
114 fileset_element = base_document.createElement(FILESET_ELEMENT);
115 Element filename_element = base_document.createElement(FILENAME_ELEMENT);
116 Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
117 fileset_element.appendChild(filename_element);
118 fileset_element.appendChild(description_element);
119 Text filename_text = null;
120 // If the filename is null then we add a directory metadata set as directorymetadata_element's first child
121 if(filename == null) {
122 filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
123 if(directorymetadata_element.hasChildNodes()) {
124 directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild());
125 }
126 else {
127 directorymetadata_element.appendChild(fileset_element);
128 }
129 }
130 // Otherwise we just append the new fileset to directorymetadata_element's children.
131 else {
132 filename_text = base_document.createTextNode(filename);
133 directorymetadata_element.appendChild(fileset_element);
134 }
135 filename_element.appendChild(filename_text);
136 filename_text = null;
137 description_element = null;
138 filename_element = null;
139 }
140 // Now, finally, we can add the metadata.
141 Element metadata_element = base_document.createElement(METADATA_ELEMENT);
142 metadata_element.setAttribute(NAME_ATTRIBUTE, metadata.getElement().getName());
143
144 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
145 boolean will_accumulate = false;
146 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
147 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
148 Element sibling_description_element = (Element) sibling_description_elements.item(k);
149 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(METADATA_ELEMENT);
150 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
151 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
152 will_accumulate = sibling_metadata_element.getAttribute(NAME_ATTRIBUTE).equals(metadata_element.getAttribute(NAME_ATTRIBUTE));
153 sibling_metadata_element = null;
154 }
155 sibling_metadata_elements = null;
156 sibling_description_element = null;
157 }
158 sibling_description_elements = null;
159 if(will_accumulate) { //mode.equals(ACCUMULATE)) {
160 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
161 }
162 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm adding a new attribute called hvalue which indicates the hierarchy value path as a '\' separated string.
163 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
164 if(model != null && model.isHierarchy()) {
165 metadata_element.setAttribute(HVALUE_ATTRIBUTE, metadata.getValueNode().getFullPath());
166 }
167 metadata_element.appendChild(base_document.createTextNode(metadata.getAbsoluteValue()));
168 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
169 NodeList description_elements = fileset_element.getElementsByTagName("Description");
170 Element description_element = (Element) description_elements.item(0);
171 description_element.appendChild(metadata_element);
172 description_element = null;
173 metadata_element = null;
174 //mode = null;
175 fileset_element = null;
176 directorymetadata_element = null;
177 up_to_date = false;
178 }
179 catch (Exception error) {
180 Gatherer.printStackTrace(error);
181 }
182 }
183
184 /** Retrieve the document this class is wrapping. */
185 public Document getDocument() {
186 return base_document;
187 }
188 /** Get all of the metadata, including directory level, associated with this file. */
189 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file) {
190 return getMetadata(filename, remove, metadatum_so_far, file, false);
191 }
192 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */
193 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean purge) {
194 ///ystem.err.println("Get metadata for " + filename);
195 ArrayList metadatum = null;
196 if(metadatum_so_far == null) {
197 metadatum = new ArrayList();
198 }
199 else {
200 metadatum = metadatum_so_far;
201 }
202 try {
203 // Retrieve the document element.
204 Element directorymetadata_element = base_document.getDocumentElement();
205 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
206 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
207 for(int i = 0; i < fileset_elements.getLength(); i++) {
208 Element fileset_element = (Element) fileset_elements.item(i);
209 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
210 for(int j = 0; j < filename_elements.getLength(); j++) {
211 Element filename_element = (Element) filename_elements.item(j);
212 String filename_text = MSMUtils.getValue(filename_element);
213 if((filename != null && filename.matches(filename_text)) || filename_text.equals(DIRECTORY_FILENAME) || purge) {
214 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
215 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
216 for(int k = 0; k < description_elements.getLength(); k++) {
217 Element description_element = (Element) description_elements.item(k);
218 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT);
219 for(int l = 0; l < metadata_elements.getLength(); l++) {
220 Element metadata_element = (Element) metadata_elements.item(l);
221 String raw_element = metadata_element.getAttribute(NAME_ATTRIBUTE);
222 //String language = metadata_element.getAttribute("language");
223 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
224 String raw_value = metadata_element.getAttribute(HVALUE_ATTRIBUTE);
225 if(raw_value == null || raw_value.length() == 0) {
226 raw_value = MSMUtils.getValue(metadata_element);
227 }
228 // Using the element string and value, retrieve a matching Metadata object from the cache
229 Metadata metadata = null;
230 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
231 if(GDMManager.metadata_cache.contains(raw_element, raw_value)) {
232 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
233 metadata = (Metadata) GDMManager.metadata_cache.get(raw_element, raw_value);
234 }
235 else {
236 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
237 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
238 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
239 metadata = new Metadata(element, value);
240 GDMManager.metadata_cache.put(raw_element, raw_value, metadata);
241 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
242 value = null;
243 element = null;
244 }
245 // We determine whether this metadata is file or folder level
246 if(filename != null) {
247 ///ystem.err.println("Filename = " + filename);
248 ///ystem.err.println("filename_text = " + filename_text);
249 // If can only be file level if there is no folder path details in filename and if the filename matched the filename text node (it may have matched .* instead)!
250 if(filename.indexOf(File.separator) == -1 && filename.equals(filename_text)) {
251 metadata.setFileLevel(true);
252 ///ystem.err.println("File level!!!");
253 }
254 else {
255 metadata.setFileLevel(false);
256 ///ystem.err.println("Inherited!!!");
257 }
258 }
259 else {
260 ///ystem.err.println("Filename is null therefore this is file level metadata.");
261 metadata.setFileLevel(true);
262 }
263 metadata.setFile(file);
264
265 // If mode is overwrite, then remove any previous values for this metadata element.
266 if(mode.equals("accumulate")) {
267 metadata.setAccumulate(true);
268 }
269 else {
270 metadata.setAccumulate(false);
271 ///ystem.err.println("Metadata overwrites: " + metadata);
272 for(int m = metadatum.size() - 1; m >= 0; m--) {
273 Metadata old_metadata = (Metadata) metadatum.get(m);
274 if(old_metadata.getElement().equals(metadata.getElement())) {
275 metadatum.remove(m);
276 ///ystem.err.println("Removing overridden metadata: " + old_metadata);
277 }
278 old_metadata = null;
279 }
280 }
281 mode = null;
282
283 // Add the completed metadata and clean up
284 ///ystem.err.println("Adding metadata: " + metadata);
285 metadatum.add(metadata);
286
287 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
288 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
289 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
290 ///ystem.err.println("Removing " + metadata + " from " + file);
291 description_element.removeChild(metadata_element);
292 // Remove the description element if empty.
293 if(!description_element.hasChildNodes()) {
294 fileset_element.removeChild(description_element);
295 }
296 }
297 else {
298 String current_value = metadata.getAbsoluteValue();
299 if(!raw_value.equals(current_value)) {
300 // Remove old text
301 while(metadata_element.hasChildNodes()) {
302 metadata_element.removeChild(metadata_element.getFirstChild());
303 }
304 // Add new.
305 metadata_element.appendChild(base_document.createTextNode(current_value));
306 }
307 }
308
309 metadata = null;
310 raw_value = null;
311 raw_element = null;
312 metadata_element = null;
313 }
314 metadata_elements = null;
315 description_element = null;
316 }
317 description_elements = null;
318 }
319 filename_text = null;
320 filename_element = null;
321 }
322 // If the file set no longer has any description entries, remove it entirely
323 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
324 if(description_elements.getLength() == 0) {
325 directorymetadata_element.removeChild(fileset_element);
326 }
327 description_elements = null;
328 filename_elements = null;
329 fileset_element = null;
330 }
331 fileset_elements = null;
332 directorymetadata_element = null;
333 }
334 catch (Exception error) {
335 Gatherer.self.printStackTrace(error);
336 }
337 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
338 return metadatum;
339 }
340
341 /** Determine if this document has been saved recently, and thus xml file version is up to date. */
342 public boolean isUpToDate() {
343 return false;
344 }
345
346 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
347 public boolean isValid() {
348 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
349 String doctype_name = base_document.getDoctype().getName();
350 String root_name = base_document.getDocumentElement().getTagName();
351 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
352 }
353 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
354 public void removeMetadata(String filename, Metadata metadata) {
355 try {
356 boolean found = false;
357 boolean first_metadata_element_found = true;
358 boolean make_next_metadata_element_overwrite = false;
359 // Retrieve the document element.
360 Element directorymetadata_element = base_document.getDocumentElement();
361 // Iterate through the filesets looking for the directory level one.
362 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
363 for(int i = 0; i < fileset_elements.getLength(); i++) {
364 Element fileset_element = (Element) fileset_elements.item(i);
365 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
366 for(int j = 0; j < filename_elements.getLength(); j++) {
367 Element filename_element = (Element) filename_elements.item(j);
368 String filename_text = MSMUtils.getValue(filename_element);
369 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
370 // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove.
371 NodeList description_elements = fileset_element.getElementsByTagName("Description");
372 for(int k = 0; k < description_elements.getLength(); k++) {
373 Element description_element = (Element) description_elements.item(k);
374 NodeList metadata_elements = description_element.getElementsByTagName("Metadata");
375 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
376 Element metadata_element = (Element) metadata_elements.item(l);
377 String element = metadata_element.getAttribute("name");
378 String value = MSMUtils.getValue(metadata_element);
379 // See if this is the metadata we wish to remove
380 if(element.equals(metadata.getElement().getName())) {
381 if(value.equals(metadata.getAbsoluteValue())) {
382 // Remove it
383 System.err.println("Remove " + element + "-" + value);
384 description_element.removeChild(metadata_element);
385 found = true;
386 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
387 if(first_metadata_element_found && !metadata.accumulates()) {
388 System.err.println("First of this element found!");
389 make_next_metadata_element_overwrite = true;
390 }
391 }
392 // If this was the first metadata we've found with the element of the one to be removed set first found to false.
393 else if(first_metadata_element_found) {
394 System.err.println("Found a matching element: " + element + "=" + value);
395 first_metadata_element_found = false;
396 }
397 // Otherwise we should make this metadata overwrite as requested.
398 else if(make_next_metadata_element_overwrite) {
399 System.err.println("Changing to overwrite: " + element + "=" + value);
400 metadata_element.setAttribute(MODE_ATTRIBUTE, "");
401 }
402 }
403 value = null;
404 element = null;
405 metadata_element = null;
406 }
407 metadata_elements = null;
408 description_element = null;
409 }
410 description_elements = null;
411 }
412 filename_text = null;
413 filename_element = null;
414 }
415 filename_elements = null;
416 fileset_element = null;
417 }
418 fileset_elements = null;
419 directorymetadata_element = null;
420 up_to_date = false;
421 }
422 catch (Exception error) {
423 Gatherer.printStackTrace(error);
424 }
425 }
426
427 /** Change the up to date flag. */
428 public void setUpToDate(boolean up_to_date) {
429 this.up_to_date = up_to_date;
430 }
431
432 /** Decode a string that was previously made Perl safe. */
433 private String decode(String safe) {
434 return safe.replaceAll("\\\\.",".");
435 }
436
437 /** Encodes unsafe filename characters (such as the . before the file extension) into Perl safe ones. */
438 private String encode(String dangerous) {
439 return dangerous.replaceAll("\\.", "\\\\.");
440 }
441}
Note: See TracBrowser for help on using the repository browser.