source: trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFile.java@ 7116

Last change on this file since 7116 was 6889, checked in by mdewsnip, 20 years ago

Fixed some bugs concerning file/folder level metadata.

  • Property svn:keywords set to Author Date Id Revision
File size: 30.0 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29import java.io.*;
30import java.util.*;
31import org.greenstone.gatherer.Gatherer;
32import org.greenstone.gatherer.msm.MetadataXMLFileManager;
33import org.greenstone.gatherer.msm.MSMUtils;
34import org.greenstone.gatherer.util.Codec;
35import org.greenstone.gatherer.util.HashMap3D;
36import org.greenstone.gatherer.util.StaticStrings;
37import org.greenstone.gatherer.util.Utility;
38import org.greenstone.gatherer.valuetree.GValueModel;
39import org.greenstone.gatherer.valuetree.GValueNode;
40import org.w3c.dom.*;
41
42/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
43 * @author John Thompson, Greenstone Digital Library, University of Waikato
44 * @version 2.3b
45 */
46public class MetadataXMLFile {
47 /** Record if the document this object is based on is up to date. */
48 private boolean up_to_date = true;
49 /** The document this class sources its data from. */
50 private Document base_document;
51 static final private String ACCUMULATE = "accumulate";
52 /** The pattern to match when searching for directory level assignments. */
53 static final private String DIRECTORY_FILENAME = ".*";
54 static final private String DESCRIPTION_ELEMENT = "Description";
55 static final private String FILENAME_ELEMENT = "FileName";
56 static final private String FILESET_ELEMENT = "FileSet";
57 static final private String HVALUE_ATTRIBUTE = "hvalue";
58 static final private String MODE_ATTRIBUTE = "mode";
59 static final private String OVERWRITE = "overwrite";
60 static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
61
/** Constructor which starts a brand new metadata.xml document by parsing the template named by Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE. */
public MetadataXMLFile() {
    // A new document is simply a freshly parsed copy of the template.
    base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
}
67
68 /** Constructor which parses an existing metadata.xml document. */
69 public MetadataXMLFile(File file) {
70 try {
71 this.base_document = Utility.parse(file.getAbsolutePath(), false);
72 }
73 catch (Exception error) {
74 // Poorly formed, or completely invalid metadata.xml file!
75 }
76 }
77
/** Constructor which adopts an already parsed metadata.xml document. The document is stored as given; no copy is made.
 * @param base_document the DOM document to wrap
 */
public MetadataXMLFile(Document base_document) {
    // Keep a reference to the caller's document.
    this.base_document = base_document;
}
82
83 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */
84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
85 {
86 // System.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
87 try {
88 // Retrieve the document element
89 Element gdm_element = base_document.getDocumentElement();
90
91 // Find the fileset element for the filename
92 Element fileset_element = findFileSetElementForFile(gdm_element, filename);
93
94 // If there is no existing fileset, then create one
95 if (fileset_element == null) {
96 // System.err.println("Creating a new fileset.");
97 fileset_element = base_document.createElement(FILESET_ELEMENT);
98 Element filename_element = base_document.createElement(FILENAME_ELEMENT);
99 Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
100 fileset_element.appendChild(filename_element);
101 fileset_element.appendChild(description_element);
102 Text filename_text = null;
103 // If the filename is null then we add a directory metadata set as gdm_element's first child
104 if (filename == null) {
105 filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
106 if (gdm_element.hasChildNodes()) {
107 gdm_element.insertBefore(fileset_element, gdm_element.getFirstChild());
108 }
109 else {
110 gdm_element.appendChild(fileset_element);
111 }
112 }
113 // Otherwise we append the new fileset to gdm_element's children
114 else {
115 // System.err.println("New fileset for " + filename);
116 filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
117 // System.err.println("After transform: " + filename);
118 filename_text = base_document.createTextNode(filename);
119 gdm_element.appendChild(fileset_element);
120 }
121 filename_element.appendChild(filename_text);
122 filename_text = null;
123 description_element = null;
124 filename_element = null;
125 }
126 // Now, finally, we can add the metadata.
127 Element metadata_element = null;
128 String name = metadata.getElement().getName();
129 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
130 if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
131 metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);
132 name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
133 }
134 else {
135 metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]);
136 }
137 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
138
139 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
140 boolean will_accumulate = false;
141 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
142 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) {
143 Element sibling_description_element = (Element) sibling_description_elements.item(k);
144 // We have to do this for each type of metadata
145 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
146 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
147 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) {
148 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
149 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice.
150 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
151 // Check the values and return if they are the same.
152 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) {
153 return;
154 }
155 will_accumulate = true;
156 }
157 sibling_metadata_element = null;
158 }
159 sibling_metadata_elements = null;
160 }
161 sibling_description_element = null;
162 }
163 sibling_description_elements = null;
164 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
165 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
166 }
167 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
168 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
169 String node_value = null;
170 if(model != null && model.isHierarchy()) {
171 //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
172 node_value = metadata.getValueNode().getFullPath(false);
173 }
174 else {
175 node_value = metadata.getAbsoluteValue();
176 }
177 ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'");
178 metadata_element.appendChild(base_document.createTextNode(node_value));
179 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
180 NodeList description_elements = fileset_element.getElementsByTagName("Description");
181 Element description_element = (Element) description_elements.item(0);
182 description_element.appendChild(metadata_element);
183 description_element = null;
184 metadata_element = null;
185 fileset_element = null;
186 gdm_element = null;
187 up_to_date = false;
188 }
189 catch (Exception error) {
190 Gatherer.printStackTrace(error);
191 }
192 }
193
194 public int countMetadata() {
195 int count = 0;
196 try {
197 // Retrieve the document element.
198 Element directorymetadata_element = base_document.getDocumentElement();
199 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
200 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
201 for(int i = 0; i < fileset_elements.getLength(); i++) {
202 Element fileset_element = (Element) fileset_elements.item(i);
203 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
204 for(int k = 0; k < description_elements.getLength(); k++) {
205 Element description_element = (Element) description_elements.item(k);
206 // We have to do this for each type of metadata
207 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
208 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
209 count = count + metadata_elements.getLength();
210 metadata_elements = null;
211 }
212 description_element = null;
213 }
214 description_elements = null;
215 fileset_element = null;
216 }
217 fileset_elements = null;
218 directorymetadata_element = null;
219 }
220 catch (Exception error) {
221 Gatherer.printStackTrace(error);
222 }
223 return count;
224 }
225
226
227 private Element findFileSetElementForFile(Element gdm_element, String filename)
228 {
229 // Iterate through the filesets looking for one that matches the given filename
230 NodeList fileset_elements = gdm_element.getElementsByTagName(FILESET_ELEMENT);
231 for (int i = 0; i < fileset_elements.getLength(); i++) {
232 Element fileset_element = (Element) fileset_elements.item(i);
233
234 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
235 for (int j = 0; j < filename_elements.getLength(); j++) {
236 Element filename_element = (Element) filename_elements.item(j);
237 String filename_pattern = MSMUtils.getValue(filename_element);
238
239 // System.err.println("Checking " + filename + " against " + filename_pattern);
240
241 // Is this a file match?
242 if (filename != null && filename.matches(filename_pattern) &&
243 !filename_pattern.equals(DIRECTORY_FILENAME)) {
244 // System.err.println("Existing file fileset!");
245 return fileset_element;
246 }
247 // Is this a folder match?
248 else if (filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
249 // System.err.println("Existing folder fileset!");
250 return fileset_element;
251 }
252 }
253 }
254
255 // No match found
256 return null;
257 }
258
259
260 /** Retrieve the document this class is wrapping. */
261 public Document getDocument() {
262 return base_document;
263 }
264
265 /** Get all of the metadata, including directory level, associated with this file. */
266 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
267 return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false);
268 }
269 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */
270 // !! Michael has written a much nicer version of this function !!
271 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) {
272 Gatherer.println("Get metadata for " + filename);
273 Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level + ", purge = " + purge);
274
275 // Determine the file's path relative to the location of the metadata.xml file
276 String file_relative_path = ((filename != null) ? filename : "");
277 if (file_relative_path.endsWith(File.separator)) {
278 file_relative_path = file_relative_path.substring(0, file_relative_path.length() - File.separator.length());
279 }
280
281 ArrayList metadatum = null;
282 ArrayList queued_for_removal = new ArrayList();
283 if(metadatum_so_far == null) {
284 metadatum = new ArrayList();
285 }
286 else {
287 metadatum = metadatum_so_far;
288 }
289 try {
290 // Retrieve the document element.
291 Element directorymetadata_element = base_document.getDocumentElement();
292 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
293 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
294 for(int i = 0; i < fileset_elements.getLength(); i++) {
295 Element fileset_element = (Element) fileset_elements.item(i);
296 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
297 for(int j = 0; j < filename_elements.getLength(); j++) {
298 Element filename_element = (Element) filename_elements.item(j);
299 String filename_text = MSMUtils.getValue(filename_element);
300 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME)) || purge) {
301 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
302 // Normal metadata
303 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
304 for(int k = 0; k < description_elements.getLength(); k++) {
305 Element description_element = (Element) description_elements.item(k);
306 // We have to do this for each type of metadata
307 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
308 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
309 for(int l = 0; l < metadata_elements.getLength(); l++) {
310 Element metadata_element = (Element) metadata_elements.item(l);
311 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
312 //String language = metadata_element.getAttribute("language");
313 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
314 String raw_value = MSMUtils.getValue(metadata_element);
315 //
316 //raw_value = Codec.transform(raw_value, Codec.DOM_TO_);
317 ///ystem.err.println("Retrieved raw value: " + raw_value);
318 // ***** LEGACY SUPPORT *****
319 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
320 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
321 ///ystem.err.println("Blarg");
322 Gatherer.println("Detected Legacy Path: " + raw_value);
323 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
324 Gatherer.println("Updated Path To: " + raw_value);
325 MSMUtils.setValue(metadata_element, raw_value);
326 }
327 // **************************
328 // Using the element string and value, retrieve a matching Metadata object from the cache
329 Metadata metadata = null;
330 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
331 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
332 if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value) && !purge) {
333 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
334 metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
335 }
336 else {
337 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
338 if (element != null) {
339 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
340 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
341 metadata = new Metadata(element, value);
342 if(!purge) {
343 MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
344 }
345 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
346 value = null;
347 element = null;
348 }
349 }
350
351 // Determine whether this metadata is file or folder level
352 if (metadata != null) {
353 // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
354 // Direct match to regular expression
355 if (file_relative_path.matches(filename_text)) {
356 boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
357 metadata.setFile(file);
358 metadata.setFileLevel(!is_folder_level);
359 }
360 // Indirect match to regular expression (always folder level)
361 else if (file_relative_path.startsWith(filename_text + File.separator)) {
362 metadata.setFile(new File(file, filename_text));
363 metadata.setFileLevel(false);
364 }
365
366 // If mode is overwrite, then remove any previous values for this metadata element.
367 if(mode.equals("accumulate")) {
368 metadata.setAccumulate(true);
369 }
370 else {
371 metadata.setAccumulate(false);
372 ///ystem.err.println("Metadata overwrites: " + metadata);
373 for(int m = metadatum.size() - 1; m >= 0; m--) {
374 Metadata old_metadata = (Metadata) metadatum.get(m);
375 if(old_metadata.getElement().equals(metadata.getElement())) {
376 metadatum.remove(m);
377 ///ystem.err.println("Removing overridden metadata: " + old_metadata);
378 }
379 old_metadata = null;
380 }
381 }
382 mode = null;
383 // Add the completed metadata and clean up
384 ///ystem.err.println("Adding metadata: " + metadata);
385 metadatum.add(metadata);
386 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
387 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
388 ///atherer.println("Have we been asked to remove the metadata: " + metadata);
389 ///atherer.println("Given:");
390 ///atherer.println("\tremove = " + remove);
391 ///atherer.println("\tfilename = " + filename);
392 ///atherer.println("\tfilename_text = " + filename_text + "?");
393 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
394 ///atherer.println("Yes! Queuing for Removal.");
395 queued_for_removal.add(metadata_element);
396 }
397 else {
398 ///atherer.println("No. Updating.");
399 String current_value = metadata.getValueNode().getFullPath(false);
400 ///ystem.err.println("Checking the current mdv path: " + current_value);
401 ///ystem.err.println("Against whats in the metadata file: " + raw_value);
402 if(!raw_value.equals(current_value)) {
403 // Remove old text
404 while(metadata_element.hasChildNodes()) {
405 metadata_element.removeChild(metadata_element.getFirstChild());
406 }
407 // Add new.
408 metadata_element.appendChild(base_document.createTextNode(current_value));
409 }
410 }
411 }
412 metadata = null;
413 raw_value = null;
414 raw_element = null;
415 metadata_element = null;
416 }
417 metadata_elements = null;
418 }
419
420 // Now we remove any elements that have been queued for deletion
421 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
422 Element metadata_element = (Element) queued_for_removal.get(a);
423 description_element.removeChild(metadata_element);
424 up_to_date = false;
425 }
426 queued_for_removal.clear();
427 queued_for_removal = null;
428
429 // If the description_element no longer has any children remove it
430 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
431 NodeList extracted_elements = description_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
432 if(metadata_elements.getLength() == 0 && extracted_elements.getLength() == 0) {
433 fileset_element.removeChild(description_element);
434 up_to_date = false;
435 }
436 description_element = null;
437 }
438 description_elements = null;
439 }
440 filename_text = null;
441 filename_element = null;
442 }
443 // If the file set no longer has any description entries, remove it entirely
444 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
445 if(description_elements.getLength() == 0) {
446 directorymetadata_element.removeChild(fileset_element);
447 up_to_date = false;
448 }
449 description_elements = null;
450 filename_elements = null;
451 fileset_element = null;
452 }
453 fileset_elements = null;
454 directorymetadata_element = null;
455 }
456 catch (Exception error) {
457 Gatherer.self.printStackTrace(error);
458 }
459 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
460 return metadatum;
461 }
462
463 /** Determine if this document has been saved recently, and thus xml file version is up to date. */
464 public boolean isUpToDate() {
465 return false;
466 }
467
468 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
469 public boolean isValid() {
470 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
471 String doctype_name = base_document.getDoctype().getName();
472 String root_name = base_document.getDocumentElement().getTagName();
473 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
474 }
475
476 /** Remove all of the extracted metadata (XMetadata) from this document. */
477 public void removeExtractedMetadata() {
478 try {
479 Element document_element = base_document.getDocumentElement();
480 NodeList extracted_metadata_elements = document_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
481 document_element = null;
482 for(int i = extracted_metadata_elements.getLength(); i != 0; i--) {
483 Element extracted_metadata_element = (Element) extracted_metadata_elements.item(i - 1);
484 String element_name = extracted_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
485 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(element_name);
486 if(element != null) {
487 element.dec();
488 }
489 element = null;
490 ///ystem.err.println("Removing extracted metadata: " + element_name + "=" + MSMUtils.getValue(extracted_metadata_element));
491 element_name = null;
492 Node parent_node = extracted_metadata_element.getParentNode();
493 parent_node.removeChild(extracted_metadata_element);
494 parent_node = null;
495 extracted_metadata_element = null;
496 }
497 extracted_metadata_elements = null;
498 up_to_date = false;
499 }
500 catch(Exception exception) {
501 Gatherer.println("Exception in MetadataXMLFile.removeExtractedMetadata() - unexpected");
502 Gatherer.printStackTrace(exception);
503 }
504 }
505
506 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
507 public void removeMetadata(String filename, Metadata metadata) {
508 Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
509 try {
510 boolean found = false;
511 boolean first_metadata_element_found = true;
512 boolean make_next_metadata_element_overwrite = false;
513 boolean remove_fileset = false;
514 // Retrieve the document element.
515 Element directorymetadata_element = base_document.getDocumentElement();
516 // Iterate through the filesets looking for the directory level one.
517 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
518 for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
519 Element fileset_element = (Element) fileset_elements.item(i);
520 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
521 for(int j = 0; !found && j < filename_elements.getLength(); j++) {
522 Element filename_element = (Element) filename_elements.item(j);
523 String filename_text = MSMUtils.getValue(filename_element);
524 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
525 // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove.
526 NodeList description_elements = fileset_element.getElementsByTagName("Description");
527 for(int k = 0; !found && k < description_elements.getLength(); k++) {
528 Element description_element = (Element) description_elements.item(k);
529 // We have to do this for each type of metadata
530 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
531 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
532 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
533 Element metadata_element = (Element) metadata_elements.item(l);
534 String element = metadata_element.getAttribute("name");
535 String value = MSMUtils.getValue(metadata_element);
536 // See if this is the metadata we wish to remove
537 if(element.equals(metadata.getElement().getName())) {
538 if(value.equals(metadata.getValueNode().getFullPath(false))) {
539 // Remove it
540 ///ystem.err.println("Remove " + element + "-" + value);
541 description_element.removeChild(metadata_element);
542 found = true;
543 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
544 if(first_metadata_element_found && !metadata.accumulates()) {
545 ///ystem.err.println("First of this element found!");
546 make_next_metadata_element_overwrite = true;
547 }
548 }
549 // If this was the first metadata we've found with the element of the one to be removed set first found to false.
550 else if(first_metadata_element_found) {
551 ///ystem.err.println("Found a matching element: " + element + "=" + value);
552 first_metadata_element_found = false;
553 }
554 // Otherwise we should make this metadata overwrite as requested.
555 else if(make_next_metadata_element_overwrite) {
556 ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
557 metadata_element.setAttribute(MODE_ATTRIBUTE, "");
558 }
559 }
560 value = null;
561 element = null;
562 metadata_element = null;
563 }
564 NodeList normal_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[0]);
565 NodeList extracted_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[1]);
566 // If we found it, removed it, and now the description tag has no children, mark the fileset for removal
567 if(normal_metadata_elements.getLength() == 0 && extracted_metadata_elements.getLength() == 0) {
568 remove_fileset = true;
569 }
570 extracted_metadata_elements = null;
571 normal_metadata_elements = null;
572 metadata_elements = null;
573 }
574 description_element = null;
575 }
576 description_elements = null;
577 }
578 filename_text = null;
579 filename_element = null;
580 }
581 filename_elements = null;
582 if(found && remove_fileset) {
583 directorymetadata_element.removeChild(fileset_element);
584 }
585 fileset_element = null;
586 }
587 fileset_elements = null;
588 directorymetadata_element = null;
589 up_to_date = false;
590 }
591 catch (Exception error) {
592 Gatherer.printStackTrace(error);
593 }
594 }
595
596 /** Change the up to date flag.
597 * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise
598 */
599 public void setUpToDate(boolean up_to_date) {
600 this.up_to_date = up_to_date;
601 }
602}
Note: See TracBrowser for help on using the repository browser.