source: trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFile.java@ 8236

Last change on this file since 8236 was 8236, checked in by mdewsnip, 20 years ago

Replaced all Gatherer.print* with DebugStream.print*.

  • Property svn:keywords set to Author Date Id Revision
File size: 30.3 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29import java.io.*;
30import java.util.*;
31import org.greenstone.gatherer.DebugStream;
32import org.greenstone.gatherer.Gatherer;
33import org.greenstone.gatherer.msm.MetadataXMLFileManager;
34import org.greenstone.gatherer.msm.MSMUtils;
35import org.greenstone.gatherer.util.Codec;
36import org.greenstone.gatherer.util.HashMap3D;
37import org.greenstone.gatherer.util.StaticStrings;
38import org.greenstone.gatherer.util.Utility;
39import org.greenstone.gatherer.valuetree.GValueModel;
40import org.greenstone.gatherer.valuetree.GValueNode;
41import org.w3c.dom.*;
42
43/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
44 * @author John Thompson, Greenstone Digital Library, University of Waikato
45 * @version 2.3b
46 */
47public class MetadataXMLFile {
48 /** Record if the document this object is based on is up to date. */
49 private boolean up_to_date = true;
50 /** The document this class sources its data from. */
51 private Document base_document;
52 static final private String ACCUMULATE = "accumulate";
53 /** The pattern to match when searching for directory level assignments. */
54 static final private String DIRECTORY_FILENAME = ".*";
55 static final private String DESCRIPTION_ELEMENT = "Description";
56 static final public String FILENAME_ELEMENT = "FileName";
57 static final public String FILESET_ELEMENT = "FileSet";
58 static final private String HVALUE_ATTRIBUTE = "hvalue";
59 static final private String MODE_ATTRIBUTE = "mode";
60 static final private String OVERWRITE = "overwrite";
61 static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
62
/** Builds a wrapper around a brand new metadata.xml document, obtained by parsing the template named by Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE. */
public MetadataXMLFile() {
    // A fresh document is simply a parsed copy of the template.
    base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
}
68
69 /** Constructor which parses an existing metadata.xml document. */
70 public MetadataXMLFile(File file) {
71 try {
72 this.base_document = Utility.parse(file.getAbsolutePath(), false);
73 }
74 catch (Exception error) {
75 // Poorly formed, or completely invalid metadata.xml file!
76 }
77 }
78
/** Builds a wrapper around a metadata.xml document that has already been parsed.
 * @param base_document the DOM document to wrap; it is stored as given, not copied
 */
public MetadataXMLFile(Document base_document) {
    this.base_document = base_document;
}
83
84 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */
85 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
86 {
87 ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
88 try {
89 // Retrieve the document element
90 Element gdm_element = base_document.getDocumentElement();
91
92 // Find the fileset element for the filename
93 Element fileset_element = findFileSetElementForFile(gdm_element, filename);
94
95 // If there is no existing fileset, then create one
96 if (fileset_element == null) {
97 fileset_element = base_document.createElement(FILESET_ELEMENT);
98 Element filename_element = base_document.createElement(FILENAME_ELEMENT);
99 Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
100 fileset_element.appendChild(filename_element);
101 fileset_element.appendChild(description_element);
102 Text filename_text = null;
103 // If the filename is null then we add a directory metadata set as gdm_element's first child
104 if (filename == null) {
105 filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
106 if (gdm_element.hasChildNodes()) {
107 gdm_element.insertBefore(fileset_element, gdm_element.getFirstChild());
108 }
109 else {
110 gdm_element.appendChild(fileset_element);
111 }
112 }
113 // Otherwise we append the new fileset to gdm_element's children
114 else {
115 ///ystem.err.println("New fileset for " + filename);
116 filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
117 ///ystem.err.println("After transform: " + filename);
118 filename_text = base_document.createTextNode(filename);
119 gdm_element.appendChild(fileset_element);
120 }
121 filename_element.appendChild(filename_text);
122 filename_text = null;
123 description_element = null;
124 filename_element = null;
125 }
126 // Now, finally, we can add the metadata.
127 Element metadata_element = null;
128 String name = metadata.getElement().getName();
129 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
130 String current_metadata_type = StaticStrings.METADATA_ELEMENT;
131 if(name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
132 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
133 name = name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
134 }
135
136 metadata_element = base_document.createElement(current_metadata_type);
137 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
138
139 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
140 // also we are checking for the existence of exactly the same metadata cos sometimes we can be asked to add the same metadata twice.
141 boolean will_accumulate = false;
142 // only look through Metadata or XMetadata depending on which type we are trying to add
143 NodeList sibling_metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
144 for (int i=0; i<sibling_metadata_elements.getLength(); i++) {
145 Element sib_meta = (Element) sibling_metadata_elements.item(i);
146 if(name.equals(sib_meta.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
147 // found one with the same name - the new metadata will accumulate
148 will_accumulate = true;
149 // check for the same value
150 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sib_meta))) {
151 return;
152 }
153
154 }
155 sib_meta = null;
156 }
157 sibling_metadata_elements = null;
158
159 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
160 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
161 }
162 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
163 // getAbsoluteValue now does return the full path
164 String node_value = metadata.getAbsoluteValue();
165 //GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
166 //String node_value = null;
167 //if(model != null && model.isHierarchy()) {
168 //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
169 //node_value = metadata.getValueNode().getFullPath(false);
170 //}
171 //else {
172 //node_value = metadata.getAbsoluteValue();
173 // }
174 ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'");
175 metadata_element.appendChild(base_document.createTextNode(node_value));
176 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
177 NodeList description_elements = fileset_element.getElementsByTagName("Description");
178 Element description_element = (Element) description_elements.item(0);
179 description_element.appendChild(metadata_element);
180 description_element = null;
181 metadata_element = null;
182 fileset_element = null;
183 gdm_element = null;
184 up_to_date = false;
185 }
186 catch (Exception error) {
187 DebugStream.printStackTrace(error);
188 }
189 }
190
191 /** this is used to 'purge' the metadata - I've taken the purge code out of getMetadata and put it in to here, cos its only called from one place and we dont want to retrieve the metadata, just update it */
192 public void cleanUpMetadataRefs() {
193 //DebugStream.println("clean up metadata refs!");
194
195 String file_relative_path = "";
196 try {
197 // Retrieve the document element.
198 Element directorymetadata_element = base_document.getDocumentElement();
199 // Iterate through all the metadata
200 // We have to do this for each type of metadata - do we need it for extracted metadata - will this only affect hierarchical metadata in which case only need to do normal metadata ?
201 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
202 NodeList metadata_elements = directorymetadata_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
203 for(int l = 0; l < metadata_elements.getLength(); l++) {
204 Element metadata_element = (Element) metadata_elements.item(l);
205 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
206 String raw_value = MSMUtils.getValue(metadata_element);
207 //
208 // ***** LEGACY SUPPORT *****
209 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
210 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
211 DebugStream.println("Detected Legacy Path: " + raw_value);
212 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
213 MSMUtils.setValue(metadata_element, raw_value);
214 }
215
216 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
217 if (element != null) {
218 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
219 String current_value = value.getFullPath(false);
220 if(!raw_value.equals(current_value)) {
221 // set the new value
222 MSMUtils.setValue(metadata_element, current_value);
223 }
224 value = null;
225 current_value = null;
226 }
227 element = null;
228 metadata_element = null;
229 raw_element = null;
230 raw_value = null;
231 } // for each metadata element
232 metadata_elements = null;
233 } // for each metadata type
234 } catch (Exception exception) {
235 DebugStream.printStackTrace(exception);
236 }
237 }
238
239
240 private Element findFileSetElementForFile(Element gdm_element, String filename)
241 {
242 // Iterate through the filesets looking for one that matches the given filename
243 NodeList fileset_elements = gdm_element.getElementsByTagName(FILESET_ELEMENT);
244 for (int i = 0; i < fileset_elements.getLength(); i++) {
245 Element fileset_element = (Element) fileset_elements.item(i);
246
247 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
248 for (int j = 0; j < filename_elements.getLength(); j++) {
249 Element filename_element = (Element) filename_elements.item(j);
250 String filename_pattern = MSMUtils.getValue(filename_element);
251
252 // System.err.println("Checking " + filename + " against " + filename_pattern);
253
254 // Is this a file match?
255 if (filename != null && filename.matches(filename_pattern) &&
256 !filename_pattern.equals(DIRECTORY_FILENAME)) {
257 // System.err.println("Existing file fileset!");
258 return fileset_element;
259 }
260 // Is this a folder match?
261 else if (filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
262 // System.err.println("Existing folder fileset!");
263 return fileset_element;
264 }
265 }
266 }
267
268 // No match found
269 return null;
270 }
271
272
273 /** Retrieve the document this class is wrapping. */
274 public Document getDocument() {
275 return base_document;
276 }
277
278 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. */
279 // !! Michael has written a much nicer version of this function !!
280 // Kath has cleaned up this version a bit
281 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
282 DebugStream.println("Get metadata for " + filename);
283 DebugStream.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level);
284
285 // Determine the file's path relative to the location of the metadata.xml file
286 String file_relative_path = ((filename != null) ? filename : "");
287 if (file_relative_path.endsWith(File.separator)) {
288 file_relative_path = file_relative_path.substring(0, file_relative_path.length() - File.separator.length());
289 }
290
291 ArrayList metadatum = null;
292 ArrayList queued_for_removal = new ArrayList();
293 if(metadatum_so_far == null) {
294 metadatum = new ArrayList();
295 }
296 else {
297 metadatum = metadatum_so_far;
298 }
299 try {
300 // Retrieve the document element.
301 Element directorymetadata_element = base_document.getDocumentElement();
302 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
303 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
304 for(int i = 0; i < fileset_elements.getLength(); i++) {
305 Element fileset_element = (Element) fileset_elements.item(i);
306 boolean fileset_matches = false;
307 // look through the filename elements of this and see if we have a match
308 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
309 String filename_text = "";
310 for(int j = 0; j < filename_elements.getLength(); j++) {
311 Element filename_element = (Element) filename_elements.item(j);
312 filename_text = MSMUtils.getValue(filename_element);
313 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME))) {
314 fileset_matches = true;
315 filename_element = null;
316 break;
317 }
318 filename_element = null;
319 }
320
321 if (!fileset_matches) {
322 // go to teh next fileset
323 fileset_element = null;
324 continue;
325 }
326 // If this fileset matches add all of the metadata found in the fileset, remembering to abide by desired mode (accumulate vs. overwrite).
327 // We have to do this for each type of metadata
328 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
329 NodeList metadata_elements = fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
330 for(int l = 0; l < metadata_elements.getLength(); l++) {
331 Element metadata_element = (Element) metadata_elements.item(l);
332 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
333 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
334 String raw_value = MSMUtils.getValue(metadata_element);
335 // ***** LEGACY SUPPORT *****
336 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
337 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
338 DebugStream.println("Detected Legacy Path: " + raw_value);
339 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
340 DebugStream.println("Updated Path To: " + raw_value);
341 MSMUtils.setValue(metadata_element, raw_value);
342 }
343 // **************************
344 // Using the element string and value, retrieve a matching Metadata object from the cache
345 Metadata metadata = null;
346 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
347 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
348 if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value)) {
349 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
350 metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
351 }
352 else {
353 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
354 if (element != null) {
355 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
356 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
357 metadata = new Metadata(element, value);
358 MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
359
360 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
361 value = null;
362 element = null;
363 }
364 }
365
366 // Determine whether this metadata is file or folder level
367 if (metadata != null) {
368 // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
369 // Direct match to regular expression
370 if (file_relative_path.matches(filename_text)) {
371 boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
372 metadata.setFile(file);
373 metadata.setFileLevel(!is_folder_level);
374 }
375 // Indirect match to regular expression (always folder level)
376 else if (file_relative_path.startsWith(filename_text + File.separator)) {
377 metadata.setFile(new File(file, filename_text));
378 metadata.setFileLevel(false);
379 }
380
381 // If mode is overwrite, then remove any previous values for this metadata element.
382 if(mode.equals("accumulate")) {
383 metadata.setAccumulate(true);
384 }
385 else {
386 metadata.setAccumulate(false);
387 ///ystem.err.println("Metadata overwrites: " + metadata);
388 for(int m = metadatum.size() - 1; m >= 0; m--) {
389 Metadata old_metadata = (Metadata) metadatum.get(m);
390 if(old_metadata.getElement().equals(metadata.getElement())) {
391 metadatum.remove(m);
392 ///ystem.err.println("Removing overridden metadata: " + old_metadata);
393 }
394 old_metadata = null;
395 }
396 }
397 mode = null;
398 // Add the completed metadata and clean up
399 ///ystem.err.println("Adding metadata: " + metadata);
400 metadatum.add(metadata);
401 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
402 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
403 ///atherer.println("Have we been asked to remove the metadata: " + metadata);
404 ///atherer.println("Given:");
405 ///atherer.println("\tremove = " + remove);
406 ///atherer.println("\tfilename = " + filename);
407 ///atherer.println("\tfilename_text = " + filename_text + "?");
408 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
409 ///atherer.println("Yes! Queuing for Removal.");
410 queued_for_removal.add(metadata_element);
411 }
412 else {
413 ///atherer.println("No. Updating.");
414 String current_value = metadata.getValueNode().getFullPath(false);
415 ///ystem.err.println("Checking the current mdv path: " + current_value);
416 ///ystem.err.println("Against whats in the metadata file: " + raw_value);
417 if(!raw_value.equals(current_value)) {
418 MSMUtils.setValue(metadata_element, current_value);
419 }
420 }
421 }
422 metadata = null;
423 raw_value = null;
424 raw_element = null;
425 metadata_element = null;
426 } // for all metadata elements
427 metadata_elements = null;
428 } // for all metadata types
429
430 // Now we remove any elements that have been queued for deletion
431 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
432 Element metadata_element = (Element) queued_for_removal.get(a);
433 Element parent = (Element) metadata_element.getParentNode();
434 parent.removeChild(metadata_element);
435
436 up_to_date = false;
437 }
438 queued_for_removal.clear();
439
440 // If the fileset no longer has any metadata remove it
441 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
442 if (metadata_elements.getLength()==0) {
443 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
444 if (metadata_elements.getLength()==0) {
445 directorymetadata_element.removeChild(fileset_element);
446 up_to_date = false;
447 }
448 }
449 metadata_elements = null;
450 fileset_element = null;
451 filename_text = null;
452 } // for each fileset element
453
454 fileset_elements = null;
455 directorymetadata_element = null;
456 }
457 catch (Exception exception) {
458 DebugStream.printStackTrace(exception);
459 }
460 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
461 queued_for_removal = null;
462 return metadatum;
463 }
464
465 /** returns true if the document has at least one Metadata or XMetadata element */
466 public boolean hasMetadata() {
467 boolean has_meta = true;
468 try {
469 // Retrieve the document element.
470 Element directory_metadata_element = base_document.getDocumentElement();
471 NodeList metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
472 if (metadata_nodes.getLength()==0) {
473 // try extracted metadata
474 metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
475 if (metadata_nodes.getLength()==0) {
476 has_meta = false;
477 }
478 }
479 directory_metadata_element=null;
480 metadata_nodes=null;
481 }
482 catch (Exception error) {
483 DebugStream.printStackTrace(error);
484 }
485 return has_meta;
486 }
487
488 /** Determine if this document has been saved recently, and thus xml file version is up to date. */
489 public boolean isUpToDate() {
490 return false;
491 }
492
493 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
494 public boolean isValid() {
495 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
496 String doctype_name = base_document.getDoctype().getName();
497 String root_name = base_document.getDocumentElement().getTagName();
498 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
499 }
500
501 /** Remove all of the extracted metadata (XMetadata) from this document. */
502 public void removeExtractedMetadata() {
503 try {
504 Element document_element = base_document.getDocumentElement();
505 NodeList extracted_metadata_elements = document_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
506 document_element = null;
507 for(int i = extracted_metadata_elements.getLength(); i != 0; i--) {
508 Element extracted_metadata_element = (Element) extracted_metadata_elements.item(i - 1);
509 String element_name = extracted_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
510 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(element_name);
511 if(element != null) {
512 element.dec();
513 }
514 element = null;
515 ///ystem.err.println("Removing extracted metadata: " + element_name + "=" + MSMUtils.getValue(extracted_metadata_element));
516 element_name = null;
517 Node parent_node = extracted_metadata_element.getParentNode();
518 parent_node.removeChild(extracted_metadata_element);
519 parent_node = null;
520 extracted_metadata_element = null;
521 }
522 extracted_metadata_elements = null;
523 up_to_date = false;
524 }
525 catch(Exception exception) {
526 DebugStream.println("Exception in MetadataXMLFile.removeExtractedMetadata() - unexpected");
527 DebugStream.printStackTrace(exception);
528 }
529 }
530
531 /** Remove the given metadata from this document.If filename is null, then removes directory level metadata, otherwise just removes it from the specified file. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
532 public void removeMetadata(String filename, Metadata metadata) {
533 DebugStream.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
534 try {
535 boolean found = false;
536 boolean first_metadata_element_found = true;
537 boolean make_next_metadata_element_overwrite = false;
538 boolean remove_fileset = false;
539 // is this extracted or normal metadata?
540 String removing_metadata_name = metadata.getElement().getName();
541 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
542 String current_metadata_type = StaticStrings.METADATA_ELEMENT;
543 if(removing_metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
544 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
545 removing_metadata_name = removing_metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
546 }
547
548 // Retrieve the document element.
549 Element directorymetadata_element = base_document.getDocumentElement();
550 // Iterate through the filesets looking for the appropriate one.
551 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
552 for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
553 Element fileset_element = (Element) fileset_elements.item(i);
554 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
555 for(int j = 0; !found && j < filename_elements.getLength(); j++) {
556 Element filename_element = (Element) filename_elements.item(j);
557 String filename_text = MSMUtils.getValue(filename_element);
558 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
559 // Retrieve the Metadata Elements for this fileset, and iterate through them looking for the one which we are to remove.
560 NodeList metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
561 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
562 Element metadata_element = (Element) metadata_elements.item(l);
563 String element = metadata_element.getAttribute("name");
564 String value = MSMUtils.getValue(metadata_element);
565 // See if this is the metadata we wish to remove
566 if(element.equals(removing_metadata_name)) {
567 if(value.equals(metadata.getValueNode().getFullPath(false))) {
568 // Remove it
569 ///ystem.err.println("Remove " + element + "-" + value);
570 Element parent_elem = (Element)metadata_element.getParentNode();
571 parent_elem.removeChild(metadata_element);
572
573 //description_element.removeChild(metadata_element);
574 found = true;
575 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
576 if(first_metadata_element_found && !metadata.accumulates()) {
577 ///ystem.err.println("First of this element found!");
578 make_next_metadata_element_overwrite = true;
579 }
580 }
581 // If this was the first metadata we've found with the element of the one to be removed set first found to false.
582 else if(first_metadata_element_found) {
583 ///ystem.err.println("Found a matching element: " + element + "=" + value);
584 first_metadata_element_found = false;
585 }
586 // Otherwise we should make this metadata overwrite as requested.
587 else if(make_next_metadata_element_overwrite) {
588 ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
589 metadata_element.setAttribute(MODE_ATTRIBUTE, "");
590 }
591 }
592 value = null;
593 element = null;
594 metadata_element = null;
595 } // for each metadata
596 metadata_elements = null;
597 } // if the filename matches
598
599 if (found) {
600 // if we found an element and removed it, we now want to check whether the fileset is empty or not
601 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
602 if (metadata_elements.getLength() ==0) {
603 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
604 if (metadata_elements.getLength() ==0) {
605 // remove the fileset
606 directorymetadata_element.removeChild(fileset_element);
607 }
608 }
609 metadata_elements = null;
610 }
611 filename_text = null;
612 filename_element = null;
613 } // for each filename element
614 filename_elements = null;
615 fileset_element = null;
616 } // for each fileset element
617 fileset_elements = null;
618 directorymetadata_element = null;
619 up_to_date = false;
620 }
621 catch (Exception error) {
622 DebugStream.printStackTrace(error);
623 }
624 }
625
626 /** Change the up to date flag.
627 * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise
628 */
629 public void setUpToDate(boolean up_to_date) {
630 this.up_to_date = up_to_date;
631 }
632}
Note: See TracBrowser for help on using the repository browser.