source: trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFile.java@ 7624

Last change on this file since 7624 was 7624, checked in by mdewsnip, 20 years ago

Fixed quite a few places where the GLI was looking for "ex" instead of "ex." to determine if a piece of metadata was extracted.

  • Property svn:keywords set to Author Date Id Revision
File size: 30.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29import java.io.*;
30import java.util.*;
31import org.greenstone.gatherer.Gatherer;
32import org.greenstone.gatherer.msm.MetadataXMLFileManager;
33import org.greenstone.gatherer.msm.MSMUtils;
34import org.greenstone.gatherer.util.Codec;
35import org.greenstone.gatherer.util.HashMap3D;
36import org.greenstone.gatherer.util.StaticStrings;
37import org.greenstone.gatherer.util.Utility;
38import org.greenstone.gatherer.valuetree.GValueModel;
39import org.greenstone.gatherer.valuetree.GValueNode;
40import org.w3c.dom.*;
41
42/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
43 * @author John Thompson, Greenstone Digital Library, University of Waikato
44 * @version 2.3b
45 */
46public class MetadataXMLFile {
47 /** Record if the document this object is based on is up to date. */
48 private boolean up_to_date = true;
49 /** The document this class sources its data from. */
50 private Document base_document;
51 static final private String ACCUMULATE = "accumulate";
52 /** The pattern to match when searching for directory level assignments. */
53 static final private String DIRECTORY_FILENAME = ".*";
54 static final private String DESCRIPTION_ELEMENT = "Description";
55 static final public String FILENAME_ELEMENT = "FileName";
56 static final public String FILESET_ELEMENT = "FileSet";
57 static final private String HVALUE_ATTRIBUTE = "hvalue";
58 static final private String MODE_ATTRIBUTE = "mode";
59 static final private String OVERWRITE = "overwrite";
60 static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
61
/** Constructor which creates a brand new metadata.xml document by parsing the directory metadata template. */
public MetadataXMLFile() {
    // A new document is simply a freshly parsed copy of the template.
    base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
}
67
/** Constructor which parses an existing metadata.xml document.
 * If parsing throws, the problem is reported and the wrapped document is left null.
 * @param file the metadata.xml file to parse
 */
public MetadataXMLFile(File file) {
    try {
        this.base_document = Utility.parse(file.getAbsolutePath(), false);
    }
    catch (Exception error) {
        // Poorly formed, or completely invalid metadata.xml file! Report it, as the other
        // methods of this class do, rather than hiding the failure completely.
        Gatherer.printStackTrace(error);
    }
}
77
/** Constructor which wraps around an already parsed metadata.xml document.
 * @param base_document the document this object will read from and modify
 */
public MetadataXMLFile(Document base_document) {
    this.base_document = base_document;
}
82
83 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */
84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
85 {
86 ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
87 try {
88 // Retrieve the document element
89 Element gdm_element = base_document.getDocumentElement();
90
91 // Find the fileset element for the filename
92 Element fileset_element = findFileSetElementForFile(gdm_element, filename);
93
94 // If there is no existing fileset, then create one
95 if (fileset_element == null) {
96 fileset_element = base_document.createElement(FILESET_ELEMENT);
97 Element filename_element = base_document.createElement(FILENAME_ELEMENT);
98 Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
99 fileset_element.appendChild(filename_element);
100 fileset_element.appendChild(description_element);
101 Text filename_text = null;
102 // If the filename is null then we add a directory metadata set as gdm_element's first child
103 if (filename == null) {
104 filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
105 if (gdm_element.hasChildNodes()) {
106 gdm_element.insertBefore(fileset_element, gdm_element.getFirstChild());
107 }
108 else {
109 gdm_element.appendChild(fileset_element);
110 }
111 }
112 // Otherwise we append the new fileset to gdm_element's children
113 else {
114 ///ystem.err.println("New fileset for " + filename);
115 filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
116 ///ystem.err.println("After transform: " + filename);
117 filename_text = base_document.createTextNode(filename);
118 gdm_element.appendChild(fileset_element);
119 }
120 filename_element.appendChild(filename_text);
121 filename_text = null;
122 description_element = null;
123 filename_element = null;
124 }
125 // Now, finally, we can add the metadata.
126 Element metadata_element = null;
127 String name = metadata.getElement().getName();
128 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
129 String current_metadata_type = StaticStrings.METADATA_ELEMENT;
130 if(name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
131 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
132 name = name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
133 }
134
135 metadata_element = base_document.createElement(current_metadata_type);
136 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
137
138 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
139 // also we are checking for the existence of exactly the same metadata cos sometimes we can be asked to add the same metadata twice.
140 boolean will_accumulate = false;
141 // only look through Metadata or XMetadata depending on which type we are trying to add
142 NodeList sibling_metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
143 for (int i=0; i<sibling_metadata_elements.getLength(); i++) {
144 Element sib_meta = (Element) sibling_metadata_elements.item(i);
145 if(name.equals(sib_meta.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
146 // found one with the same name - the new metadata will accumulate
147 will_accumulate = true;
148 // check for the same value
149 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sib_meta))) {
150 return;
151 }
152
153 }
154 sib_meta = null;
155 }
156 sibling_metadata_elements = null;
157
158 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
159 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
160 }
161 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
162 // getAbsoluteValue now does return the full path
163 String node_value = metadata.getAbsoluteValue();
164 //GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
165 //String node_value = null;
166 //if(model != null && model.isHierarchy()) {
167 //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
168 //node_value = metadata.getValueNode().getFullPath(false);
169 //}
170 //else {
171 //node_value = metadata.getAbsoluteValue();
172 // }
173 ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'");
174 metadata_element.appendChild(base_document.createTextNode(node_value));
175 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
176 NodeList description_elements = fileset_element.getElementsByTagName("Description");
177 Element description_element = (Element) description_elements.item(0);
178 description_element.appendChild(metadata_element);
179 description_element = null;
180 metadata_element = null;
181 fileset_element = null;
182 gdm_element = null;
183 up_to_date = false;
184 }
185 catch (Exception error) {
186 Gatherer.printStackTrace(error);
187 }
188 }
189
190 /** this is used to 'purge' the metadata - I've taken the purge code out of getMetadata and put it in to here, cos its only called from one place and we dont want to retrieve the metadata, just update it */
191 public void cleanUpMetadataRefs() {
192 //Gatherer.println("clean up metadata refs!");
193
194 String file_relative_path = "";
195 try {
196 // Retrieve the document element.
197 Element directorymetadata_element = base_document.getDocumentElement();
198 // Iterate through all the metadata
199 // We have to do this for each type of metadata - do we need it for extracted metadata - will this only affect hierarchical metadata in which case only need to do normal metadata ?
200 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
201 NodeList metadata_elements = directorymetadata_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
202 for(int l = 0; l < metadata_elements.getLength(); l++) {
203 Element metadata_element = (Element) metadata_elements.item(l);
204 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
205 String raw_value = MSMUtils.getValue(metadata_element);
206 //
207 // ***** LEGACY SUPPORT *****
208 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
209 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
210 Gatherer.println("Detected Legacy Path: " + raw_value);
211 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
212 MSMUtils.setValue(metadata_element, raw_value);
213 }
214
215 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
216 if (element != null) {
217 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
218 String current_value = value.getFullPath(false);
219 if(!raw_value.equals(current_value)) {
220 // set the new value
221 MSMUtils.setValue(metadata_element, current_value);
222 }
223 value = null;
224 current_value = null;
225 }
226 element = null;
227 metadata_element = null;
228 raw_element = null;
229 raw_value = null;
230 } // for each metadata element
231 metadata_elements = null;
232 } // for each metadata type
233 } catch (Exception error) {
234 Gatherer.self.printStackTrace(error);
235 }
236 }
237
238
239 private Element findFileSetElementForFile(Element gdm_element, String filename)
240 {
241 // Iterate through the filesets looking for one that matches the given filename
242 NodeList fileset_elements = gdm_element.getElementsByTagName(FILESET_ELEMENT);
243 for (int i = 0; i < fileset_elements.getLength(); i++) {
244 Element fileset_element = (Element) fileset_elements.item(i);
245
246 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
247 for (int j = 0; j < filename_elements.getLength(); j++) {
248 Element filename_element = (Element) filename_elements.item(j);
249 String filename_pattern = MSMUtils.getValue(filename_element);
250
251 // System.err.println("Checking " + filename + " against " + filename_pattern);
252
253 // Is this a file match?
254 if (filename != null && filename.matches(filename_pattern) &&
255 !filename_pattern.equals(DIRECTORY_FILENAME)) {
256 // System.err.println("Existing file fileset!");
257 return fileset_element;
258 }
259 // Is this a folder match?
260 else if (filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
261 // System.err.println("Existing folder fileset!");
262 return fileset_element;
263 }
264 }
265 }
266
267 // No match found
268 return null;
269 }
270
271
272 /** Retrieve the document this class is wrapping. */
273 public Document getDocument() {
274 return base_document;
275 }
276
277 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. */
278 // !! Michael has written a much nicer version of this function !!
279 // Kath has cleaned up this version a bit
280 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
281 Gatherer.println("Get metadata for " + filename);
282 Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level);
283
284 // Determine the file's path relative to the location of the metadata.xml file
285 String file_relative_path = ((filename != null) ? filename : "");
286 if (file_relative_path.endsWith(File.separator)) {
287 file_relative_path = file_relative_path.substring(0, file_relative_path.length() - File.separator.length());
288 }
289
290 ArrayList metadatum = null;
291 ArrayList queued_for_removal = new ArrayList();
292 if(metadatum_so_far == null) {
293 metadatum = new ArrayList();
294 }
295 else {
296 metadatum = metadatum_so_far;
297 }
298 try {
299 // Retrieve the document element.
300 Element directorymetadata_element = base_document.getDocumentElement();
301 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
302 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
303 for(int i = 0; i < fileset_elements.getLength(); i++) {
304 Element fileset_element = (Element) fileset_elements.item(i);
305 boolean fileset_matches = false;
306 // look through the filename elements of this and see if we have a match
307 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
308 String filename_text = "";
309 for(int j = 0; j < filename_elements.getLength(); j++) {
310 Element filename_element = (Element) filename_elements.item(j);
311 filename_text = MSMUtils.getValue(filename_element);
312 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME))) {
313 fileset_matches = true;
314 filename_element = null;
315 break;
316 }
317 filename_element = null;
318 }
319
320 if (!fileset_matches) {
321 // go to teh next fileset
322 fileset_element = null;
323 continue;
324 }
325 // If this fileset matches add all of the metadata found in the fileset, remembering to abide by desired mode (accumulate vs. overwrite).
326 // We have to do this for each type of metadata
327 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
328 NodeList metadata_elements = fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
329 for(int l = 0; l < metadata_elements.getLength(); l++) {
330 Element metadata_element = (Element) metadata_elements.item(l);
331 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
332 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
333 String raw_value = MSMUtils.getValue(metadata_element);
334 // ***** LEGACY SUPPORT *****
335 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
336 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
337 Gatherer.println("Detected Legacy Path: " + raw_value);
338 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
339 Gatherer.println("Updated Path To: " + raw_value);
340 MSMUtils.setValue(metadata_element, raw_value);
341 }
342 // **************************
343 // Using the element string and value, retrieve a matching Metadata object from the cache
344 Metadata metadata = null;
345 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
346 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
347 if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value)) {
348 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
349 metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
350 }
351 else {
352 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
353 if (element != null) {
354 GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
355 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
356 metadata = new Metadata(element, value);
357 MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
358
359 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
360 value = null;
361 element = null;
362 }
363 }
364
365 // Determine whether this metadata is file or folder level
366 if (metadata != null) {
367 // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
368 // Direct match to regular expression
369 if (file_relative_path.matches(filename_text)) {
370 boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
371 metadata.setFile(file);
372 metadata.setFileLevel(!is_folder_level);
373 }
374 // Indirect match to regular expression (always folder level)
375 else if (file_relative_path.startsWith(filename_text + File.separator)) {
376 metadata.setFile(new File(file, filename_text));
377 metadata.setFileLevel(false);
378 }
379
380 // If mode is overwrite, then remove any previous values for this metadata element.
381 if(mode.equals("accumulate")) {
382 metadata.setAccumulate(true);
383 }
384 else {
385 metadata.setAccumulate(false);
386 ///ystem.err.println("Metadata overwrites: " + metadata);
387 for(int m = metadatum.size() - 1; m >= 0; m--) {
388 Metadata old_metadata = (Metadata) metadatum.get(m);
389 if(old_metadata.getElement().equals(metadata.getElement())) {
390 metadatum.remove(m);
391 ///ystem.err.println("Removing overridden metadata: " + old_metadata);
392 }
393 old_metadata = null;
394 }
395 }
396 mode = null;
397 // Add the completed metadata and clean up
398 ///ystem.err.println("Adding metadata: " + metadata);
399 metadatum.add(metadata);
400 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
401 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
402 ///atherer.println("Have we been asked to remove the metadata: " + metadata);
403 ///atherer.println("Given:");
404 ///atherer.println("\tremove = " + remove);
405 ///atherer.println("\tfilename = " + filename);
406 ///atherer.println("\tfilename_text = " + filename_text + "?");
407 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
408 ///atherer.println("Yes! Queuing for Removal.");
409 queued_for_removal.add(metadata_element);
410 }
411 else {
412 ///atherer.println("No. Updating.");
413 String current_value = metadata.getValueNode().getFullPath(false);
414 ///ystem.err.println("Checking the current mdv path: " + current_value);
415 ///ystem.err.println("Against whats in the metadata file: " + raw_value);
416 if(!raw_value.equals(current_value)) {
417 MSMUtils.setValue(metadata_element, current_value);
418 }
419 }
420 }
421 metadata = null;
422 raw_value = null;
423 raw_element = null;
424 metadata_element = null;
425 } // for all metadata elements
426 metadata_elements = null;
427 } // for all metadata types
428
429 // Now we remove any elements that have been queued for deletion
430 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
431 Element metadata_element = (Element) queued_for_removal.get(a);
432 Element parent = (Element) metadata_element.getParentNode();
433 parent.removeChild(metadata_element);
434
435 up_to_date = false;
436 }
437 queued_for_removal.clear();
438
439 // If the fileset no longer has any metadata remove it
440 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
441 if (metadata_elements.getLength()==0) {
442 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
443 if (metadata_elements.getLength()==0) {
444 directorymetadata_element.removeChild(fileset_element);
445 up_to_date = false;
446 }
447 }
448 metadata_elements = null;
449 fileset_element = null;
450 filename_text = null;
451 } // for each fileset element
452
453 fileset_elements = null;
454 directorymetadata_element = null;
455 }
456 catch (Exception error) {
457 Gatherer.self.printStackTrace(error);
458 }
459 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
460 queued_for_removal = null;
461 return metadatum;
462 }
463
464 /** returns true if the document has at least one Metadata or XMetadata element */
465 public boolean hasMetadata() {
466 boolean has_meta = true;
467 try {
468 // Retrieve the document element.
469 Element directory_metadata_element = base_document.getDocumentElement();
470 NodeList metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
471 if (metadata_nodes.getLength()==0) {
472 // try extracted metadata
473 metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
474 if (metadata_nodes.getLength()==0) {
475 has_meta = false;
476 }
477 }
478 directory_metadata_element=null;
479 metadata_nodes=null;
480 }
481 catch (Exception error) {
482 Gatherer.printStackTrace(error);
483 }
484 return has_meta;
485 }
486
487 /** Determine if this document has been saved recently, and thus xml file version is up to date. */
488 public boolean isUpToDate() {
489 return false;
490 }
491
492 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
493 public boolean isValid() {
494 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
495 String doctype_name = base_document.getDoctype().getName();
496 String root_name = base_document.getDocumentElement().getTagName();
497 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
498 }
499
500 /** Remove all of the extracted metadata (XMetadata) from this document. */
501 public void removeExtractedMetadata() {
502 try {
503 Element document_element = base_document.getDocumentElement();
504 NodeList extracted_metadata_elements = document_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
505 document_element = null;
506 for(int i = extracted_metadata_elements.getLength(); i != 0; i--) {
507 Element extracted_metadata_element = (Element) extracted_metadata_elements.item(i - 1);
508 String element_name = extracted_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
509 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(element_name);
510 if(element != null) {
511 element.dec();
512 }
513 element = null;
514 ///ystem.err.println("Removing extracted metadata: " + element_name + "=" + MSMUtils.getValue(extracted_metadata_element));
515 element_name = null;
516 Node parent_node = extracted_metadata_element.getParentNode();
517 parent_node.removeChild(extracted_metadata_element);
518 parent_node = null;
519 extracted_metadata_element = null;
520 }
521 extracted_metadata_elements = null;
522 up_to_date = false;
523 }
524 catch(Exception exception) {
525 Gatherer.println("Exception in MetadataXMLFile.removeExtractedMetadata() - unexpected");
526 Gatherer.printStackTrace(exception);
527 }
528 }
529
530 /** Remove the given metadata from this document.If filename is null, then removes directory level metadata, otherwise just removes it from the specified file. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
531 public void removeMetadata(String filename, Metadata metadata) {
532 Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
533 try {
534 boolean found = false;
535 boolean first_metadata_element_found = true;
536 boolean make_next_metadata_element_overwrite = false;
537 boolean remove_fileset = false;
538 // is this extracted or normal metadata?
539 String removing_metadata_name = metadata.getElement().getName();
540 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
541 String current_metadata_type = StaticStrings.METADATA_ELEMENT;
542 if(removing_metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
543 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
544 removing_metadata_name = removing_metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
545 }
546
547 // Retrieve the document element.
548 Element directorymetadata_element = base_document.getDocumentElement();
549 // Iterate through the filesets looking for the appropriate one.
550 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
551 for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
552 Element fileset_element = (Element) fileset_elements.item(i);
553 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
554 for(int j = 0; !found && j < filename_elements.getLength(); j++) {
555 Element filename_element = (Element) filename_elements.item(j);
556 String filename_text = MSMUtils.getValue(filename_element);
557 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
558 // Retrieve the Metadata Elements for this fileset, and iterate through them looking for the one which we are to remove.
559 NodeList metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
560 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
561 Element metadata_element = (Element) metadata_elements.item(l);
562 String element = metadata_element.getAttribute("name");
563 String value = MSMUtils.getValue(metadata_element);
564 // See if this is the metadata we wish to remove
565 if(element.equals(removing_metadata_name)) {
566 if(value.equals(metadata.getValueNode().getFullPath(false))) {
567 // Remove it
568 ///ystem.err.println("Remove " + element + "-" + value);
569 Element parent_elem = (Element)metadata_element.getParentNode();
570 parent_elem.removeChild(metadata_element);
571
572 //description_element.removeChild(metadata_element);
573 found = true;
574 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
575 if(first_metadata_element_found && !metadata.accumulates()) {
576 ///ystem.err.println("First of this element found!");
577 make_next_metadata_element_overwrite = true;
578 }
579 }
580 // If this was the first metadata we've found with the element of the one to be removed set first found to false.
581 else if(first_metadata_element_found) {
582 ///ystem.err.println("Found a matching element: " + element + "=" + value);
583 first_metadata_element_found = false;
584 }
585 // Otherwise we should make this metadata overwrite as requested.
586 else if(make_next_metadata_element_overwrite) {
587 ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
588 metadata_element.setAttribute(MODE_ATTRIBUTE, "");
589 }
590 }
591 value = null;
592 element = null;
593 metadata_element = null;
594 } // for each metadata
595 metadata_elements = null;
596 } // if the filename matches
597
598 if (found) {
599 // if we found an element and removed it, we now want to check whether the fileset is empty or not
600 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
601 if (metadata_elements.getLength() ==0) {
602 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
603 if (metadata_elements.getLength() ==0) {
604 // remove the fileset
605 directorymetadata_element.removeChild(fileset_element);
606 }
607 }
608 metadata_elements = null;
609 }
610 filename_text = null;
611 filename_element = null;
612 } // for each filename element
613 filename_elements = null;
614 fileset_element = null;
615 } // for each fileset element
616 fileset_elements = null;
617 directorymetadata_element = null;
618 up_to_date = false;
619 }
620 catch (Exception error) {
621 Gatherer.printStackTrace(error);
622 }
623 }
624
625 /** Change the up to date flag.
626 * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise
627 */
628 public void setUpToDate(boolean up_to_date) {
629 this.up_to_date = up_to_date;
630 }
631}
Note: See TracBrowser for help on using the repository browser.