/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
package org.greenstone.gatherer.msm;

import java.io.*;
import java.util.*;
import org.greenstone.gatherer.Gatherer;
import org.greenstone.gatherer.msm.GDMManager;
import org.greenstone.gatherer.msm.MSMUtils;
import org.greenstone.gatherer.util.Codec;
import org.greenstone.gatherer.util.HashMap3D;
import org.greenstone.gatherer.util.StaticStrings;
import org.greenstone.gatherer.util.Utility;
import org.greenstone.gatherer.valuetree.GValueModel;
import org.greenstone.gatherer.valuetree.GValueNode;
import org.w3c.dom.*;

/** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
 * <p>The document is organised as a root element containing <code>FileSet</code> elements. Each file set holds one or more <code>FileName</code> elements (regular expressions, with <code>.*</code> reserved for directory level assignments) and a <code>Description</code> element holding the individual metadata entries.</p>
 * @author John Thompson, Greenstone Digital Library, University of Waikato
 * @version 2.3b
 */
public class GDMDocument {
    /** Record if the document this object is based on is up to date. */
    private boolean up_to_date = true;
    /** The document this class sources its data from. May be null if parsing the source file failed. */
    private Document base_document;

    /** Value of the mode attribute marking an entry that adds to, rather than replaces, inherited values. */
    static final private String ACCUMULATE = "accumulate";
    /** The pattern to match when searching for directory level assignments. */
    static final private String DIRECTORY_FILENAME = ".*";
    static final private String DESCRIPTION_ELEMENT = "Description";
    static final private String FILENAME_ELEMENT = "FileName";
    static final private String FILESET_ELEMENT = "FileSet";
    static final private String HVALUE_ATTRIBUTE = "hvalue";
    static final private String MODE_ATTRIBUTE = "mode";
    static final private String OVERWRITE = "overwrite";
    /** The tag names used for metadata entries: index 0 is ordinary metadata, index 1 is extracted metadata. */
    static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};

    /** Constructor which creates a brand new metadata.xml document. */
    public GDMDocument() {
        // Create new document. We do this by loading a copy of the template.
        this.base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
    }

    /** Constructor which parses an existing metadata.xml document. If the file cannot be parsed the wrapped document is left null, and isValid() will report false.
     * @param file the metadata.xml file to parse
     */
    public GDMDocument(File file) {
        try {
            this.base_document = Utility.parse(file.getAbsolutePath(), false);
        }
        catch (Exception error) {
            // Poorly formed, or completely invalid metadata.xml file! Not fatal - any xml file may happen to be called metadata.xml - but leave a trace for debugging.
            Gatherer.println("Unable to parse metadata file " + file + ": " + error);
        }
    }

    /** Constructor which wraps around an existing metadata.xml document.
     * @param base_document the DOM document to wrap
     */
    public GDMDocument(Document base_document) {
        this.base_document = base_document;
    }

    /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace.
     * <p>If the file set already contains an entry with the same element and the same value nothing is added.</p>
     * @param filename the name of the file to assign the metadata to, or null to assign it at directory level
     * @param metadata the metadata to add
     * @param force_accumulate true to mark the new entry as accumulating even if it is the first entry for its element
     */
    public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) {
        Gatherer.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
        try {
            // Retrieve the document element.
            Element directorymetadata_element = base_document.getDocumentElement();
            // Look for a fileset that matches the given filename, and if there isn't one, then its time to create one.
            Element fileset_element = findFileset(directorymetadata_element, filename);
            if(fileset_element == null) {
                fileset_element = createFileset(directorymetadata_element, filename);
            }
            // Now, finally, we can add the metadata.
            Element metadata_element = null;
            String name = metadata.getElement().getName();
            // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
            if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) {
                metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);
                name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1);
            }
            else {
                metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]);
            }
            metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
            // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
            // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So every entry with the same element is compared, not just the first, and we return if any holds exactly the same value.
            boolean will_accumulate = false;
            NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
            for(int k = 0; k < sibling_description_elements.getLength(); k++) {
                Element sibling_description_element = (Element) sibling_description_elements.item(k);
                // We have to do this for each type of metadata
                for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
                    NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
                    for(int l = 0; l < sibling_metadata_elements.getLength(); l++) {
                        Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l);
                        if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(name)) {
                            if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) {
                                return;
                            }
                            will_accumulate = true;
                        }
                    }
                }
            }
            if(will_accumulate || force_accumulate) {
                metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
            }
            // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the next time we look at this file, and to avoid having to open and rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
            GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
            String node_value = null;
            if(model != null && model.isHierarchy()) {
                node_value = metadata.getValueNode().getFullPath(false);
            }
            else {
                node_value = metadata.getAbsoluteValue();
            }
            System.err.println("Creating node in GDMDocument: '" + node_value + "'");
            metadata_element.appendChild(base_document.createTextNode(node_value));
            // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
            Element description_element = (Element) fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
            if(description_element == null) {
                // An existing fileset may have lost its description, so give it a fresh one rather than failing.
                description_element = base_document.createElement(DESCRIPTION_ELEMENT);
                fileset_element.appendChild(description_element);
            }
            description_element.appendChild(metadata_element);
            up_to_date = false;
        }
        catch (Exception error) {
            Gatherer.printStackTrace(error);
        }
    }

    /** Count the metadata entries, of every type, found within the descriptions of the file sets of this document.
     * @return the number of metadata entries, or whatever had been counted so far if an error occurred
     */
    public int countMetadata() {
        int count = 0;
        try {
            // Retrieve the document element.
            Element directorymetadata_element = base_document.getDocumentElement();
            // Iterate through the filesets, and their descriptions, tallying the entries.
            NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
            for(int i = 0; i < fileset_elements.getLength(); i++) {
                Element fileset_element = (Element) fileset_elements.item(i);
                NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
                for(int k = 0; k < description_elements.getLength(); k++) {
                    Element description_element = (Element) description_elements.item(k);
                    // We have to do this for each type of metadata
                    for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
                        count = count + description_element.getElementsByTagName(ALL_METADATA_TYPES[z]).getLength();
                    }
                }
            }
        }
        catch (Exception error) {
            Gatherer.printStackTrace(error);
        }
        return count;
    }

    /** Retrieve the document this class is wrapping.
     * @return the wrapped DOM document, which may be null if the source file could not be parsed
     */
    public Document getDocument() {
        return base_document;
    }

    /** Get all of the metadata, including directory level, associated with this file. Equivalent to the six argument version with purge set to false.
     * @param filename the name of the file to retrieve metadata for, or null for directory level metadata
     * @param remove true to remove the directly assigned metadata from the document as it is retrieved
     * @param metadatum_so_far the list of Metadata to add to, or null to start a new list
     * @param file the file to record against each piece of metadata as its source
     * @param append_folder_level true to also include folder level metadata
     * @return the list of Metadata found
     */
    public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
        return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false);
    }

    /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file.
     * <p>Note that this method may modify the document even when remove is false: legacy values are rewritten, values which differ from the current value node path are refreshed, and file sets without any description are dropped.</p>
     * @param filename the name of the file to retrieve metadata for, or null for directory level metadata
     * @param remove true to remove the directly assigned metadata from the document as it is retrieved
     * @param metadatum_so_far the list of Metadata to add to (and in which overwritten entries are replaced), or null to start a new list
     * @param file the file to record against each piece of metadata as its source
     * @param append_folder_level true to also include folder level metadata
     * @param purge true to retrieve every piece of metadata regardless of filename, bypassing the metadata cache
     * @return the list of Metadata found, which is metadatum_so_far itself if that was not null
     */
    public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) {
        Gatherer.println("Get metadata for " + filename);
        ArrayList metadatum = (metadatum_so_far == null ? new ArrayList() : metadatum_so_far);
        try {
            // Retrieve the document element.
            Element directorymetadata_element = base_document.getDocumentElement();
            // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. We iterate over snapshots as node lists are live, and we may remove nodes as we go.
            Element[] fileset_elements = snapshot(directorymetadata_element.getElementsByTagName(FILESET_ELEMENT));
            for(int i = 0; i < fileset_elements.length; i++) {
                Element fileset_element = fileset_elements[i];
                Element[] filename_elements = snapshot(fileset_element.getElementsByTagName(FILENAME_ELEMENT));
                for(int j = 0; j < filename_elements.length; j++) {
                    String filename_text = MSMUtils.getValue(filename_elements[j]);
                    if(appliesTo(filename, filename_text, append_folder_level) || purge) {
                        // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
                        collectFilesetMetadata(fileset_element, filename, filename_text, remove, metadatum, file, purge);
                    }
                }
                // If the file set no longer has any description entries, remove it entirely
                if(fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).getLength() == 0) {
                    directorymetadata_element.removeChild(fileset_element);
                }
            }
        }
        catch (Exception error) {
            Gatherer.printStackTrace(error);
        }
        return metadatum;
    }

    /** Determine if this document has been saved recently, and thus xml file version is up to date.
     * NOTE(review): this always answers false, so the up_to_date flag maintained by this class is never consulted. That looks deliberate - getMetadata can alter the document without clearing the flag - but confirm before changing it.
     * @return false, always
     */
    public boolean isUpToDate() {
        return false;
    }

    /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml.
     * @return true if the doctype and the root node are both called GreenstoneDirectoryMetadata, or both called DirectoryMetadata; false otherwise, including when there is no document, no doctype or no root node
     */
    public boolean isValid() {
        if(base_document == null) {
            return false;
        }
        // A document without a DOCTYPE declaration has a null doctype.
        DocumentType doctype = base_document.getDoctype();
        Element root_element = base_document.getDocumentElement();
        if(doctype == null || root_element == null) {
            return false;
        }
        // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
        String doctype_name = doctype.getName();
        String root_name = root_element.getTagName();
        return (("GreenstoneDirectoryMetadata".equals(doctype_name) && "GreenstoneDirectoryMetadata".equals(root_name)) || ("DirectoryMetadata".equals(doctype_name) && "DirectoryMetadata".equals(root_name)));
    }

    /** Remove the given metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance.
     * <p>Only the first file set directly targeted by the filename and containing the metadata is altered. If that file set is left without any metadata it is removed as well.</p>
     * @param filename the name of the file the metadata is assigned to, or null if it is assigned at directory level
     * @param metadata the metadata to remove, identified by its element name and absolute value
     */
    public void removeMetadata(String filename, Metadata metadata) {
        Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
        try {
            boolean found = false;
            // Retrieve the document element.
            Element directorymetadata_element = base_document.getDocumentElement();
            // Iterate through the filesets looking for the one this filename targets. We iterate over snapshots as node lists are live, and we may remove nodes as we go.
            Element[] fileset_elements = snapshot(directorymetadata_element.getElementsByTagName(FILESET_ELEMENT));
            for(int i = 0; !found && i < fileset_elements.length; i++) {
                Element fileset_element = fileset_elements[i];
                Element[] filename_elements = snapshot(fileset_element.getElementsByTagName(FILENAME_ELEMENT));
                for(int j = 0; !found && j < filename_elements.length; j++) {
                    String filename_text = MSMUtils.getValue(filename_elements[j]);
                    // File level requests must never touch the directory level fileset, even though its pattern matches every filename.
                    if(isDirectTarget(filename, filename_text)) {
                        found = removeFromFileset(fileset_element, metadata);
                    }
                }
                // If we found it, removed it, and now the fileset has no metadata left, remove the fileset too.
                if(found && !hasMetadata(fileset_element)) {
                    directorymetadata_element.removeChild(fileset_element);
                }
            }
            up_to_date = false;
        }
        catch (Exception error) {
            Gatherer.printStackTrace(error);
        }
    }

    /** Change the up to date flag.
     * @param up_to_date the new value of the flag
     */
    public void setUpToDate(boolean up_to_date) {
        this.up_to_date = up_to_date;
    }

    /** Copy the entries of a node list, as returned by getElementsByTagName, into an array so they can be iterated safely while nodes are being removed from the document. */
    static private Element[] snapshot(NodeList nodes) {
        Element[] elements = new Element[nodes.getLength()];
        for(int i = 0; i < elements.length; i++) {
            elements[i] = (Element) nodes.item(i);
        }
        return elements;
    }

    /** Determine if a fileset with the given FileName pattern is the one directly assigned to the given filename: the directory level pattern for a null filename, otherwise any matching pattern other than the directory level one. */
    static private boolean isDirectTarget(String filename, String filename_pattern) {
        if(filename == null) {
            return filename_pattern.equals(DIRECTORY_FILENAME);
        }
        return !filename_pattern.equals(DIRECTORY_FILENAME) && filename.matches(filename_pattern);
    }

    /** Determine if a fileset with the given FileName pattern contributes metadata to the given filename, either by matching it, by naming its leading folder, or (for directory level requests or when folder level metadata is wanted) by being the directory level fileset. */
    static private boolean appliesTo(String filename, String filename_text, boolean append_folder_level) {
        if(filename != null) {
            if(filename.matches(filename_text)) {
                return true;
            }
            int separator_index = filename.indexOf(File.separator);
            if(append_folder_level && separator_index != -1 && filename_text.equals(filename.substring(0, separator_index))) {
                return true;
            }
        }
        return (filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME);
    }

    /** Determine if the given fileset still contains a metadata entry of any type. */
    static private boolean hasMetadata(Element fileset_element) {
        for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
            if(fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]).getLength() > 0) {
                return true;
            }
        }
        return false;
    }

    /** Find the first fileset directly assigned to the given filename (null meaning directory level), or null if there is no such fileset. */
    private Element findFileset(Element directorymetadata_element, String filename) {
        NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
        for(int i = 0; i < fileset_elements.getLength(); i++) {
            Element fileset_element = (Element) fileset_elements.item(i);
            NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
            for(int j = 0; j < filename_elements.getLength(); j++) {
                String filename_pattern = MSMUtils.getValue((Element) filename_elements.item(j));
                if(isDirectTarget(filename, filename_pattern)) {
                    return fileset_element;
                }
            }
        }
        return null;
    }

    /** Create a new fileset, with a FileName and an empty Description, for the given filename and attach it to the document. */
    private Element createFileset(Element directorymetadata_element, String filename) {
        Element fileset_element = base_document.createElement(FILESET_ELEMENT);
        Element filename_element = base_document.createElement(FILENAME_ELEMENT);
        Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
        fileset_element.appendChild(filename_element);
        fileset_element.appendChild(description_element);
        Text filename_text = null;
        // If the filename is null then we add a directory metadata set as directorymetadata_element's first child
        if(filename == null) {
            filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
            if(directorymetadata_element.hasChildNodes()) {
                directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild());
            }
            else {
                directorymetadata_element.appendChild(fileset_element);
            }
        }
        // Otherwise we just append the new fileset to directorymetadata_element's children.
        else {
            filename_text = base_document.createTextNode(filename);
            directorymetadata_element.appendChild(fileset_element);
        }
        filename_element.appendChild(filename_text);
        return fileset_element;
    }

    /** Retrieve a Metadata object for the given element name and value, from the shared cache where possible, or null if the collection has no such element. */
    private Metadata lookupMetadata(String raw_element, String raw_value, boolean purge) {
        // Try to retrieve an already constructed piece of metadata - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
        if(GDMManager.metadata_cache.contains(raw_element, raw_value) && !purge) {
            return (Metadata) GDMManager.metadata_cache.get(raw_element, raw_value);
        }
        ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
        if(element == null) {
            return null;
        }
        GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
        Metadata metadata = new Metadata(element, value);
        if(!purge) {
            GDMManager.metadata_cache.put(raw_element, raw_value, metadata);
        }
        return metadata;
    }

    /** Add every piece of metadata in the given fileset to metadatum, honouring overwrite and accumulate modes, then either remove each entry from the document (if remove is set and the fileset is directly assigned to filename) or refresh its stored value. */
    private void collectFilesetMetadata(Element fileset_element, String filename, String filename_text, boolean remove, ArrayList metadatum, File file, boolean purge) {
        // We can only remove pure file level metadata, or folder level iff we were asked for folder level.
        boolean remove_entries = remove && isDirectTarget(filename, filename_text);
        Element[] description_elements = snapshot(fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT));
        for(int k = 0; k < description_elements.length; k++) {
            Element description_element = description_elements[k];
            // We have to do this for each type of metadata
            for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
                Element[] metadata_elements = snapshot(description_element.getElementsByTagName(ALL_METADATA_TYPES[z]));
                for(int l = 0; l < metadata_elements.length; l++) {
                    Element metadata_element = metadata_elements[l];
                    String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
                    String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
                    String raw_value = MSMUtils.getValue(metadata_element);
                    // ***** LEGACY SUPPORT *****
                    // Values containing the escape string are rewritten, in the document as well, using the pipe string.
                    // NOTE(review): an earlier comment described a narrower test (a '\' but no '\\', '[' or ']'); the code has only ever checked for the escape string.
                    if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
                        Gatherer.println("Detected Legacy Path: " + raw_value);
                        raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
                        Gatherer.println("Updated Path To: " + raw_value);
                        MSMUtils.setValue(metadata_element, raw_value);
                    }
                    // **************************
                    // Using the element string and value, retrieve a matching Metadata object.
                    Metadata metadata = lookupMetadata(raw_element, raw_value, purge);
                    if(metadata == null) {
                        continue;
                    }
                    // We determine whether this metadata is file or folder level. It can only be file level if there is no folder path details in filename and if the filename matched the filename text node (it may have matched .* instead)! A null filename is always treated as file level.
                    if(filename != null) {
                        metadata.setFileLevel(filename.indexOf(File.separator) == -1 && filename.equals(filename_text));
                    }
                    else {
                        metadata.setFileLevel(true);
                    }
                    metadata.setFile(file);
                    // If mode is overwrite, then remove any previous values for this metadata element.
                    if(mode.equals(ACCUMULATE)) {
                        metadata.setAccumulate(true);
                    }
                    else {
                        metadata.setAccumulate(false);
                        for(int m = metadatum.size() - 1; m >= 0; m--) {
                            Metadata old_metadata = (Metadata) metadatum.get(m);
                            if(old_metadata.getElement().equals(metadata.getElement())) {
                                metadatum.remove(m);
                            }
                        }
                    }
                    // Add the completed metadata
                    metadatum.add(metadata);
                    if(remove_entries) {
                        description_element.removeChild(metadata_element);
                        // Remove the description element if empty.
                        if(!description_element.hasChildNodes()) {
                            fileset_element.removeChild(description_element);
                        }
                    }
                    else {
                        // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
                        String current_value = metadata.getValueNode().getFullPath(false);
                        if(!raw_value.equals(current_value)) {
                            // Remove old text
                            while(metadata_element.hasChildNodes()) {
                                metadata_element.removeChild(metadata_element.getFirstChild());
                            }
                            // Add new.
                            metadata_element.appendChild(base_document.createTextNode(current_value));
                        }
                    }
                }
            }
        }
    }

    /** Remove the entry matching the given metadata's element name and absolute value from the given fileset, promoting the next entry with the same element to overwrite if the removed entry was the first for its element and did not accumulate. Returns true if an entry was removed. */
    private boolean removeFromFileset(Element fileset_element, Metadata metadata) {
        String target_element = metadata.getElement().getName();
        boolean removed = false;
        // Set once an entry with the target element, but a different value, is seen ahead of the one removed.
        boolean earlier_entry_seen = false;
        Element[] description_elements = snapshot(fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT));
        for(int k = 0; k < description_elements.length; k++) {
            Element description_element = description_elements[k];
            // We have to do this for each type of metadata
            for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
                Element[] metadata_elements = snapshot(description_element.getElementsByTagName(ALL_METADATA_TYPES[z]));
                for(int l = 0; l < metadata_elements.length; l++) {
                    Element metadata_element = metadata_elements[l];
                    if(!metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(target_element)) {
                        continue;
                    }
                    if(removed) {
                        // We only keep looking after a removal when the next entry has to take over as the overwriting one. Any mode other than accumulate is read back as overwrite.
                        metadata_element.setAttribute(MODE_ATTRIBUTE, "");
                        return true;
                    }
                    String value = MSMUtils.getValue(metadata_element);
                    if(value.equals(metadata.getAbsoluteValue())) {
                        // Remove it
                        description_element.removeChild(metadata_element);
                        removed = true;
                        // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
                        if(earlier_entry_seen || metadata.accumulates()) {
                            return true;
                        }
                    }
                    else {
                        earlier_entry_seen = true;
                    }
                }
            }
        }
        return removed;
    }
}