/** *############################################################################ * A component of the Greenstone Librarian Interface, part of the Greenstone * digital library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ * * Copyright (C) 2004 New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *############################################################################ */ package org.greenstone.gatherer.metadata; import java.io.*; import java.util.*; import org.greenstone.gatherer.DebugStream; import org.greenstone.gatherer.util.XMLTools; import org.w3c.dom.*; /** This class represents one metadata.xml file */ public class MetadataXMLFile extends File { static final private String DESCRIPTION_ELEMENT = "Description"; static final private String DIRECTORY_FILENAME = ".*"; static final private String FILENAME_ELEMENT = "FileName"; static final private String FILESET_ELEMENT = "FileSet"; static final private String METADATA_ELEMENT = "Metadata"; // To speed things up a bit we keep the last accessed metadata.xml file in memory static private File loaded_file = null; static private Document loaded_file_document = null; static private boolean loaded_file_changed = false; public MetadataXMLFile(String metadata_xml_file_path) { super(metadata_xml_file_path); } public void addMetadata(File file, ArrayList metadata_values) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; } // Determine the file's path relative to the location of the metadata.xml file String metadata_xml_file_directory_path = getParentFile().getAbsolutePath(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } // Find the appropriate FileSet element for this file Element appropriate_fileset_element = null; // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be extended with new metadata if (current_filename_element_value.equals(file_path_regexp)) { appropriate_fileset_element = current_fileset_element; break; } } } // If no appropriate FileSet element exists create a new one for this file if (appropriate_fileset_element == null) { DebugStream.println("Creating new FileSet element for file since none exists..."); appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT); Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT); new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp)); appropriate_fileset_element.appendChild(new_filename_element); Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT); appropriate_fileset_element.appendChild(new_description_element); loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element); } // Add each of the metadata values to the FileSet's Description element Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0); for (int i = 0; i < metadata_values.size(); i++) { MetadataValue metadata_value = (MetadataValue) metadata_values.get(i); String metadata_element_name_full = metadata_value.getMetadataElement().getFullName(); // Remove any characters that are invalid in XML String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone metadata_value_string = metadata_value_string.replaceAll("\\[", "["); metadata_value_string = metadata_value_string.replaceAll("\\]", "]"); // Check if this piece of metadata has already been assigned to this FileSet element boolean metadata_already_assigned = false; NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check if the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if (current_metadata_element_name_full.equals(metadata_element_name_full)) { // Check if the metadata element value matches String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(metadata_value_string)) { // Metadata already assigned metadata_already_assigned = true; break; } } } // If the piece of metadata hasn't already been assigned, add it now if (!metadata_already_assigned) { // Create a new Metadata element to record this metadata Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT); new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName()); new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override")); new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string)); // Accumulating metadata: add at the end if (metadata_value.isAccumulatingMetadata()) { description_element.appendChild(new_metadata_element); } // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata) else { description_element.insertBefore(new_metadata_element, description_element.getFirstChild()); } } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } public ArrayList getMetadataAssignedToFile(File file) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return new ArrayList(); } loaded_file = this; loaded_file_document = document; } // Determine the file's path relative to the location of the metadata.xml file File metadata_xml_file_directory = getParentFile(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory.getAbsolutePath().length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Build up a list of metadata assigned to this file ArrayList metadata_values = new ArrayList(); // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; boolean is_one_file_only_metadata = true; File folder_metadata_inherited_from = null; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Does this fileset specify metadata for one file only? is_one_file_only_metadata = true; if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) { // No, it specifies metadata for multiple files (but not all the files in the directory) is_one_file_only_metadata = false; } // This fileset specifies metadata for the file if (file_relative_path.matches(current_filename_element_value)) { current_fileset_matches = true; if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { folder_metadata_inherited_from = metadata_xml_file_directory; } break; } // This fileset specifies metadata for the folder the file is in if (file_relative_path.startsWith(current_filename_element_value + File.separator)) { current_fileset_matches = true; folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value); break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Read all the Metadata elements in the fileset NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); String metadata_element_name_full = current_metadata_element.getAttribute("name"); String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); // Ignore legacy crap if (metadata_set_namespace.equals("hidden")) { continue; } MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); if (metadata_set == null) { // The metadata set isn't loaded, so give the option of mapping the element into a loaded set String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full); if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) { // Skip this element if we still don't have a loaded element for it continue; } metadata_element_name_full = target_metadata_element_name_full; metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); } MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full); String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full); // If the element doesn't exist in the metadata set, we're not interested //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file if (metadata_element == null) { metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name); // continue; } // Square brackets need to be escaped because they are a special character in Greenstone String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); metadata_value_string = metadata_value_string.replaceAll("[", "["); metadata_value_string = metadata_value_string.replaceAll("]", "]"); MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string); // If there is no metadata value tree node for this value, create it if (metadata_value_tree_node == null) { DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\""); metadata_element.addMetadataValue(metadata_value_string); metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string); } MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node); metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from); metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata); // Is this accumulating metadata? if (current_metadata_element.getAttribute("mode").equals("accumulate")) { metadata_value.setIsAccumulatingMetadata(true); } // Add the new metadata value to the list metadata_values.add(metadata_value); } } return metadata_values; } public void removeMetadata(File file, ArrayList metadata_values) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; } // Determine the file's path relative to the location of the metadata.xml file String metadata_xml_file_directory_path = getParentFile().getAbsolutePath(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } // Find the appropriate FileSet element for this file Element appropriate_fileset_element = null; // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be extended with new metadata if (current_filename_element_value.equals(file_path_regexp)) { appropriate_fileset_element = current_fileset_element; break; } } } // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file if (appropriate_fileset_element == null) { DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this); return; } // Remove each of the metadata values from the FileSet's Description element for (int i = 0; i < metadata_values.size(); i++) { MetadataValue metadata_value = (MetadataValue) metadata_values.get(i); // Remove any characters that are invalid in XML String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone metadata_value_string = metadata_value_string.replaceAll("\\[", "["); metadata_value_string = metadata_value_string.replaceAll("\\]", "]"); // Find the Metadata element to delete from the fileset String metadata_element_name_full = metadata_value.getMetadataElement().getFullName(); NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if (current_metadata_element_name_full.equals(metadata_element_name_full)) { // Check the metadata element value matches String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(metadata_value_string)) { // Remove this Metadata element current_metadata_element.getParentNode().removeChild(current_metadata_element); // If there are no Metadata elements left now, remove the (empty) FileSet element if (metadata_elements_nodelist.getLength() == 0) { appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element); } break; } } } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } public void replaceMetadata(File file, MetadataValue old_metadata_value, MetadataValue new_metadata_value) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; } // Determine the file's path relative to the location of the metadata.xml file String metadata_xml_file_directory_path = getParentFile().getAbsolutePath(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } // Remove any characters that are invalid in XML String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue()); String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "["); old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "]"); new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "["); new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "]"); // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be edited if (current_filename_element_value.equals(file_path_regexp)) { current_fileset_matches = true; break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Each metadata value is only allowed to be assigned once boolean new_metadata_value_already_exists = false; Element metadata_element_to_edit = null; // Find the Metadata element to replace in the fileset String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName(); NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if (!current_metadata_element_name_full.equals(metadata_element_name_full)) { continue; } // Check the new metadata value doesn't already exist String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(new_metadata_value_string)) { new_metadata_value_already_exists = true; } // Check the metadata element value matches if (current_metadata_value_string.equals(old_metadata_value_string)) { metadata_element_to_edit = current_metadata_element; } } // If the new metadata value already existed, remove the original value if (new_metadata_value_already_exists) { metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit); } // Otherwise replace the old value with the new value // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected) else if (metadata_element_to_edit != null) { XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string); } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } static public void saveLoadedFile() { // If we have a file loaded into memory and it has been modified, save it now if (loaded_file != null && loaded_file_changed == true) { XMLTools.writeXMLFile(loaded_file, loaded_file_document); loaded_file_changed = false; } } /** * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons: * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten) * - To get a complete list of the metadata elements in the collection (used in Design and Format panes) * - To build complete and accurate metadata value trees (used in the Enrich pane) */ public void skimFile() { boolean file_changed = false; // Parse the metadata.xml file DebugStream.println("Skimming metadata.xml file " + this + "..."); Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } // Read all the Metadata elements in the file HashMap target_metadata_element_name_attrs_cache = new HashMap(); NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT); for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(i); String metadata_element_name_full = current_metadata_element.getAttribute("name"); String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); // Ignore legacy crap if (metadata_set_namespace.equals("hidden")) { continue; } MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); if (metadata_set == null) { // The metadata set isn't loaded, so give the option of mapping the element into a loaded set String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full); if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) { // Skip this element if we still don't have a loaded element for it continue; } // Update the metadata.xml file to have the new (namespaced) element name // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full) // we create an Attr object for each target metadata element name, and cache them // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow? Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full); if (target_metadata_element_name_attr == null) { target_metadata_element_name_attr = document.createAttribute("name"); target_metadata_element_name_attr.setValue(target_metadata_element_name_full); target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr); } // Remove the old name attribute and add the new (namespaced) one current_metadata_element.removeAttribute("name"); current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false)); file_changed = true; metadata_element_name_full = target_metadata_element_name_full; metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); } String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full); MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name); // If the element doesn't exist in the metadata set, add it if (metadata_element == null) { metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name); } // Square brackets need to be escaped because they are a special character in Greenstone String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); metadata_value_string = metadata_value_string.replaceAll("[", "["); metadata_value_string = metadata_value_string.replaceAll("]", "]"); metadata_element.addMetadataValue(metadata_value_string); } // Rewrite the metadata.xml file if it has changed if (file_changed) { XMLTools.writeXMLFile(this, document); } } }