/** *############################################################################ * A component of the Greenstone Librarian Interface, part of the Greenstone * digital library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ * * Copyright (C) 2004 New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*############################################################################ */ package org.greenstone.gatherer.metadata; import java.io.*; import java.util.*; import org.greenstone.gatherer.DebugStream; import org.greenstone.gatherer.collection.CollectionTreeNode; import org.greenstone.gatherer.util.XMLTools; import org.w3c.dom.*; import org.greenstone.gatherer.util.Utility; /** This class represents one metadata.xml file */ public class MetadataXMLFile extends File { static final private String DESCRIPTION_ELEMENT = "Description"; static final private String DIRECTORY_FILENAME = ".*"; static final private String FILENAME_ELEMENT = "FileName"; static final private String FILESET_ELEMENT = "FileSet"; static final private String METADATA_ELEMENT = "Metadata"; static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT}; /** Special metadata field: the filename encoding is a unique sort of metadata in * that it is not just information stored with a collection file, but also needs to * be applied in real-time to the collection file (to its filename) for display. 
*/ static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding"; // To speed things up a bit we keep the last accessed metadata.xml file in memory static private File loaded_file = null; static private Document loaded_file_document = null; static private boolean loaded_file_changed = false; public MetadataXMLFile(String metadata_xml_file_path) { super(metadata_xml_file_path); } public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; reEncodeFilenamesInMetadataXML(loaded_file_document); } // Determine the file's path relative to the location of the metadata.xml file String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile()); String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: " // + file_path_regexp + " - " + 
org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp)); // Find the appropriate FileSet element for this file Element appropriate_fileset_element = null; // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be extended with new metadata if (current_filename_element_value.equals(file_path_regexp)) { appropriate_fileset_element = current_fileset_element; break; } } } // If no appropriate FileSet element exists create a new one for this file if (appropriate_fileset_element == null) { DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp); appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT); Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT); new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp)); appropriate_fileset_element.appendChild(new_filename_element); Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT); appropriate_fileset_element.appendChild(new_description_element); // add the fileset element for .* at the top: especially important for // non-accumulating (and override mode) meta. 
Other type fileset elements can be appended if(file_path_regexp.equals(DIRECTORY_FILENAME)) { loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element, loaded_file_document.getDocumentElement().getFirstChild()); } else { loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element); } } // Add each of the metadata values to the FileSet's Description element Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0); for (int i = 0; i < metadata_values.size(); i++) { MetadataValue metadata_value = (MetadataValue) metadata_values.get(i); String metadata_element_name_full = metadata_value.getMetadataElement().getFullName(); // Remove any characters that are invalid in XML String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone metadata_value_string = metadata_value_string.replaceAll("\\[", "["); metadata_value_string = metadata_value_string.replaceAll("\\]", "]"); // the gs.filenameEncoding metadata is unique in that, when added, removed or // changed, it must be applied on the file(name) whose metadata has been adjusted if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) { metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, metadata_value_string, false); // true only if removing meta } // Check if this piece of metadata has already been assigned to this FileSet element boolean metadata_already_assigned = false; NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check if the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if 
(current_metadata_element_name_full.equals(metadata_element_name_full)) { // if the metadata must not accumulate, then edit the current value if (!metadata_value.isAccumulatingMetadata()) { XMLTools.setNodeText(current_metadata_element, metadata_value_string); metadata_already_assigned = true; break; } // Check if the metadata element value matches String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(metadata_value_string)) { // Metadata already assigned metadata_already_assigned = true; break; } } } // If the piece of metadata hasn't already been assigned, add it now if (!metadata_already_assigned) { // Create a new Metadata element to record this metadata Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT); new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName()); new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? 
"accumulate" : "override")); new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string)); // Accumulating metadata: add at the end if (metadata_value.isAccumulatingMetadata()) { description_element.appendChild(new_metadata_element); } // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata) else { description_element.insertBefore(new_metadata_element, description_element.getFirstChild()); } } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return new ArrayList(); } loaded_file = this; loaded_file_document = document; reEncodeFilenamesInMetadataXML(loaded_file_document); } // Determine the file's path relative to the location of the metadata.xml file String file_relative_path = FilenameEncoding.fileToURLEncoding(file); File metadata_xml_file_directory = getParentFile(); String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory); file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); } // Build up a list of metadata assigned to this file ArrayList metadata_values = new ArrayList(); // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for 
(int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; boolean is_one_file_only_metadata = true; File folder_metadata_inherited_from = null; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName())); String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path); //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path)); // Does this fileset specify metadata for one file only? 
is_one_file_only_metadata = true; if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) { // No, it specifies metadata for multiple files (but not all the files in the directory) is_one_file_only_metadata = false; } String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value); //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value)); //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value)); // This fileset specifies metadata for the file // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) { //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n"); current_fileset_matches = true; if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { folder_metadata_inherited_from = metadata_xml_file_directory; } break; } //else { //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value); //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value)); //} // This fileset specifies metadata for the folder the file is in if 
(hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) { current_fileset_matches = true; folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value); break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Read all the Metadata elements in the fileset NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); String metadata_element_name_full = current_metadata_element.getAttribute("name"); // if we're only looking for fileEncoding metadata and this isn't it, skip to the next if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) { continue; } String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); // Ignore legacy crap if (metadata_set_namespace.equals("hidden")) { continue; } MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); if (metadata_set == null) { // The metadata set isn't loaded, so give the option of mapping the element into a loaded set String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full); if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) { // Skip this element if we still don't have a loaded element for it continue; } metadata_element_name_full = target_metadata_element_name_full; metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); } MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full); String 
metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full); // If the element doesn't exist in the metadata set, we're not interested //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file if (metadata_element == null) { metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name); // continue; } // Square brackets need to be escaped because they are a special character in Greenstone String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); metadata_value_string = metadata_value_string.replaceAll("[", "["); metadata_value_string = metadata_value_string.replaceAll("]", "]"); MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string); // If there is no metadata value tree node for this value, create it if (metadata_value_tree_node == null) { DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\""); metadata_element.addMetadataValue(metadata_value_string); metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string); } MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node); metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from); metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata); // Is this accumulating metadata? 
if (current_metadata_element.getAttribute("mode").equals("accumulate")) { metadata_value.setIsAccumulatingMetadata(true); } // Add the new metadata value to the list metadata_values.add(metadata_value); } } return metadata_values; } public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; reEncodeFilenamesInMetadataXML(loaded_file_document); } // Determine the file's path relative to the location of the metadata.xml file String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile()); String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } // Find the appropriate FileSet element for this file Element appropriate_fileset_element = null; // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element 
= (Element) fileset_elements_nodelist.item(i); // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be extended with new metadata if (current_filename_element_value.equals(file_path_regexp)) { appropriate_fileset_element = current_fileset_element; break; } } } // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file if (appropriate_fileset_element == null) { DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this); return; } // Remove each of the metadata values from the FileSet's Description element for (int i = 0; i < metadata_values.size(); i++) { MetadataValue metadata_value = (MetadataValue) metadata_values.get(i); // Remove any characters that are invalid in XML String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone metadata_value_string = metadata_value_string.replaceAll("\\[", "["); metadata_value_string = metadata_value_string.replaceAll("\\]", "]"); // Find the Metadata element to delete from the fileset String metadata_element_name_full = metadata_value.getMetadataElement().getFullName(); NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check the metadata element name matches String current_metadata_element_name_full = 
current_metadata_element.getAttribute("name"); if (current_metadata_element_name_full.equals(metadata_element_name_full)) { // Check the metadata element value matches String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(metadata_value_string)) { // Remove this Metadata element current_metadata_element.getParentNode().removeChild(current_metadata_element); // the gs.filenameEncoding metadata is unique in that, when added, removed or // changed, it must be applied on the file(name) whose metadata has been adjusted if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) { // metadata_value_string will hereafter be the inherited gs.FilenameEncoding // metadata (if any), now that the value at this level has been removed metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, "", true); // true only if *removing* this meta } // If there are no Metadata elements left now, remove the (empty) FileSet element if (metadata_elements_nodelist.getLength() == 0) { appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element); } break; } } } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value) { // If this metadata.xml file isn't the one currently loaded, load it now if (loaded_file != this) { // First we must save out the currently loaded file saveLoadedFile(); // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } loaded_file = this; loaded_file_document = document; reEncodeFilenamesInMetadataXML(loaded_file_document); } // Determine the file's path relative to the location of the metadata.xml file String 
metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile()); String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length()); if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); } // Form a regular expression that specifies the scope of the metadata String file_path_regexp; if (file_relative_path.equals("")) { // Special case for matching all files in the directory file_path_regexp = DIRECTORY_FILENAME; } else { // Convert the file path into a regular expression that will match it file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); } // Remove any characters that are invalid in XML String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue()); String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue()); // Square brackets need to be escaped because they are a special character in Greenstone old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "["); old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "]"); new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "["); new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "]"); // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); 
j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be edited if (current_filename_element_value.equals(file_path_regexp)) { current_fileset_matches = true; break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Each metadata value is only allowed to be assigned once boolean new_metadata_value_already_exists = false; Element metadata_element_to_edit = null; // Find the Metadata element to replace in the fileset String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName(); NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if (!current_metadata_element_name_full.equals(metadata_element_name_full)) { continue; } // Check the new metadata value doesn't already exist String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element); if (current_metadata_value_string.equals(new_metadata_value_string)) { new_metadata_value_already_exists = true; } // Check the metadata element value matches if (current_metadata_value_string.equals(old_metadata_value_string)) { metadata_element_to_edit = current_metadata_element; } } // If the new metadata value already existed, remove the original value if (new_metadata_value_already_exists) { if(metadata_element_to_edit != null) { //????????? 
metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit); } else { System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null"); } } // Otherwise replace the old value with the new value // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected) else if (metadata_element_to_edit != null) { // the gs.filenameEncoding metadata is unique in that, when added, removed or // changed, it must be applied on the file(name) whose metadata has been adjusted if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) { new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false); // true only if removing meta } XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string); } } // Remember that we've changed the file so it gets saved when a new one is loaded loaded_file_changed = true; } static public void saveLoadedFile() { // If we have a file loaded into memory and it has been modified, save it now if (loaded_file != null && loaded_file_changed == true) { //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true)); XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements); loaded_file_changed = false; } } /** * parseXML(metadata.xml) has the side-effect of resolving html entities. * Although this is not done by the GLIEntityResolver usage in parseXML(), something * in parseXML() is resolving the html entities, including those used in carefully * html-entity-escaped filenames. * We need to get the filenames in the DOM correct after parsing a metadata.xml file * into memory, so that we have the correct filenames and so that we'll write it out correctly. * Therefore, always call this method after a successful parseXML() call on a metadata.xml. * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded. 
* At the end of this function, the doc will be modified with the re-encoded filenames.
     *
     */
    static private void reEncodeFilenamesInMetadataXML(Document doc) {
        // Nothing to do unless multiple filename encodings are supported
        if (!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
            return;
        }

        // returns the curr dir path after removing the /./ at end
        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding(".");

        // Read all the FileSet elements in the file
        NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
        for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
            Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);

            // get the value of all FileName elements
            NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
            for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
                Element filename_element = (Element) filename_elements_nodelist.item(j);
                String filename = XMLTools.getElementTextValue(filename_element);

                // The directory-level pattern (".*") is left exactly as it is
                if (filename.equals(DIRECTORY_FILENAME)) {
                    continue;
                }

                // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements

                // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal
                // in URI object created by filenameToURLEncoding). So replace backslashes in regex with the
                // url-encoded hex-value of backslash, %5C.
                String encoded_filename = filename.replace("\\", "%5C");

                // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
                encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path);

                // Reintroduce the backslash characters in place of their %5C hex placeholders
                encoded_filename = encoded_filename.replace("%5C", "\\");

                // Update filename element in DOM
                XMLTools.setElementTextValue(filename_element, encoded_filename);
            }
        }
    }


    /**
     * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
     *   - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
     *   - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
     *   - To build complete and accurate metadata value trees (used in the Enrich pane)
     *
     * If the file cannot be parsed, an error is printed to System.err and the method returns without
     * doing anything else. The file is only written back to disk if at least one Metadata element
     * was renamed into a loaded metadata set.
     */
    public void skimFile() {
        boolean file_changed = false;

        // Parse the metadata.xml file
        DebugStream.println("Skimming metadata.xml file " + this + "...");

        Document document = XMLTools.parseXMLFile(this);
        if (document == null) {
            System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
            return;
        }

        // Always call this method after calling parseXMLFile
        reEncodeFilenamesInMetadataXML(document);

        // Cache of "name" attributes, keyed by the full (namespaced) target element name
        Map<String, Attr> target_metadata_element_name_attrs_cache = new HashMap<String, Attr>();

        // Read all the Metadata elements in the file
        NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
        for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
            Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
            String metadata_element_name_full = current_metadata_element.getAttribute("name");
            String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);

            // Ignore legacy metadata in the "hidden" namespace
            if (metadata_set_namespace.equals("hidden")) {
                continue;
            }

            MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
            if (metadata_set == null) {
                // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
                String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
                if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
                    // Skip this element if we still don't have a loaded element for it
                    continue;
                }

                // Update the metadata.xml file to have the new (namespaced) element name
                // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
                //   we create an Attr object for each target metadata element name, and cache them
                // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
                //   takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
                Attr target_metadata_element_name_attr = target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
                if (target_metadata_element_name_attr == null) {
                    target_metadata_element_name_attr = document.createAttribute("name");
                    target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
                    target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
                }

                // Remove the old name attribute and add the new (namespaced) one.
                // The cached Attr is cloned because each element gets its own attribute node.
                current_metadata_element.removeAttribute("name");
                current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
                file_changed = true;

                // Continue processing with the mapped name
                metadata_element_name_full = target_metadata_element_name_full;
                metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
                metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
            }

            String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
            MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);

            // If the element doesn't exist in the metadata set, add it
            if (metadata_element == null) {
                metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
            }

            // Square brackets need to be escaped because they are a special character in Greenstone.
            // They are replaced by their numeric character entities. String.replace() is used (not
            // replaceAll()) so that "[" is matched literally: as a regular expression a lone "[" is an
            // unclosed character class and makes replaceAll() throw a PatternSyntaxException.
            String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
            metadata_value_string = metadata_value_string.replace("[", "&#091;");
            metadata_value_string = metadata_value_string.replace("]", "&#093;");

            metadata_element.addMetadataValue(metadata_value_string);
        }

        // Rewrite the metadata.xml file if it has changed
        if (file_changed) {
            XMLTools.writeXMLFile(this, document);
        }
    }

    /**
     * The gs.filenameEncoding metadata is unique in that, when added, removed or
     * replaced, it must be applied on the file(name) whose metadata has been
     * adjusted.
     * This method handles all that, given the regular expression or filepath name
     * to match on (.* matches subdirectories), the affected fileNode, the new
     * encoding value and whether a new encoding value has been added/an existing
     * one has been replaced or whether the encoding metadata has been removed.
     * The new adjusted value for the encoding metadata is returned.
     *
     * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
     * to allow fast access to previously assigned gs.filenameEncoding metadata (if
     * any) for each file. This hashmap also needs to be updated, but this update
     * is complicated by the fact that it concerns regular expressions that could
     * affect multiple filenames.
     *
     * @param file_path_regexp the regular expression or filepath the metadata applies to
     *        (not read by the body of this method)
     * @param file_node the collection tree node whose display name is re-encoded
     * @param encoding_metadata_value the encoding being added or replaced; ignored when
     *        removingMetadata is true
     * @param removingMetadata true if the encoding metadata is being removed, false if it
     *        is being added or replaced
     * @return the adjusted encoding value: the inherited encoding when removing, otherwise
     *         the canonical name of a non-empty encoding_metadata_value. If filename
     *         encodings are not supported, encoding_metadata_value is returned unchanged.
     */
    public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
                                          String encoding_metadata_value, boolean removingMetadata)
    {
        if (!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
            return encoding_metadata_value;
        }

        // Work out this filenode's new encoding and apply it:

        if (removingMetadata) { // encoding_metadata_value = ""
            // gs.filenameEncoding metadata being removed, work out
            // any inherited metadata to replace it with in the meta-table
            encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
                    file_node.getURLEncodedFilePath(), file_node.getFile());
            // should be canonical encoding already
        }
        else if (!encoding_metadata_value.equals("")) {
            // if adding or replacing filename encoding,
            // get the canonical encoding name for this alias
            encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
        }

        // Reencode the display of this filenode only as any affected
        // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
        file_node.reencodeDisplayName(encoding_metadata_value);

        // Keep the file-to-encoding map (used for fast access) in step with the change:
        // adding/replacing stores the new encoding, removing deletes the entry.
        String urlpath = file_node.getURLEncodedFilePath();

        if (removingMetadata) {
            // remove it from the map instead of inserting "", so that when folders in the collectiontree
            // are being deleted or shifted, the removemetadata (and addmetadata) calls that get fired
            // for each affected filenode do not cause the undesirable effect of multiple "" being
            // entered into the filename-to-encoding map for filepaths that no longer exist.
            FilenameEncoding.map.remove(urlpath);
        } else {
            // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
            FilenameEncoding.map.put(urlpath, encoding_metadata_value);
        }

        // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
        // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
        // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
        // Mark the state as requiring a refresh of the CollectionTree.
        // This next step also serves to prevent the MetadataValueTableModel from trying to update
        // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
        FilenameEncoding.setRefreshRequired(true);

        return encoding_metadata_value;
    }
}