package org.greenstone.gatherer.metadata; import java.io.*; import java.util.*; import org.greenstone.gatherer.util.XMLTools; import org.w3c.dom.*; /** This class represents one metadata.xml file */ public class MetadataXMLFile extends File { static final private String DESCRIPTION_ELEMENT = "Description"; static final private String DIRECTORY_FILENAME = ".*"; static final private String FILENAME_ELEMENT = "FileName"; static final private String FILESET_ELEMENT = "FileSet"; static final private String METADATA_ELEMENT = "Metadata"; public MetadataXMLFile(String metadata_xml_file_path) { super(metadata_xml_file_path); } public void addMetadata(File file, MetadataValue metadata_value) { // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } // Determine the file's path relative to the location of the metadata.xml file File metadata_xml_file_directory = getParentFile(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory.getAbsolutePath().length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Convert the file path into a regular expression that will match it String file_path_regexp = file_relative_path.replaceAll("\\.", "\\\\."); if (file_relative_path.equals("")) { file_path_regexp = DIRECTORY_FILENAME; } // Square brackets need to be escaped because they are a special character in Greenstone String metadata_value_string = metadata_value.getFullValue(); metadata_value_string = metadata_value_string.replaceAll("\\[", "["); metadata_value_string = metadata_value_string.replaceAll("\\]", "]"); // Create a new Metadata element to record this metadata Element new_metadata_value_element = document.createElement(METADATA_ELEMENT); new_metadata_value_element.setAttribute("name", metadata_value.getMetadataElement().getFullName()); new_metadata_value_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override")); new_metadata_value_element.appendChild(document.createTextNode(metadata_value_string)); boolean have_added_metadata = false; // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be extended with new metadata if (current_filename_element_value.equals(file_path_regexp)) { // Append the new Metadata element to the Description element of this FileSet Element description_element = (Element) current_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0); // Accumulating metadata: add at the end if (metadata_value.isAccumulatingMetadata()) { description_element.appendChild(new_metadata_value_element); } // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata) else { description_element.insertBefore(new_metadata_value_element, description_element.getFirstChild()); } have_added_metadata = true; break; } } } // Check if the metadata was added to an existing FileSet if (!have_added_metadata) { // It wasn't, so create a new FileSet element for it Element new_fileset_element = document.createElement(FILESET_ELEMENT); Element new_filename_element = document.createElement(FILENAME_ELEMENT); new_filename_element.appendChild(document.createTextNode(file_path_regexp)); new_fileset_element.appendChild(new_filename_element); // Append the new Metadata element to the Description element of this FileSet Element new_description_element = document.createElement(DESCRIPTION_ELEMENT); new_description_element.appendChild(new_metadata_value_element); new_fileset_element.appendChild(new_description_element); document.getDocumentElement().appendChild(new_fileset_element); } // Rewrite the metadata.xml file XMLTools.writeXMLFile(this, document); } public ArrayList getMetadataAssignedToFile(File file) { // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return null; } // Determine the file's path relative to the location of the metadata.xml file File metadata_xml_file_directory = getParentFile(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory.getAbsolutePath().length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Build up a list of metadata assigned to this file ArrayList metadata_values = new ArrayList(); // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; File folder_metadata_inherited_from = null; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // This fileset specifies metadata for the file if (file_relative_path.matches(current_filename_element_value)) { current_fileset_matches = true; if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { folder_metadata_inherited_from = metadata_xml_file_directory; } break; } // This fileset specifies metadata for the folder the file is in if (file_relative_path.startsWith(current_filename_element_value + File.separator)) { current_fileset_matches = true; folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value); break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Read all the Metadata elements in the fileset NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); String metadata_element_name_full = current_metadata_element.getAttribute("name"); String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); // If the metadata set isn't loaded give the option of mapping the element into a loaded set if (metadata_set == null) { String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full); if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) { // Skip this element if we still don't have a loaded element for it continue; } metadata_element_name_full = target_metadata_element_name_full; metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); } String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full); MetadataElement metadata_element = metadata_set.getMetadataElement(metadata_element_name); // If the element doesn't exist in the metadata set, we're not interested if (metadata_element == null) { continue; } // Square brackets need to be escaped because they are a special character in Greenstone String metadata_element_value = XMLTools.getElementTextValue(current_metadata_element); metadata_element_value = metadata_element_value.replaceAll("[", "["); metadata_element_value = metadata_element_value.replaceAll("]", "]"); MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_element_value); // If there is no metadata value tree node for this value, create it if (metadata_value_tree_node == null) { System.err.println("Note: No value tree node for metadata value \"" + metadata_element_value + "\""); metadata_element.addMetadataValue(metadata_element_value); metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_element_value); } MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node); metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from); // Is this accumulating metadata? if (current_metadata_element.getAttribute("mode").equals("accumulate")) { metadata_value.setIsAccumulatingMetadata(true); } // Add the new metadata value to the list metadata_values.add(metadata_value); } } return metadata_values; } public void removeMetadata(File file, MetadataValue metadata_value) { // Parse the metadata.xml file Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } // Determine the file's path relative to the location of the metadata.xml file File metadata_xml_file_directory = getParentFile(); String file_relative_path = file.getAbsolutePath().substring(metadata_xml_file_directory.getAbsolutePath().length()); if (file_relative_path.startsWith(File.separator)) { file_relative_path = file_relative_path.substring(File.separator.length()); } // Convert the file path into a regular expression that will match it String file_path_regexp = file_relative_path.replaceAll("\\.", "\\\\."); if (file_relative_path.equals("")) { file_path_regexp = DIRECTORY_FILENAME; } // Read all the FileSet elements in the file NodeList fileset_elements_nodelist = document.getElementsByTagName(FILESET_ELEMENT); for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) { Element current_fileset_element = (Element) fileset_elements_nodelist.item(i); boolean current_fileset_matches = false; // Check the FileName elements of the FileSet to see if we have a match NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT); for (int j = 0; j < filename_elements_nodelist.getLength(); j++) { Element current_filename_element = (Element) filename_elements_nodelist.item(j); String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); // Only exact matches can be edited if (current_filename_element_value.equals(file_path_regexp)) { current_fileset_matches = true; break; } } // The FileSet doesn't apply, so move onto the next one if (current_fileset_matches == false) { continue; } // Find the Metadata element to delete from the fileset String metadata_element_name_full = metadata_value.getMetadataElement().getFullName(); String metadata_element_value = metadata_value.getFullValue(); NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT); for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(k); // Check the metadata element name matches String current_metadata_element_name_full = current_metadata_element.getAttribute("name"); if (!current_metadata_element_name_full.equals(metadata_element_name_full)) { continue; } // Check the metadata element value matches String current_metadata_element_value = XMLTools.getElementTextValue(current_metadata_element); if (!current_metadata_element_value.equals(metadata_element_value)) { continue; } // Remove this Metadata element current_metadata_element.getParentNode().removeChild(current_metadata_element); } } // Rewrite the metadata.xml file XMLTools.writeXMLFile(this, document); } public void skimFile() { boolean file_changed = false; // Parse the metadata.xml file System.err.println("Skimming metadata.xml file " + this + "..."); Document document = XMLTools.parseXMLFile(this); if (document == null) { System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath()); return; } // Read all the Metadata elements in the file NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT); for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) { Element current_metadata_element = (Element) metadata_elements_nodelist.item(i); String metadata_element_name_full = current_metadata_element.getAttribute("name"); String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); // If the metadata set isn't loaded give the option of mapping the element into a loaded set if (metadata_set == null) { String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full); if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) { // Skip this element if we still don't have a loaded element for it continue; } // Update the metadata.xml file to have the new element name current_metadata_element.setAttribute("name", target_metadata_element_name_full); file_changed = true; metadata_element_name_full = target_metadata_element_name_full; metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full); metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace); } String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full); MetadataElement metadata_element = metadata_set.getMetadataElement(metadata_element_name); // If the element doesn't exist in the metadata set, add it if (metadata_element == null) { metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name); } String metadata_element_value = XMLTools.getElementTextValue(current_metadata_element); metadata_element.addMetadataValue(metadata_element_value); } // Rewrite the metadata.xml file if it has changed if (file_changed) { XMLTools.writeXMLFile(this, document); } } }