/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
package org.greenstone.gatherer.msm;

import java.io.*;
import java.util.*;
import org.w3c.dom.*;
import org.greenstone.gatherer.Gatherer;
import org.greenstone.gatherer.cdm.Argument;
import org.greenstone.gatherer.cdm.Classifier;
import org.greenstone.gatherer.cdm.CollectionDesignManager;
import org.greenstone.gatherer.msm.ElementWrapper;
import org.greenstone.gatherer.msm.MetadataSetManager;
import org.greenstone.gatherer.util.StaticStrings;
import org.greenstone.gatherer.util.Utility;
import org.greenstone.gatherer.valuetree.GValueModel;

/**
 * Imports a legacy collection. Almost all of this class was rewritten to fix
 * a variety of bugs. Importing legacy collections involves three main steps:
 *
 * 1. Read the existing metadata.xml files and namespace them, usually by prompting the user
 *    to specify the mapping between old metadata elements and new elements.
 *
 * 2. Build complete value trees for the new metadata elements. This ensures that all the
 *    metadata will show up and that the hfiles are written out complete. Building the
 *    value trees involves parsing the old hfiles and processing hierarchical metadata
 *    into GLI format (where '|' is the level separator).
 *
 * 3. Fix up the classify commands in the collect.cfg file to specify the new element names.
 *
 * @author Michael Dewsnip
 */
public class LegacyCollectionImporter {

    /** Root folder of the collection being imported. */
    private final File collection_folder;
    /** Absolute path of the collection folder; used as the key into the metadata profiler. */
    private final String collection_folder_path;
    private final CollectionDesignManager cdm;
    private final MetadataSetManager msm;
    /** Maps an old (un-namespaced) metadata element name to the HFile of its Hierarchy classifier. */
    private final HashMap source_metadata_to_hfile_mapping;
    /** Set when the user cancels the element selection prompt; stops the recursive import. */
    private boolean cancelled;

    /**
     * Creates an importer for the given collection and parses the hfile of every
     * Hierarchy classifier so that hierarchical metadata values can later be expanded.
     *
     * @param collection_folder the root folder of the legacy collection
     * @param cdm the collection design manager holding the collection's classifiers
     */
    public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm) {
        this.collection_folder = collection_folder;
        this.collection_folder_path = collection_folder.getAbsolutePath();
        this.cdm = cdm;
        this.msm = Gatherer.c_man.getCollection().msm;

        // Create a mapping from metadata element to hierarchy classifier
        source_metadata_to_hfile_mapping = new HashMap();
        ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
        for (int i = 0; i < hierarchy_classifiers_list.size(); i++) {
            Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);

            // getArgument() returns null when the classifier has no such argument, so a
            // classifier that lacks either argument cannot contribute a mapping
            Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
            Argument hfile_name_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
            if (metadata_name_argument == null || hfile_name_argument == null) {
                continue;
            }

            String metadata_name = metadata_name_argument.getValue();
            String hfile_name = hfile_name_argument.getValue();
            if (metadata_name == null || hfile_name == null) {
                continue;
            }

            // Remove the extracted namespace if it has been added
            if (metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
                metadata_name = metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
            }

            // Parse the hfile for this Hierarchy classifier
            File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_name);
            source_metadata_to_hfile_mapping.put(metadata_name, new HFile(hfile_file));
        }
    }

    /**
     * Copies all the existing metadata.xml files into a backup directory,
     * mirroring the directory structure of the import directory.
     *
     * @param collection_dir the root folder of the collection
     */
    public void backupMetadataXMLFiles(File collection_dir) {
        File import_dir = new File(collection_dir, Utility.IMPORT_DIR);
        File import_bak_dir = new File(collection_dir, Utility.IMPORT_BAK_DIR);
        import_bak_dir.mkdir();
        copyMetadataXMLFiles(import_dir, import_bak_dir);
    }

    /**
     * Recursively copies the metadata.xml file (if any) of source_dir and of each of
     * its subdirectories into the matching location under dest_dir.
     *
     * @param source_dir the directory to copy from; ignored if null or non-existent
     * @param dest_dir the directory to copy into; created on demand
     */
    private void copyMetadataXMLFiles(File source_dir, File dest_dir) {
        if (source_dir == null || !source_dir.exists()) {
            return;
        }

        // Find the metadata file in this dir
        File meta_file = new File(source_dir, "metadata.xml");
        if (meta_file.exists()) {
            File new_meta_file = new File(dest_dir, "metadata.xml");
            try {
                dest_dir.mkdirs();
                Gatherer.f_man.getQueue().copyFile(meta_file, new_meta_file, null);
                if (!new_meta_file.exists()) {
                    throw new Exception("");
                }
            }
            catch (Exception e) {
                Gatherer.println("Exception: couldn't move the file " + meta_file.getPath() + e.getMessage());
            }
        }

        // Now go through child directories. listFiles() returns null when source_dir
        // is not a directory or cannot be read.
        File[] children = source_dir.listFiles();
        if (children == null) {
            return;
        }
        for (int i = 0; i < children.length; i++) {
            File child = children[i];
            if (child.isDirectory()) {
                copyMetadataXMLFiles(child, new File(dest_dir, child.getName()));
            }
        }
    }

    /**
     * Imports the metadata of every metadata.xml file below the collection's import
     * folder. Does nothing unless a metadata set other than the extracted one is loaded.
     */
    public void importMetadata() {
        // Nothing to do if we don't have any metadata sets (apart from extracted) loaded
        if (msm.getSets().size() <= 1) {
            System.err.println("No metadata sets!");
            return;
        }

        cancelled = false;
        importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
    }

    /**
     * Imports the metadata of a single metadata.xml file, or recurses into a directory.
     * Files with any other name are ignored. If the user cancels the element prompt the
     * file being processed is not written back and the recursion stops.
     *
     * @param file a directory, or a candidate metadata.xml file
     */
    private void importMetadata(File file) {
        if (file.isDirectory()) {
            // Apply recursively to the contents of the directory
            File[] files = file.listFiles();
            if (files != null) {
                for (int i = 0; i < files.length && !cancelled; i++) {
                    importMetadata(files[i]);
                }
            }
            return;
        }

        // We only care about metadata.xml files
        if (!file.getName().equals(StaticStrings.METADATA_XML)) {
            return;
        }

        // Parse the metadata.xml file
        Document document = Utility.parse(file.getAbsolutePath(), false);
        // NOTE(review): presumably Utility.parse() yields null for an unparseable file -
        // confirm. Either way there is nothing to import without a root element.
        if (document == null || document.getDocumentElement() == null) {
            System.err.println("Unable to parse metadata file " + file);
            return;
        }

        if (!importMetadataElements(document)) {
            // Cancelled by the user: leave the file on disk untouched
            return;
        }

        pushDirectoryFilesetsDown(document, file);

        // Write the modified metadata.xml file back out
        Utility.export(document, file);
    }

    /**
     * Renames (or removes) every metadata element of the document, according to the
     * collection's profile, the loaded metadata sets, or failing those the user's choice.
     *
     * @param document the parsed metadata.xml document, modified in place
     * @return false if the user cancelled the prompt, true otherwise
     */
    private boolean importMetadataElements(Document document) {
        // Work from a snapshot: elements may be removed from the document while iterating
        Node[] metadata_elements = toArray(document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT));

        for (int i = 0; i < metadata_elements.length; i++) {
            Element source_element = (Element) metadata_elements[i];
            String source_element_name = source_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);

            // Check if there is a profile already set up for this element
            if (msm.profiler.containsAction(collection_folder_path, source_element_name)) {
                String target_element_name = msm.profiler.getAction(collection_folder_path, source_element_name);
                if (target_element_name != null) {
                    updateMetadataElement(source_element, target_element_name);
                }
                else {
                    // Element has been ignored, so remove it
                    source_element.getParentNode().removeChild(source_element);
                }
                continue;
            }

            // No profile, so check if the element is already in the metadata set
            ElementWrapper target_element = msm.getElement(source_element_name, true);
            if (target_element != null) {
                updateMetadataElement(source_element, target_element.getName());
                continue;
            }

            // We must ask the user how to process this metadata element
            target_element = msm.prompt.selectElement(source_element_name);
            if (msm.prompt.wasDialogCancelled()) {
                cancelled = true;
                return false;
            }

            if (target_element == null) {
                // The user has chosen to ignore this element, so remove it
                source_element.getParentNode().removeChild(source_element);

                // Add the user's choice to the profile for this collection
                msm.profiler.addAction(collection_folder_path, source_element_name, null);
            }
            else {
                // Replace the old metadata element name with the new one
                String target_element_name = target_element.getName();
                updateMetadataElement(source_element, target_element_name);

                // Add the user's choice to the profile for this collection
                msm.profiler.addAction(collection_folder_path, source_element_name, target_element_name);
            }
        }
        return true;
    }

    /**
     * Moves every fileset whose first filename child names a subdirectory out of the
     * document and into a new metadata.xml file inside that subdirectory, with the
     * filename changed to ".*". (Originally added as a last minute hack for rewriting
     * metadata.xml files.)
     *
     * NOTE(review): the exported file replaces whatever is written at that path, so a
     * metadata.xml already present in the subdirectory, or an earlier fileset pushed
     * into the same subdirectory, looks like it would be lost - confirm.
     *
     * @param document the parsed metadata.xml document, modified in place
     * @param file the metadata.xml file the document was read from
     */
    private void pushDirectoryFilesetsDown(Document document, File file) {
        // Work from a snapshot: filesets are removed from the document while iterating
        Node[] fileset_elements = toArray(document.getDocumentElement().getElementsByTagName(MetadataXMLFile.FILESET_ELEMENT));

        for (int i = 0; i < fileset_elements.length; i++) {
            Node fileset_node = fileset_elements[i];
            NodeList fileset_children = fileset_node.getChildNodes();
            for (int j = 0; j < fileset_children.getLength(); j++) {
                Node fileset_child = fileset_children.item(j);
                if (!fileset_child.getNodeName().equals(MetadataXMLFile.FILENAME_ELEMENT)) {
                    continue;
                }

                String child_filename = MSMUtils.getValue(fileset_child);
                File child_file = new File(file.getParentFile(), child_filename);
                if (child_file.isDirectory()) {
                    MetadataXMLFile child_metadata_xml_file = new MetadataXMLFile();
                    Document child_metadata_xml_file_document = child_metadata_xml_file.getDocument();
                    fileset_node = fileset_node.getParentNode().removeChild(fileset_node);

                    // Change the filename value to .*
                    MSMUtils.setValue((Element) fileset_child, ".*");

                    Node child_fileset_node = child_metadata_xml_file_document.importNode(fileset_node, true);
                    child_metadata_xml_file_document.getDocumentElement().appendChild(child_fileset_node);
                    Utility.export(child_metadata_xml_file_document, new File(child_file, StaticStrings.METADATA_XML));
                }

                // Only the first filename element of a fileset is considered
                break;
            }
        }
    }

    /** Copies a (possibly live) NodeList into an array that is safe to iterate while the tree changes. */
    private static Node[] toArray(NodeList node_list) {
        Node[] nodes = new Node[node_list.getLength()];
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = node_list.item(i);
        }
        return nodes;
    }

    /**
     * Renames a metadata element, expands its value to the full hierarchical value if
     * the old element has an hfile, and adds the value to the new element's value tree.
     *
     * @param metadata_element the metadata element to update, still carrying its old name
     * @param new_element_name the new (namespaced) element name
     */
    private void updateMetadataElement(Element metadata_element, String new_element_name) {
        String source_element_name = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
        HFile hfile = (HFile) source_metadata_to_hfile_mapping.get(source_element_name);

        // Get the value of this metadata element
        String element_value = MSMUtils.getValue(metadata_element);
        if (hfile != null) {
            // Map to the full value; values unknown to the hfile are left as they are
            String full_element_value = hfile.getFullValue(element_value);
            if (full_element_value != null) {
                element_value = full_element_value;
            }
        }

        // Update the name and value of the metadata element
        metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, new_element_name);
        MSMUtils.setValue(metadata_element, element_value);

        // Add the value of this metadata element to the value tree.
        // NOTE(review): a profiled target name may no longer be in a loaded set, in which
        // case there is presumably no value tree to add to - confirm.
        GValueModel value_model = msm.getValueTree(msm.getElement(new_element_name, true));
        if (value_model != null) {
            value_model.addValue(element_value);
        }
    }

    /**
     * Updates the "-metadata", "-sort" and (for Hierarchy classifiers) "-hfile" arguments
     * of every classifier to use the new element names recorded in the profile.
     */
    public void updateClassifiers() {
        for (int i = 0; i < cdm.classifier_manager.getSize(); i++) {
            Classifier classifier = cdm.classifier_manager.getClassifier(i);

            // Update the "-metadata" value
            mapClassifierArgumentToNewValue(classifier, StaticStrings.METADATA_ARGUMENT);

            // Update the "-sort" value
            mapClassifierArgumentToNewValue(classifier, "-sort");

            // With Hierarchy classifiers, update the hfile arguments
            if (classifier.getName().equalsIgnoreCase(StaticStrings.HIERARCHY_CLASSIFIER)) {
                updateHFileArgument(classifier);
            }
        }
    }

    /**
     * Renames the "-hfile" argument of a Hierarchy classifier to the new element name
     * plus ".txt", if the source element of that hfile has a profiled target.
     */
    private void updateHFileArgument(Classifier classifier) {
        Argument hfile_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
        if (hfile_argument == null) {
            // there is no such argument
            return;
        }
        String hfile_value = hfile_argument.getValue();

        // Find the source metadata element
        Iterator keys = source_metadata_to_hfile_mapping.keySet().iterator();
        while (keys.hasNext()) {
            String source_metadata = (String) keys.next();
            String hfile_name = ((HFile) source_metadata_to_hfile_mapping.get(source_metadata)).hfile_name;
            if (!hfile_name.equals(hfile_value)) {
                continue;
            }

            // Update the hfile value to the new (namespaced) one
            if (msm.profiler.containsAction(collection_folder_path, source_metadata)) {
                String target_value = msm.profiler.getAction(collection_folder_path, source_metadata);
                // A null action means the element was ignored; keep the existing hfile
                // name rather than renaming it to "null.txt"
                if (target_value != null) {
                    hfile_argument.setValue(target_value + ".txt");
                }
            }
            break;
        }
    }

    /**
     * Replaces the value of the named classifier argument with the new element name
     * recorded in the profile, if there is one.
     *
     * @param classifier the classifier to update
     * @param argument_name the name of the argument whose value is a metadata element name
     */
    private void mapClassifierArgumentToNewValue(Classifier classifier, String argument_name) {
        Argument argument = classifier.getArgument(argument_name);
        if (argument == null) {
            // there is no such argument
            return;
        }

        String value = argument.getValue();
        if (value == null) {
            // the argument has no value to map
            return;
        }

        // Remove the extracted namespace if it has been added
        if (value.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
            value = value.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
        }

        // Update the metadata value to the new (namespaced) one
        if (msm.profiler.containsAction(collection_folder_path, value)) {
            // NOTE(review): for an ignored element the action is null, so the argument
            // value is set to null here - confirm that is what Argument expects
            String target_value = msm.profiler.getAction(collection_folder_path, value);
            argument.setValue(target_value);
        }
    }

    /**
     * A basic hfile wrapper. It reads the given hfile when constructed and afterwards
     * maps an alias to the full hierarchical value of its entry: the values of all the
     * entry's ancestors followed by its own value, separated by StaticStrings.PIPE_CHAR.
     *
     * Each line of the hfile is read as: an alias (optionally in double quotes), an
     * index whose levels are separated by StaticStrings.STOP_CHARACTER, and a value in
     * double quotes. Blank lines are ignored and other lines that do not have this
     * shape are reported and skipped.
     */
    private static class HFile {

        /** The file name (without path) of the hfile. */
        public String hfile_name;
        private HashMap alias_to_value_mapping;

        /**
         * Reads the hfile. A missing or unreadable file is reported on System.err and
         * results in a wrapper that knows no aliases (or only those read so far).
         *
         * @param file the hfile to read, expected to be UTF-8 encoded
         */
        public HFile(File file) {
            hfile_name = file.getName();
            alias_to_value_mapping = new HashMap();

            // Only needed while building alias_to_value_mapping
            HashMap index_to_entry_mapping = new HashMap();

            BufferedReader buffered_reader = null;
            try {
                // Read in the hfile, line by line, creating entry mappings
                buffered_reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
                String line = null;
                while ((line = buffered_reader.readLine()) != null) {
                    if (line.trim().length() == 0) {
                        continue;
                    }
                    if (!parseLine(line, index_to_entry_mapping)) {
                        System.err.println("Skipping malformed line in hfile " + file + ": " + line);
                    }
                }
            }
            catch (Exception ex) {
                System.err.println("Exception reading hfile " + file);
                ex.printStackTrace();
            }
            finally {
                // Close the reader even if reading failed part way through
                if (buffered_reader != null) {
                    try {
                        buffered_reader.close();
                    }
                    catch (IOException ex) {
                        System.err.println("Exception closing hfile " + file);
                    }
                }
            }

            Iterator index_keys = index_to_entry_mapping.keySet().iterator();
            while (index_keys.hasNext()) {
                String index = (String) index_keys.next();
                Entry own_entry = (Entry) index_to_entry_mapping.get(index);
                String value = own_entry.value;

                // Chop the last reference off index, as we already have it
                if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
                    index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
                    // Then while there are still further indexes left, retrieve them
                    while (index.length() > 0) {
                        // Retrieve that value (if any).
                        Entry ancestor_entry = (Entry) index_to_entry_mapping.get(index);
                        if (ancestor_entry != null) {
                            // Prepend to the current value, separating with a pipe
                            value = ancestor_entry.value + StaticStrings.PIPE_CHAR + value;
                        }
                        // Then trim the index down
                        if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
                            index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
                        }
                        else {
                            index = "";
                        }
                    }
                }

                alias_to_value_mapping.put(own_entry.alias, value);
            }
        }

        /**
         * Parses one hfile line and records its entry under its index.
         *
         * @return false if the line does not have the expected shape
         */
        private static boolean parseLine(String line, HashMap index_to_entry_mapping) {
            // Read alias: either quoted, or running up to the first space
            boolean quoted_alias = (line.indexOf("\"") == 0);
            int alias_end = quoted_alias ? line.indexOf("\"", 1) : line.indexOf(" ", 1);
            if (alias_end < 0) {
                return false;
            }
            String alias = quoted_alias ? line.substring(1, alias_end) : line.substring(0, alias_end);

            // The value is the next quoted string after the alias
            int value_start = line.indexOf("\"", alias_end + 1);
            if (value_start < 0) {
                return false;
            }
            int value_end = line.indexOf("\"", value_start + 1);
            if (value_end < 0) {
                return false;
            }
            String value = line.substring(value_start + 1, value_end);

            // The index is whatever lies between the alias and the value
            String index = line.substring(alias_end + 1, value_start).trim();

            index_to_entry_mapping.put(index, new Entry(alias, value));
            return true;
        }

        /**
         * @param alias the alias of an hfile entry
         * @return the full pipe-separated value for the alias, or null if the alias is unknown
         */
        public String getFullValue(String alias) {
            return (String) alias_to_value_mapping.get(alias);
        }

        /** The alias and (single level) value read from one line of the hfile. */
        private static class Entry {
            public String alias;
            public String value;

            public Entry(String alias, String value) {
                this.alias = alias;
                this.value = value;
            }
        }
    }
}