/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Author: John Thompson, Greenstone Digital Library, University of Waikato * * Copyright (C) 1999 New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */ package org.greenstone.gatherer.cdm; import java.awt.*; import java.awt.event.*; import java.io.*; import java.util.*; import javax.swing.*; import org.greenstone.gatherer.Gatherer; import org.greenstone.gatherer.cdm.CommandTokenizer; import org.greenstone.gatherer.msm.MSMUtils; import org.greenstone.gatherer.util.DOMTree; import org.greenstone.gatherer.util.Codec; import org.greenstone.gatherer.util.StaticStrings; import org.greenstone.gatherer.util.Utility; import org.w3c.dom.*; /** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. 
This later version is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independant ListModels which clobbered the ordering of unparsed commands). * @author John Thompson, Greenstone Digital Library, University of Waikato * @version 2.3d */ public class CollectionConfiguration extends StaticStrings { static public Document document; static public void main(String[] args) { if(args.length >= 1) { File file = new File(args[0]); CollectionConfiguration collect_cfg = new CollectionConfiguration(file); collect_cfg.save(true); collect_cfg.save(false); collect_cfg = null; } else { System.out.println("Usage: CollectionConfiguration "); } } /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location. * @param element the command Element to be inserted * @return the Element which the given command should be inserted before, or null to append to end of list */ static public Node findInsertionPoint(Element target_element) { ///ystem.err.println("Find insertion point: " + target_element.getNodeName()); String target_element_name = target_element.getNodeName(); Element document_element = document.getDocumentElement(); // Try to find commands with the same tag. NodeList matching_elements = document_element.getElementsByTagName(target_element_name); // If we found matching elements, then we have our most likely insertion location, so check within for groupings if(matching_elements.getLength() != 0) { ///ystem.err.println("Found matching elements."); // Only CollectionMeta are grouped. 
if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) { ///ystem.err.println("Dealing with collection metadata"); // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end. // So if the command to be added is special add it immediately after any other special command if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { int index = 0; Element matched_element = (Element) matching_elements.item(index); Element sibling_element = (Element) matched_element.getNextSibling(); while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { index++; matched_element = (Element) matching_elements.item(index); sibling_element = (Element) matched_element.getNextSibling(); } if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; } // Otherwise try to find a matching 'name' and add after the last one in that group. else { int index = 0; target_element_name = target_element.getAttribute(NAME_ATTRIBUTE); boolean found = false; // Skip all of the special metadata Element matched_element = (Element) matching_elements.item(index); while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { index++; matched_element = (Element) matching_elements.item(index); } // Begin search while(!found && matched_element != null) { if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) { found = true; } else { index++; matched_element = (Element) matching_elements.item(index); } } // If we found a match, we need to continue checking until we find the last name match. 
if(found) { index++; Element previous_sibling = matched_element; Element sibling_element = (Element) matching_elements.item(index); while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) { previous_sibling = sibling_element; index++; sibling_element = (Element) matching_elements.item(index); } // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines! return previous_sibling.getNextSibling(); } // If not found we just add after last metadata element else { Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1); return last_element.getNextSibling(); } } } else { ///ystem.err.println("Not dealing with collection meta."); Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } } ///ystem.err.println("No matching elements found."); // Locate where this command is in the ordering int command_index = -1; for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) { if(COMMAND_ORDER[i].equals(target_element_name)) { command_index = i; } } ///ystem.err.println("Command index is: " + command_index); // Now move forward, checking for existing elements in each of the preceeding command orders. 
int preceeding_index = command_index - 1; ///ystem.err.println("Searching before the target command."); while(preceeding_index >= 0) { matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]); // If we've found a match if(matching_elements.getLength() > 0) { // We add after the last element Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } preceeding_index--; } // If all that fails, we now move backwards through the commands int susceeding_index = command_index + 1; ///ystem.err.println("Searching after the target command."); while(susceeding_index < COMMAND_ORDER.length) { matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]); // If we've found a match if(matching_elements.getLength() > 0) { // We add before the first element Element matched_element = (Element) matching_elements.item(0); // One final quick test. 
If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getPreviousSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } susceeding_index++; } // Well. Apparently there are no other commands in this collection configuration. So append away... return null; } static public String toString(Element command_element, boolean show_extracted_namespace) { String command_element_name = command_element.getNodeName(); if(command_element_name.equals(CLASSIFY_ELEMENT)) { return self.classifyToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(FORMAT_ELEMENT)) { return self.formatToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(INDEXES_ELEMENT)) { return self.indexesToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) { return self.indexDefaultToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(LANGUAGES_ELEMENT)) { return self.languagesToString(command_element); } else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) { return self.languageDefaultToString(command_element); } else if(command_element_name.equals(LEVELS_ELEMENT)) { return self.levelsToString(command_element); } else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) { return self.metadataToString(command_element, 
show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(PLUGIN_ELEMENT)) { return self.pluginToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) { return self.searchtypeToString(command_element); } else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) { return self.subcollectionToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) { return self.subcollectionDefaultIndexToString(command_element); } else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) { return self.subcollectionIndexesToString(command_element); } else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) { return self.supercollectionToString(command_element); } else if(command_element_name.equals(UNKNOWN_ELEMENT)) { return self.unknownToString(command_element); } return ""; } /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix. 
* @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string * @return a HashMap containing the arguments parsed */ static public HashMap parseArguments(CommandTokenizer tokenizer) { HashMap arguments = new HashMap(); String name = null; String value = null; while(tokenizer.hasMoreTokens() || name != null) { // First we retrieve a name if we need one. if(name == null) { name = tokenizer.nextToken(); } // Now we attempt to retrieve a value if(tokenizer.hasMoreTokens()) { value = tokenizer.nextToken(); // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop. if(value.startsWith(StaticStrings.MINUS_CHARACTER)) { arguments.put(name, null); name = value; } // Otherwise we have a typical name->value pair ready to go else { arguments.put(name, value); name = null; } } // Otherwise its a binary flag else { arguments.put(name, null); name = null; } } return arguments; } static private ArrayList known_metadata; static private CollectionConfiguration self; static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP; /** Gives the preferred ordering of commands */ static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT}; /** 
************************** Public Data Members ***************************/ /** ************************** Private Data Members ***************************/ private File collect_config_file; /** ************************** Public Methods ***************************/ public CollectionConfiguration(File collect_config_file) { this.self = this; this.collect_config_file = collect_config_file; // If collect_cfg is xml we can load it straight away String collect_config_name = collect_config_file.getName(); if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) { // Parse with Utility but don't use class loader document = Utility.parse(collect_config_file, false); } // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser else if(collect_config_name.equals(COLLECT_CFG)) { document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true); parse(collect_config_file); } } /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. 
* The dialog is not modal, and its "Refresh Tree" button reloads the tree from the shared document.
     */
    public void display() {
        JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
        dialog.setSize(400,400);
        JPanel content_pane = (JPanel) dialog.getContentPane();
        final DOMTree tree = new DOMTree(document);
        JButton refresh_button = new JButton("Refresh Tree");
        refresh_button.addActionListener(new ActionListener() {
                public void actionPerformed(ActionEvent event) {
                    // Read the static field again rather than capturing it, so a document loaded later is picked up.
                    tree.setDocument(document);
                }
            });
        content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
        content_pane.setLayout(new BorderLayout());
        content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
        content_pane.add(refresh_button, BorderLayout.SOUTH);
        // setVisible(true) is the supported replacement for the deprecated Dialog.show()
        dialog.setVisible(true);
    }

    /** Retrieve or create the beta metadata Element, (re)stamping its name and special attributes.
     * @return the beta Element
     */
    public Element getBeta() {
        Element beta_element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
        beta_element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
        beta_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
        return beta_element;
    }

    /** Retrieve or create the creator metadata Element, (re)stamping its name and special attributes.
     * @return the creator Element
     */
    public Element getCreator() {
        Element creator_element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
        creator_element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
        creator_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
        return creator_element;
    }

    /** Retrieve the root Element of the shared configuration document.
     * @return the document Element
     */
    public Element getDocumentElement() {
        return document.getDocumentElement();
    }

    /** Retrieve the file this configuration was constructed with.
     * @return the configuration File
     */
    public File getFile() {
        return collect_config_file;
    }

    /** Retrieve or create the languages Element. */
    public Element getLanguages() {
        return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
    }

    /** Retrieve or create the levels Element. */
    public Element getLevels() {
        return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
    }

    /** Retrieve or create the maintainer metadata Element, (re)stamping its name and special attributes.
     * @return the maintainer Element
     */
    public Element getMaintainer() {
        Element maintainer_element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
        maintainer_element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
        maintainer_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
        return maintainer_element;
    }

    /** Retrieve or create the indexes Element.
Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */ public Element getMGIndexes() { return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR); } public Element getMGPPIndexes() { return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR); } public Element getPublic() { Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null); element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR); element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); return element; } /** Retrieve or create the searchtype element. */ public Element getSearchType() { ///ystem.err.println("Get or create element by tag name: " + name); Element document_element = document.getDocumentElement(); NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT); int elements_length = elements.getLength(); if(elements_length > 0) { document_element = null; return (Element) elements.item(0); } // Create the element Element element = document.createElement(SEARCHTYPE_ELEMENT); Node target_node = findInsertionPoint(element); if(target_node != null) { document_element.insertBefore(element, target_node); } else { document_element.appendChild(element); } document_element = null; // Append a default search type node - form Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT); a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]); element.appendChild(a_searchtype_element); return element; } /** Retrieve or create the subindexes Element. */ public Element getSubIndexes() { return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null); } /** Retrieve or create the supercollections Element. 
*/ public Element getSuperCollection() { return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null); } public void save() { save(false); } public void save(boolean force_xml) { if(collect_config_file.exists()) { File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG); File backup_file = new File(collect_config_file.getParentFile(), "collect.bak"); if(backup_file.exists()) { backup_file.delete(); } if(!original_file.renameTo(backup_file)) { Gatherer.println("Can't rename collect.cfg"); } } if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) { ///ystem.err.println("Writing XML"); Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML)); } else { ///ystem.err.println("Writing text"); try { FileWriter file_writer = new FileWriter(collect_config_file, false); BufferedWriter buffered_writer = new BufferedWriter(file_writer); // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he). // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata?? 
known_metadata = new ArrayList(); Element collect_config_element = document.getDocumentElement(); NodeList command_elements = collect_config_element.getChildNodes(); boolean just_wrote_newline = false; // Prevent two or more newlines in a row for(int i = 0; i < command_elements.getLength(); i++) { Node command_node = command_elements.item(i); if(command_node instanceof Element) { Element command_element = (Element) command_node; // The only thing left are NewLine elements if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) { buffered_writer.newLine(); just_wrote_newline = true; } // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility) else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){ String command = toString(command_element, false); if(command != null && command.length() > 0) { write(buffered_writer, command); buffered_writer.newLine(); just_wrote_newline = false; } } } } buffered_writer.close(); known_metadata = null; } catch (Exception exception) { Gatherer.println("Error in CollectionConfiguration.save(boolean): " + exception); Gatherer.printStackTrace(exception); } } } /** ************************** Private Methods ***************************/ private String classifyToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(CLASSIFY_STR); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(TYPE_ATTRIBUTE)); text.append(SPACE_CHARACTER); NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT); int option_elements_length = option_elements.getLength(); for(int j = 0; j < option_elements_length; j++) { Element 
option_element = (Element) option_elements.item(j); if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { text.append(StaticStrings.MINUS_CHARACTER); text.append(option_element.getAttribute(NAME_ATTRIBUTE)); String value_str = MSMUtils.getValue(option_element); if(value_str.length() > 0) { // If the value happens to be the identifier of an extracted metadata element, then remove the namespace. if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) { value_str = value_str.substring(EXTRACTED_PREFIX.length()); } text.append(SPACE_CHARACTER); if(value_str.indexOf(SPACE_CHARACTER) == -1) { text.append(value_str); } else { text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } } value_str = null; if(j < option_elements_length - 1) { text.append(SPACE_CHARACTER); } } option_element = null; } option_elements = null; return text.toString(); } private String formatToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(FORMAT_STR); text.append(SPACE_CHARACTER); text.append(command_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); String value_str = command_element.getAttribute(VALUE_ATTRIBUTE); if(value_str.length() != 0) { text.append(value_str); } else { // Remember to encode format string to Greenstone specification value_str = Codec.transform(MSMUtils.getValue(command_element), Codec.DOM_TO_GREENSTONE); // Remove any references to a namespace for extracted metadata if(!show_extracted_namespace) { value_str.replaceAll(EXTRACTED_PREFIX, ""); } text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } value_str = null; return text.toString(); } /** Retrieve or create the indexes Element. 
*/ private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) { Element document_element = document.getDocumentElement(); NodeList elements = document_element.getElementsByTagName(name); int elements_length = elements.getLength(); if(elements_length > 0) { if(conditional_attribute == null) { document_element = null; return (Element) elements.item(0); } else { for(int i = 0; i < elements_length; i++) { Element element = (Element) elements.item(i); if(element.getAttribute(conditional_attribute).equals(required_value)) { document_element = null; return element; } element = null; } } } // Create the element Element element = document.createElement(name); // If there was a property set it if(conditional_attribute != null) { element.setAttribute(conditional_attribute, required_value); } Node target_node = findInsertionPoint(element); if(target_node != null) { document_element.insertBefore(element, target_node); } else { document_element.appendChild(element); } document_element = null; return element; } private String indexesToString(Element command_element, boolean show_extracted_namespace) { boolean comment_only = false; StringBuffer text = new StringBuffer(""); if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) { text.append("#"); comment_only = true; } text.append(INDEX_STR); text.append(TAB_CHARACTER); if(!comment_only) { text.append(TAB_CHARACTER); } NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT); if (index_elements.getLength() == 0) { // no indexes return ""; } // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list int index_elements_length = index_elements.getLength(); for(int j = 0; j < index_elements_length; j++) { Element index_element = (Element) index_elements.item(j); String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE); if(level_str.length() > 0) { text.append(level_str); 
text.append(StaticStrings.COLON_CHARACTER); } NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); // Don't output anything if no indexes are set if(content_elements_length == 0) { return null; } for(int k = 0; k < content_elements_length; k++) { Element content_element = (Element) content_elements.item(k); String name_str = content_element.getAttribute(NAME_ATTRIBUTE); if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) { name_str = name_str.substring(EXTRACTED_PREFIX.length()); } text.append(name_str); name_str = null; if(k < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } content_element = null; } if(j < index_elements_length - 1) { text.append(SPACE_CHARACTER); } content_elements = null; index_element = null; } index_elements = null; return text.toString(); } private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(""); if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) { text.append("#"); } text.append(INDEX_DEFAULT_STR); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(LEVEL_ATTRIBUTE)); text.append(StaticStrings.COLON_CHARACTER); NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); for(int j = 0; j < content_elements_length; j++) { Element content_element = (Element) content_elements.item(j); String name_str = content_element.getAttribute(NAME_ATTRIBUTE); if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) { name_str = name_str.substring(EXTRACTED_PREFIX.length()); } text.append(name_str); name_str = null; if(j < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } content_element = null; } content_elements = null; return text.toString(); } private String 
languagesToString(Element command_element) { StringBuffer text = new StringBuffer(LANGUAGES_STR); text.append(TAB_CHARACTER); // Retrieve all the languages and write them out in a space separated list NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT); int language_elements_length = language_elements.getLength(); if(language_elements_length == 0) { return null; } for(int j = 0; j < language_elements_length; j++) { Element language_element = (Element) language_elements.item(j); text.append(language_element.getAttribute(NAME_ATTRIBUTE)); if(j < language_elements_length - 1) { text.append(SPACE_CHARACTER); } } return text.toString(); } private String languageDefaultToString(Element command_element) { StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(NAME_ATTRIBUTE)); return text.toString(); } private String levelsToString(Element command_element) { StringBuffer text = new StringBuffer(""); if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { text.append("#"); } text.append(LEVELS_STR); text.append(TAB_CHARACTER); text.append(TAB_CHARACTER); NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); // Don't output anything if no levels are set. 
if(content_elements_length == 0) { return null; } for(int i = 0; i < content_elements_length; i++) { Element content_element = (Element) content_elements.item(i); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); } return text.substring(0, text.length() - 1); } static public String metadataToString(Element command_element, boolean text_value) { boolean special = false; // If there is no value attribute, then we don't write anything StringBuffer text = new StringBuffer(""); String name_str = command_element.getAttribute(NAME_ATTRIBUTE); // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons. if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) { text.append(name_str); text.append(TAB_CHARACTER); special = true; } else if(name_str.equals(COLLECTIONMETADATA_BETA_STR) || name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) { text.append(name_str); text.append(TAB_CHARACTER); text.append(TAB_CHARACTER); special = true; } else { text.append(COLLECTIONMETADATA_STR); text.append(TAB_CHARACTER); text.append(name_str); text.append(SPACE_CHARACTER); String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE); // If this is element is in english, and it is the first one found, we don't need to write the language argument. 
//if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) { // changed so that we always write the language string text.append(LBRACKET_CHARACTER); text.append(LANGUAGE_ARGUMENT); text.append(language_str); text.append(RBRACKET_CHARACTER); text.append(SPACE_CHARACTER); //} if(known_metadata != null) { known_metadata.add(name_str); } language_str = null; } name_str = null; String value_str = MSMUtils.getValue(command_element); // The value string we retrieved will be encoded for xml, so we now decode it - to text if text_value set. This parameter was originally show_extracted_namespace, but sincethis is only true for 'toString()' commands from within the CDM, its good enough to determine if this toString() will be used to display on screen, or write to collect.cfg if(text_value == CollectionMeta.TEXT) { value_str = Codec.transform(value_str, Codec.DOM_TO_TEXT); } else { value_str = Codec.transform(value_str, Codec.DOM_TO_GREENSTONE); } // We don't wrap the email addresses in quotes, nor the other special metadata if(special) { text.append(value_str); } else { text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } value_str = null; return text.toString(); } /** Parse a collect.cfg into a DOM model representation. */ private void parse(File collect_config_file) { try { Element collect_cfg_element = document.getDocumentElement(); // Read in the file one command at a time. FileReader in_reader = new FileReader(collect_config_file); BufferedReader in = new BufferedReader(in_reader); String command_str = null; while((command_str = in.readLine()) != null) { Element command_element = null; // A command may be broken over several lines. 
command_str = command_str.trim(); boolean eof = false; while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) { String next_line = in.readLine(); if(next_line != null) { next_line = next_line.trim(); if(next_line.length() > 0) { // Remove the new line character command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER)); // And append the next line, which due to the test above must be non-zero length command_str = command_str + next_line; } next_line = null; } // If we've reached the end of the file theres nothing more we can do else { eof = true; } } // If there is still a new line character, then we remove it and hope for the best if(command_str.endsWith(NEWLINE_CHARACTER)) { command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER)); } // Now we've either got a command to parse... if(command_str.length() != 0) { // Start trying to figure out what it is StringTokenizer tokenizer = new StringTokenizer(command_str); String command_type = tokenizer.nextToken().toLowerCase(); tokenizer = null; // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. 
If none can parse the command, an unknown element is created if(command_element == null && command_type.equals(CLASSIFY_STR)) { command_element = parseClassify(command_str); } if(command_element == null && command_type.equals(FORMAT_STR)) { command_element = parseFormat(command_str, in); } if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) { command_element = parseIndex(command_str); } if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) { command_element = parseIndexDefault(command_str); } if(command_element == null && command_type.equals(LANGUAGES_STR)) { command_element = parseLanguage(command_str); } if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) { command_element = parseLanguageDefault(command_str); } if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) { command_element = parseLevels(command_str); } if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) { // collectionmeta may go over more than one line, so // pass in the reader command_element = parseMetadata(command_str, in); } if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) || command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) { command_element = parseMetadataSpecial(command_str); } if(command_element == null && command_type.equals(PLUGIN_STR)) { command_element = parsePlugIn(command_str); } if(command_element == null && command_type.equals(SEARCHTYPE_STR)) { command_element = parseSearchType(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) { command_element = parseSubCollection(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) { command_element = 
parseSubCollectionDefaultIndex(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) { command_element = parseSubCollectionIndex(command_str); } if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) { command_element = parseSuperCollection(command_str); } // Doesn't match any known type command_type = null; if(command_element == null) { // No-one knows what to do with this command, so we create an Unknown command element command_element = document.createElement(UNKNOWN_ELEMENT); MSMUtils.setValue(command_element, command_str); } } // Or an empty line to remember for later else { command_element = document.createElement(NEWLINE_ELEMENT); } // Now command element shouldn't be null so we append it to the collection config DOM collect_cfg_element.appendChild(command_element); } } catch(Exception exception) { Gatherer.println("Error in CollectionConfiguration.parse(java.io.File): " + exception); Gatherer.printStackTrace(exception); } } private Element parseClassify(String command_str) { Element command_element = null; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument). 
if(tokenizer.countTokens() >= 2) { // Must support "classify Phind" (no args) command_element = document.createElement(CLASSIFY_ELEMENT); // First token is classify tokenizer.nextToken(); // The next token is the classifier type command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken()); // Now we parse out the remaining arguments into a hashmapping from name to value HashMap arguments = parseArguments(tokenizer); // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently Iterator names = arguments.keySet().iterator(); while(names.hasNext()) { String name = (String) names.next(); String value = (String) arguments.get(name); // Can be null // The metadata argument gets added as the content attribute if(name.equals(METADATA_ARGUMENT) && value != null) { // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace. if(value.indexOf(MSMUtils.NS_SEP) == -1) { value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value; } //command_element.setAttribute(CONTENT_ATTRIBUTE, value); } // Everything else is an Option Element Element option_element = document.createElement(OPTION_ELEMENT); option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1)); if(value != null) { // Remove any speech marks appended in strings containing whitespace if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) { value = value.substring(1, value.length() - 1); } MSMUtils.setValue(option_element, value); } option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); command_element.appendChild(option_element); option_element = null; name = null; value = null; } names = null; arguments = null; } tokenizer = null; } catch(Exception error) { } return command_element; } private Element parseFormat(String command_str, BufferedReader in) { Element 
command_element = null; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(FORMAT_ELEMENT); // First token is format tokenizer.nextToken(); command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); String value_str = tokenizer.nextToken(); // If the value is true or false we add it as an attribute if(value_str.equalsIgnoreCase(TRUE_STR) || value_str.equalsIgnoreCase(FALSE_STR)) { command_element.setAttribute(VALUE_ATTRIBUTE, value_str.toLowerCase()); } // Otherwise it gets added as a text node else { // now we need to handle the case where the value is enclosed in quotes (single or double) and may extend across multiple lines String start_string = value_str.substring(0,1); if (start_string.equals("\"") || start_string.equals("\'")) { if (value_str.endsWith(start_string) && value_str.length()!=1) { // we remove the quotes from the ends value_str = value_str.substring(1, value_str.length() - 1); } else { // remove the first quote StringBuffer value_raw = new StringBuffer(value_str.substring(1)); // add the new line back in value_raw.append(StaticStrings.NEW_LINE_CHAR); int pos = value_raw.indexOf(start_string); int old_pos = 0; while (pos != -1 && value_raw.charAt(pos-1)=='\\') { old_pos = pos+1; pos = value_raw.indexOf(start_string, old_pos); } while(pos == -1) { String next_line = in.readLine(); if(next_line != null) { value_raw.append(next_line); value_raw.append(StaticStrings.NEW_LINE_CHAR); } next_line = null; pos = value_raw.indexOf(start_string, old_pos); while (pos != -1 && value_raw.charAt(pos-1)=='\\') { old_pos = pos+1; pos = value_raw.indexOf(start_string, old_pos); } } value_str = value_raw.substring(0, value_raw.lastIndexOf(start_string)); value_raw = null; } // else } // if starts with a quote if(value_str != null) { // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value value_str = 
Codec.transform(value_str, Codec.GREENSTONE_TO_DOM); MSMUtils.setValue(command_element, value_str); } else { command_element = null; } start_string = null; } value_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); String command = tokenizer.nextToken(); command_element = document.createElement(INDEXES_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR)); command = null; if(!tokenizer.hasMoreTokens()) { // there are no indexes command_element.setAttribute(ASSIGNED_ATTRIBUTE, FALSE_STR); command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); // for now tokenizer = null; return command_element; } while(tokenizer.hasMoreTokens()) { Element index_element = document.createElement(INDEX_ELEMENT); String index_str = tokenizer.nextToken(); // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important. 
boolean old_index; if(index_str.indexOf(COLON_CHARACTER) != -1) { old_index = true; index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER))); index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1); command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); } else { command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR); old_index = false; } StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); String content_str = content_tokenizer.nextToken(); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(content_str.indexOf(MSMUtils.NS_SEP) == -1) { if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) { // Our special strings are OK. } else { content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str; } } content_element.setAttribute(NAME_ATTRIBUTE, content_str); index_element.appendChild(content_element); content_element = null; } content_tokenizer = null; index_str = null; command_element.appendChild(index_element); index_element = null; } tokenizer = null; } catch (Exception exception) { exception.printStackTrace(); } return command_element; } private Element parseIndexDefault(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { command_element = document.createElement(INDEX_DEFAULT_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? 
TRUE_STR : FALSE_STR)); String index_str = tokenizer.nextToken(); command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER))); String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1); StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); command_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; content_str = null; index_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseLanguage(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(LANGUAGES_ELEMENT); while(tokenizer.hasMoreTokens()) { Element language_element = document.createElement(LANGUAGE_ELEMENT); language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(language_element); language_element = null; } } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseLanguageDefault(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT); tokenizer.nextToken(); String default_language_str = tokenizer.nextToken(); command_element.setAttribute(NAME_ATTRIBUTE, default_language_str); command_element.setAttribute(ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); default_language_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private 
Element parseLevels(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); // First token is command type (levels) String command = tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(LEVELS_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR)); while(tokenizer.hasMoreTokens()) { Element level_element = document.createElement(CONTENT_ELEMENT); level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(level_element); level_element = null; } } command = null; } catch(Exception exception) { } return command_element; } private Element parseMetadata(String command_str, BufferedReader in) { Element command_element = null; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(COLLECTIONMETADATA_ELEMENT); // First token is command type tokenizer.nextToken(); String name_str = tokenizer.nextToken(); String value_str = tokenizer.nextToken(); String language_str = "en"; // By default - why do we assume English??? 
// Check if the value string is actually a language string if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) { language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1); value_str = tokenizer.nextToken(); } // now we need to handle the case where the value is enclosed in quotes (single or double) and may extend across multiple lines String start_string = value_str.substring(0,1); if (start_string.equals("\"") || start_string.equals("\'")) { if (value_str.endsWith(start_string) && value_str.length()!=1) { // we remove the quotes from the ends value_str = value_str.substring(1, value_str.length() - 1); } else { // remove the first quote StringBuffer value_raw = new StringBuffer(value_str.substring(1)); // add the new line back in value_raw.append(StaticStrings.NEW_LINE_CHAR); int pos = value_raw.indexOf(start_string); int old_pos = 0; while (pos != -1 && value_raw.charAt(pos-1)=='\\') { old_pos = pos+1; pos = value_raw.indexOf(start_string, old_pos); } while(pos == -1) { String next_line = in.readLine(); if(next_line != null) { value_raw.append(next_line); value_raw.append(StaticStrings.NEW_LINE_CHAR); } next_line = null; pos = value_raw.indexOf(start_string, old_pos); while (pos != -1 && value_raw.charAt(pos-1)=='\\') { old_pos = pos+1; pos = value_raw.indexOf(start_string, old_pos); } } value_str = value_raw.substring(0, value_raw.lastIndexOf(start_string)); value_raw = null; } // else } // if starts with a quote if(value_str != null) { // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM); command_element.setAttribute(NAME_ATTRIBUTE, name_str); command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); MSMUtils.setValue(command_element, value_str); } else { command_element = null; } language_str = null; value_str 
= null; name_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseMetadataSpecial(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { String name_str = tokenizer.nextToken(); String value_str = tokenizer.nextToken(); if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) { command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT); } else if(name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) { command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT); } else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) { command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT); } else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) { command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT); } if(command_element != null) { command_element.setAttribute(NAME_ATTRIBUTE, name_str); command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR); command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) { value_str = value_str.substring(1, value_str.length() - 1); } MSMUtils.setValue(command_element, value_str); } value_str = null; name_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parsePlugIn(String command_str) { Element command_element = null; boolean use_metadata_files = false; boolean show_progress = false; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); // Check the token count. 
The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument). if(tokenizer.countTokens() >= 2) { command_element = document.createElement(PLUGIN_ELEMENT); // First token is plugin tokenizer.nextToken(); // The next token is the type String type = tokenizer.nextToken(); command_element.setAttribute(TYPE_ATTRIBUTE, type); // Now we parse out the remaining arguments into a hashmapping from name to value HashMap arguments = parseArguments(tokenizer); // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently Iterator names = arguments.keySet().iterator(); while(names.hasNext()) { String name = (String) names.next(); String value = (String) arguments.get(name); // Can be null Element option_element = document.createElement(OPTION_ELEMENT); if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) { use_metadata_files = true; } else if(name.substring(1).equals(SHOW_PROGRESS_ARGUMENT)) { show_progress = true; } option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1)); option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments if(value != null) { // Remove any speech marks appended in strings containing whitespace if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) { value = value.substring(1, value.length() - 1); } if(name.equals(METADATA_ARGUMENT)) { // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace. 
if(value.indexOf(MSMUtils.NS_SEP) == -1) { value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value; } } MSMUtils.setValue(option_element, value); } command_element.appendChild(option_element); option_element = null; name = null; value = null; } // We must have some RecPlug options: use_metadata_files, and show_progress if (type.equals(RECPLUG_STR)) { if (!use_metadata_files) { Element option_element = document.createElement(OPTION_ELEMENT); option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT); option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); command_element.appendChild(option_element); option_element = null; } if(!show_progress) { Element option_element = document.createElement(OPTION_ELEMENT); option_element.setAttribute(NAME_ATTRIBUTE, SHOW_PROGRESS_ARGUMENT); option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); command_element.appendChild(option_element); option_element = null; } } type = null; names = null; arguments = null; } tokenizer = null; } catch(Exception exception) { } return command_element; } private Element parseSearchType(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); // First token is command type (levels) tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(SEARCHTYPE_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); while(tokenizer.hasMoreTokens()) { Element search_element = document.createElement(CONTENT_ELEMENT); search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(search_element); search_element = null; } } } catch(Exception exception) { } return command_element; } private Element parseSubCollection(String command_str) { Element command_element = null; try { CommandTokenizer tokenizer = new 
CommandTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(SUBCOLLECTION_ELEMENT); // First token is command type tokenizer.nextToken(); // Then subcollection identifier command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); // Then finally the pattern used to build the subcollection partition String full_pattern_str = tokenizer.nextToken(); // To make life easier I'm going to parse this up now. boolean exclusion = (full_pattern_str.substring(1, 2).equals(EXCLAIMATION_CHARACTER)); // Set inclusion/exclusion flag, remove any exclaimation mark and the speech marks if(exclusion) { full_pattern_str = full_pattern_str.substring(2, full_pattern_str.length() - 1); command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR); } else { full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length() - 1); command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR); } StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER); if(pattern_tokenizer.countTokens() >= 2) { String content_str = pattern_tokenizer.nextToken(); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. 
if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) { content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str; } command_element.setAttribute(CONTENT_ATTRIBUTE, content_str); MSMUtils.setValue(command_element, pattern_tokenizer.nextToken()); if(pattern_tokenizer.hasMoreTokens()) { command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken()); } } pattern_tokenizer = null; } } catch(Exception exception) { exception.printStackTrace(); } return command_element; } private Element parseSubCollectionDefaultIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() == 2) { command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT); tokenizer.nextToken(); //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken()); String content_str = tokenizer.nextToken(); StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); command_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; } tokenizer = null; } catch(Exception exception) { } return command_element; } private Element parseSubCollectionIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT); } while(tokenizer.hasMoreTokens()) { Element subcollectionindex_element = document.createElement(INDEX_ELEMENT); //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken()); String content_str = tokenizer.nextToken(); 
StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); subcollectionindex_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; command_element.appendChild(subcollectionindex_element); subcollectionindex_element = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseSuperCollection(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(SUPERCOLLECTION_ELEMENT); tokenizer.nextToken(); while(tokenizer.hasMoreTokens()) { Element collection_element = document.createElement(COLLECTION_ELEMENT); collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(collection_element); collection_element = null; } } tokenizer = null; } catch(Exception exception) { } return command_element; } private String pluginToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(); if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) { text.append(PLUGIN_STR); text.append(TAB_CHARACTER); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(TYPE_ATTRIBUTE)); // Retrieve, and output, the arguments NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT); int option_elements_length = option_elements.getLength(); if(option_elements_length > 0) { text.append(SPACE_CHARACTER); for(int j = 0; j < option_elements_length; j++) { Element option_element = (Element) option_elements.item(j); if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { 
text.append(StaticStrings.MINUS_CHARACTER); text.append(option_element.getAttribute(NAME_ATTRIBUTE)); String value_str = MSMUtils.getValue(option_element); if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) { value_str = value_str.substring(EXTRACTED_PREFIX.length()); } if(value_str.length() > 0) { text.append(SPACE_CHARACTER); if(value_str.indexOf(SPACE_CHARACTER) == -1) { text.append(value_str); } else { text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } } value_str = null; if(j < option_elements_length - 1) { text.append(SPACE_CHARACTER); } } option_element = null; } } option_elements = null; } return text.toString(); } private String searchtypeToString(Element command_element) { if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { StringBuffer text = new StringBuffer(SEARCHTYPE_STR); text.append(TAB_CHARACTER); NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int search_elements_length = search_elements.getLength(); for(int i = 0; i < search_elements_length; i++) { Element search_element = (Element) search_elements.item(i); text.append(search_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); } return text.substring(0, text.length() - 1); } else { return null; } } private String subcollectionToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(SUBCOLLECTION_STR); text.append(SPACE_CHARACTER); text.append(command_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); text.append(TAB_CHARACTER); text.append(SPEECH_CHARACTER); if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) { text.append(EXCLAIMATION_CHARACTER); } String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE); if(!show_extracted_namespace && content_str.startsWith(EXTRACTED_PREFIX)) { content_str = content_str.substring(EXTRACTED_PREFIX.length()); } text.append(content_str); 
content_str = null; text.append(SEPARATOR_CHARACTER); text.append(MSMUtils.getValue(command_element)); text.append(SEPARATOR_CHARACTER); String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE); if(options_str.length() > 0) { text.append(options_str); } options_str = null; text.append(SPEECH_CHARACTER); return text.toString(); } private String subcollectionDefaultIndexToString(Element command_element) { StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR); text.append(TAB_CHARACTER); NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); for(int j = 0; j < content_elements_length; j++) { Element content_element = (Element) content_elements.item(j); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(j < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } } return text.toString(); } private String subcollectionIndexesToString(Element command_element) { StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR); text.append(TAB_CHARACTER); // Retrieve all of the subcollection index partitions NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT); int subcollectionindex_elements_length = subcollectionindex_elements.getLength(); if(subcollectionindex_elements_length == 0) { return null; } for(int j = 0; j < subcollectionindex_elements_length; j++) { Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j); NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); for(int k = 0; k < content_elements_length; k++) { Element content_element = (Element) content_elements.item(k); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(k < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } } if(j < subcollectionindex_elements_length - 
1) { text.append(SPACE_CHARACTER); } } return text.toString(); } private String supercollectionToString(Element command_element) { NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT); int content_elements_length = content_elements.getLength(); if(content_elements_length > 1) { StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR); text.append(TAB_CHARACTER); for(int j = 0; j < content_elements_length; j++) { Element content_element = (Element) content_elements.item(j); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(j < content_elements_length - 1) { text.append(SPACE_CHARACTER); } } return text.toString(); } return null; } private String unknownToString(Element command_element) { return MSMUtils.getValue(command_element); } /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset. * @param writer the BufferedWriter to which the str will be written * @param str the String to be written */ private void write(BufferedWriter writer, String str) throws IOException { writer.write(str, 0, str.length()); } }