/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Author: John Thompson, Greenstone Digital Library, University of Waikato * * Copyright (C) 1999 New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */ package org.greenstone.gatherer.cdm; import java.awt.*; import java.awt.event.*; import java.io.*; import java.util.*; import javax.swing.*; import org.greenstone.gatherer.Configuration; import org.greenstone.gatherer.DebugStream; import org.greenstone.gatherer.Gatherer; import org.greenstone.gatherer.gui.GLIButton; import org.greenstone.gatherer.metadata.MetadataElement; import org.greenstone.gatherer.metadata.MetadataTools; import org.greenstone.gatherer.util.DOMTree; import org.greenstone.gatherer.util.Codec; import org.greenstone.gatherer.util.StaticStrings; import org.greenstone.gatherer.util.Utility; import org.greenstone.gatherer.util.XMLTools; import org.w3c.dom.*; /** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. 
This later version is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independant ListModels which clobbered the ordering of unparsed commands). * @author John Thompson, Greenstone Digital Library, University of Waikato * @version 2.3d */ public class CollectionConfiguration extends StaticStrings { static final public String ENCODING = "UTF-8"; static public Document document; static public void main(String[] args) { if(args.length >= 1) { File file = new File(args[0]); CollectionConfiguration collect_cfg = new CollectionConfiguration(file); collect_cfg.save(true); collect_cfg.save(false); collect_cfg = null; } else { System.out.println("Usage: CollectionConfiguration "); } } /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location. * @param target_element the command Element to be inserted * @return the Element which the given command should be inserted before, or null to append to end of list */ static public Node findInsertionPoint(Element target_element) { ///ystem.err.println("Find insertion point: " + target_element.getNodeName()); String target_element_name = target_element.getNodeName(); Element document_element = document.getDocumentElement(); // Try to find commands with the same tag. NodeList matching_elements = document_element.getElementsByTagName(target_element_name); // If we found matching elements, then we have our most likely insertion location, so check within for groupings if(matching_elements.getLength() != 0) { ///ystem.err.println("Found matching elements."); // Only CollectionMeta are grouped. 
if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) { ///ystem.err.println("Dealing with collection metadata"); // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end. // So if the command to be added is special add it immediately after any other special command if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { int index = 0; Element matched_element = (Element) matching_elements.item(index); Element sibling_element = (Element) matched_element.getNextSibling(); while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { index++; matched_element = (Element) matching_elements.item(index); sibling_element = (Element) matched_element.getNextSibling(); } if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; } // Otherwise try to find a matching 'name' and add after the last one in that group. else { int index = 0; target_element_name = target_element.getAttribute(NAME_ATTRIBUTE); boolean found = false; // Skip all of the special metadata Element matched_element = (Element) matching_elements.item(index); while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) { index++; matched_element = (Element) matching_elements.item(index); } // Begin search while(!found && matched_element != null) { if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) { found = true; } else { index++; matched_element = (Element) matching_elements.item(index); } } // If we found a match, we need to continue checking until we find the last name match. 
if(found) { index++; Element previous_sibling = matched_element; Element sibling_element = (Element) matching_elements.item(index); while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) { previous_sibling = sibling_element; index++; sibling_element = (Element) matching_elements.item(index); } // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines! return previous_sibling.getNextSibling(); } // If not found we just add after last metadata element else { Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1); return last_element.getNextSibling(); } } } else { ///ystem.err.println("Not dealing with collection meta."); Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } } ///ystem.err.println("No matching elements found."); // Locate where this command is in the ordering int command_index = -1; for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) { if(COMMAND_ORDER[i].equals(target_element_name)) { command_index = i; } } ///ystem.err.println("Command index is: " + command_index); // Now move forward, checking for existing elements in each of the preceeding command orders. 
int preceeding_index = command_index - 1; ///ystem.err.println("Searching before the target command."); while(preceeding_index >= 0) { matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]); // If we've found a match if(matching_elements.getLength() > 0) { // We add after the last element Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } preceeding_index--; } // If all that fails, we now move backwards through the commands int susceeding_index = command_index + 1; ///ystem.err.println("Searching after the target command."); while(susceeding_index < COMMAND_ORDER.length) { matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]); // If we've found a match if(matching_elements.getLength() > 0) { // We add before the first element Element matched_element = (Element) matching_elements.item(0); // One final quick test. 
If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getPreviousSibling(); if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) { Element newline_element = document.createElement(NEWLINE_ELEMENT); document_element.insertBefore(newline_element, sibling_element); } return sibling_element; // Note that this may be null } susceeding_index++; } // Well. Apparently there are no other commands in this collection configuration. So append away... return null; } static public String toString(Element command_element, boolean show_extracted_namespace) { String command_element_name = command_element.getNodeName(); if(command_element_name.equals(CLASSIFY_ELEMENT)) { return self.classifyToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(FORMAT_ELEMENT)) { return self.formatToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(INDEXES_ELEMENT)) { return self.indexesToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) { return self.indexDefaultToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(LANGUAGES_ELEMENT)) { return self.languagesToString(command_element); } else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) { return self.languageDefaultToString(command_element); } else if(command_element_name.equals(LEVELS_ELEMENT)) { return self.levelsToString(command_element); } else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) { return self.metadataToString(command_element, 
show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) { return self.metadataToString(command_element, show_extracted_namespace); } // else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) { // return self.metadataToString(command_element, show_extracted_namespace); // } else if(command_element_name.equals(PLUGIN_ELEMENT)) { return self.pluginToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) { return self.searchtypeToString(command_element); } else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) { return self.subcollectionToString(command_element, show_extracted_namespace); } else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) { return self.subcollectionDefaultIndexToString(command_element); } else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) { return self.subcollectionIndexesToString(command_element); } else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) { return self.supercollectionToString(command_element); } else if(command_element_name.equals(UNKNOWN_ELEMENT)) { return self.unknownToString(command_element); } return ""; } /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix. 
* @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string * @return a HashMap containing the arguments parsed */ static public HashMap parseArguments(CommandTokenizer tokenizer) { HashMap arguments = new HashMap(); String name = null; String value = null; while(tokenizer.hasMoreTokens() || name != null) { // First we retrieve a name if we need one. if(name == null) { name = tokenizer.nextToken(); } // Now we attempt to retrieve a value if(tokenizer.hasMoreTokens()) { value = tokenizer.nextToken(); // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop. if(value.startsWith(StaticStrings.MINUS_CHARACTER)) { arguments.put(name, null); name = value; } // Otherwise we have a typical name->value pair ready to go else { arguments.put(name, value); name = null; } } // Otherwise its a binary flag else { arguments.put(name, null); name = null; } } return arguments; } static private ArrayList known_metadata; static private CollectionConfiguration self; /** Gives the preferred ordering of commands */ static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, /* StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, */ StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT}; /** ************************** Public Data Members ***************************/ /** 
************************** Private Data Members ***************************/ /** Is the configuration file currently being read in one of the infamous G2.39 ones. */ private boolean is_twopointthreenine = false; private File collect_config_file; /** ************************** Public Methods ***************************/ public CollectionConfiguration(File collect_config_file) { this.self = this; this.collect_config_file = collect_config_file; // If collect_cfg is xml we can load it straight away String collect_config_name = collect_config_file.getName(); if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) { // Parse with Utility but don't use class loader document = Utility.parse(collect_config_file, false); } // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser else if(collect_config_name.equals(COLLECT_CFG)) { document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true); parse(collect_config_file); } } /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. 
*/ public void display() { JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false); dialog.setSize(400,400); JPanel content_pane = (JPanel) dialog.getContentPane(); final DOMTree tree = new DOMTree(document); JButton refresh_button = new GLIButton("Refresh Tree"); refresh_button.setMnemonic(KeyEvent.VK_R); refresh_button.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent event) { tree.setDocument(document); } }); content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5)); content_pane.setLayout(new BorderLayout()); content_pane.add(new JScrollPane(tree), BorderLayout.CENTER); content_pane.add(refresh_button, BorderLayout.SOUTH); dialog.show(); } // public Element getBeta() { // Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null); // element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR); // element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); // return element; // } public Element getCreator() { Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null); element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR); element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); return element; } public Element getDocumentElement() { return document.getDocumentElement(); } public File getFile() { return collect_config_file; } /** Retrieve or create the languages Element. */ public Element getLanguages() { return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null); } public Element getLevels() { return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null); } public Element getMaintainer() { Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null); element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR); element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); return element; } /** Retrieve or create the indexes Element. 
Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */ public Element getMGIndexes() { return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR); } public Element getMGPPIndexes() { return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR); } public Element getPublic() { Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null); element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR); element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); return element; } /** Retrieve or create the searchtype element. */ public Element getSearchType() { ///ystem.err.println("Get or create element by tag name: " + name); Element document_element = document.getDocumentElement(); NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT); int elements_length = elements.getLength(); if(elements_length > 0) { document_element = null; return (Element) elements.item(0); } // Create the element Element element = document.createElement(SEARCHTYPE_ELEMENT); Node target_node = findInsertionPoint(element); if(target_node != null) { document_element.insertBefore(element, target_node); } else { document_element.appendChild(element); } document_element = null; // Append a default search type node - form Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT); a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]); element.appendChild(a_searchtype_element); return element; } /** Retrieve or create the subindexes Element. */ public Element getSubIndexes() { return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null); } /** Retrieve or create the supercollections Element. 
*/ public Element getSuperCollection() { return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null); } public boolean ready() { return document != null; } public void save() { save(false); } public void save(boolean force_xml) { if(collect_config_file.exists()) { File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG); File backup_file = new File(collect_config_file.getParentFile(), "collect.bak"); if(backup_file.exists()) { backup_file.delete(); } if(!original_file.renameTo(backup_file)) { DebugStream.println("Can't rename collect.cfg"); } } if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) { ///ystem.err.println("Writing XML"); Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML)); } else { ///ystem.err.println("Writing text"); try { OutputStream ostream = new FileOutputStream(collect_config_file); Writer file_writer = new OutputStreamWriter(ostream, ENCODING); //FileWriter file_writer = new FileWriter(collect_config_file, false); BufferedWriter buffered_writer = new BufferedWriter(file_writer); // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he). // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata?? // Yes we can. Lets see.. 
// known_metadata = new ArrayList(); 'blamo' Element collect_config_element = document.getDocumentElement(); NodeList command_elements = collect_config_element.getChildNodes(); boolean just_wrote_newline = false; // Prevent two or more newlines in a row for(int i = 0; i < command_elements.getLength(); i++) { Node command_node = command_elements.item(i); if(command_node instanceof Element) { Element command_element = (Element) command_node; // The only thing left are NewLine elements if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) { buffered_writer.newLine(); just_wrote_newline = true; } // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility) else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){ String command; // format statements we write out with ex. 
still present if (command_element.getNodeName().equals(FORMAT_ELEMENT)) { command = toString(command_element, true); } else { command = toString(command_element, false); } if(command != null && command.length() > 0) { write(buffered_writer, command); buffered_writer.newLine(); just_wrote_newline = false; } } } } buffered_writer.close(); // known_metadata = null; 'poof' } catch (Exception exception) { DebugStream.println("Error in CollectionConfiguration.save(boolean): " + exception); DebugStream.printStackTrace(exception); } } } /** ************************** Private Methods ***************************/ private String classifyToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(CLASSIFY_STR); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(TYPE_ATTRIBUTE)); text.append(SPACE_CHARACTER); NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT); int option_elements_length = option_elements.getLength(); for(int j = 0; j < option_elements_length; j++) { Element option_element = (Element) option_elements.item(j); if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { text.append(StaticStrings.MINUS_CHARACTER); text.append(option_element.getAttribute(NAME_ATTRIBUTE)); String value_str = XMLTools.getValue(option_element); // Remove the extracted metadata namespaces if required if (value_str.length() > 0) { StringTokenizer string_tokenizer = new StringTokenizer(value_str, ","); value_str = ""; while (string_tokenizer.hasMoreElements()) { String token = (String) string_tokenizer.nextElement(); MetadataElement metadata_element = MetadataTools.getMetadataElementWithDisplayName(token); if (metadata_element != null) { token = metadata_element.getFullName(); } if (token.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) { token = token.substring(StaticStrings.EXTRACTED_NAMESPACE.length()); } value_str = value_str + token; if (string_tokenizer.hasMoreElements()) { 
value_str = value_str + ","; } } } text.append(SPACE_CHARACTER); if (value_str.indexOf(SPACE_CHARACTER) == -1) { text.append(value_str); } else { text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } value_str = null; if(j < option_elements_length - 1) { text.append(SPACE_CHARACTER); } } option_element = null; } option_elements = null; return text.toString(); } private String formatToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(FORMAT_STR); text.append(SPACE_CHARACTER); text.append(command_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); String value_str = command_element.getAttribute(VALUE_ATTRIBUTE); if(value_str.length() != 0) { text.append(value_str); } else { // Remember to encode format string to Greenstone specification value_str = Codec.transform(XMLTools.getValue(command_element), Codec.DOM_TO_GREENSTONE); // Remove any references to a namespace for extracted metadata if (!show_extracted_namespace) { String match_string = "\\[" + Utility.EXTRACTED_METADATA_NAMESPACE + "\\."; value_str = value_str.replaceAll(match_string, "["); } text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } value_str = null; return text.toString(); } /** Retrieve or create the indexes Element. 
*/
    private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
        Element root_element = document.getDocumentElement();
        NodeList candidates = root_element.getElementsByTagName(name);
        int candidate_count = candidates.getLength();
        if (conditional_attribute == null) {
            // Without a condition the first element with the right tag will do
            if (candidate_count > 0) {
                return (Element) candidates.item(0);
            }
        }
        else {
            // Otherwise we want the first element whose attribute has the required value
            for (int i = 0; i < candidate_count; i++) {
                Element candidate = (Element) candidates.item(i);
                if (candidate.getAttribute(conditional_attribute).equals(required_value)) {
                    return candidate;
                }
            }
        }
        // Nothing suitable exists, so create the element, set the property if there was one, and insert the element where the command ordering says it belongs
        Element created_element = document.createElement(name);
        if (conditional_attribute != null) {
            created_element.setAttribute(conditional_attribute, required_value);
        }
        Node insert_before = findInsertionPoint(created_element);
        if (insert_before == null) {
            root_element.appendChild(created_element);
        }
        else {
            root_element.insertBefore(created_element, insert_before);
        }
        return created_element;
    }

    /** Convert an indexes command Element into text. The command is written as a comment if the element is unassigned.
     * @param command_element the indexes Element
     * @param show_extracted_namespace false to remove the extracted metadata namespace from the front of index sources
     * @return the command as text, an empty string if the element contains no indexes, or null if any index has no content
     */
    private String indexesToString(Element command_element, boolean show_extracted_namespace) {
        boolean comment_only = command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR);
        NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
        int index_count = index_elements.getLength();
        if (index_count == 0) {
            // no indexes
            return "";
        }
        StringBuffer text = new StringBuffer("");
        if (comment_only) {
            text.append("#");
        }
        text.append(INDEX_STR);
        text.append(TAB_CHARACTER);
        if (!comment_only) {
            text.append(TAB_CHARACTER);
        }
        // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
        for (int j = 0; j < index_count; j++) {
            Element index_element = (Element) index_elements.item(j);
            String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
            if (level_str.length() > 0) {
                text.append(level_str);
                text.append(StaticStrings.COLON_CHARACTER);
            }
            NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
            int content_count = content_elements.getLength();
            // Don't output anything if no indexes are set
            if (content_count == 0) {
                return null;
            }
            for (int k = 0; k < content_count; k++) {
                String source_name = ((Element) content_elements.item(k)).getAttribute(NAME_ATTRIBUTE);
                if (!show_extracted_namespace && source_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
                    source_name = source_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
                }
                text.append(source_name);
                if (k < content_count - 1) {
                    text.append(StaticStrings.COMMA_CHARACTER);
                }
            }
            if (j < index_count - 1) {
                text.append(SPACE_CHARACTER);
            }
        }
        return text.toString();
    }

    /** Convert a default index command Element into text: the keyword, the level, a colon, then the comma separated sources. The command is written as a comment if the element is unassigned.
     * @param command_element the default index Element
     * @param show_extracted_namespace false to remove the extracted metadata namespace from the front of index sources
     * @return the command as text
     */
    private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
        StringBuffer text = new StringBuffer("");
        if (command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
            text.append("#");
        }
        text.append(INDEX_DEFAULT_STR);
        text.append(TAB_CHARACTER);
        text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
        text.append(StaticStrings.COLON_CHARACTER);
        NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
        int content_count = content_elements.getLength();
        for (int j = 0; j < content_count; j++) {
            String source_name = ((Element) content_elements.item(j)).getAttribute(NAME_ATTRIBUTE);
            if (!show_extracted_namespace && source_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
                source_name = source_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
            }
            text.append(source_name);
            if (j < content_count - 1) {
                text.append(StaticStrings.COMMA_CHARACTER);
            }
        }
        return text.toString();
    }

    /** Convert a languages command Element into text: the keyword then the names of its languages in a space separated list.
     * @param command_element the languages Element
     * @return the command as text, or null if the element contains no languages
     */
    private String languagesToString(Element command_element) {
        NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
        int language_count = language_elements.getLength();
        if (language_count == 0) {
            return null;
        }
        StringBuffer text = new StringBuffer(LANGUAGES_STR);
        text.append(TAB_CHARACTER);
        for (int j = 0; j < language_count; j++) {
            text.append(((Element) language_elements.item(j)).getAttribute(NAME_ATTRIBUTE));
            if (j < language_count - 1) {
                text.append(SPACE_CHARACTER);
            }
        }
        return text.toString();
    }

    /** Convert a default language command Element into text: the keyword then the name of the language.
     * @param command_element the default language Element
     * @return the command as text
     */
    private String languageDefaultToString(Element command_element) {
        StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
        text.append(TAB_CHARACTER);
        text.append(command_element.getAttribute(NAME_ATTRIBUTE));
        return text.toString();
    }

    /** Convert a levels command Element into text: the keyword then the names of its levels separated by spaces. The command is written as a comment unless the element's assigned attribute is true.
     * @param command_element the levels Element
     * @return the command as text, or null if no levels are set
     */
    private String levelsToString(Element command_element) {
        NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
        int content_count = content_elements.getLength();
        // Don't output anything if no levels are set.
        if (content_count == 0) {
            return null;
        }
        StringBuffer text = new StringBuffer("");
        if (!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
            text.append("#");
        }
        text.append(LEVELS_STR);
        text.append(TAB_CHARACTER);
        text.append(TAB_CHARACTER);
        for (int i = 0; i < content_count; i++) {
            text.append(((Element) content_elements.item(i)).getAttribute(NAME_ATTRIBUTE));
            text.append(SPACE_CHARACTER);
        }
        // Every level was followed by a separator, so drop the final character
        return text.substring(0, text.length() - 1);
    }

    /** Convert a collection metadata Element into text. The creator, maintainer and public metadata are written as their name followed by the bare value, while all other metadata is written as the collectionmeta keyword, the name, the language argument in brackets and the value in speech marks.
     * @param command_element the metadata Element
     * @param text_value compared against CollectionMeta.TEXT to choose whether the value is decoded to text or to the Greenstone encoding
     * @return the command as text
     */
    static public String metadataToString(Element command_element, boolean text_value) {
        StringBuffer text = new StringBuffer("");
        String metadata_name = command_element.getAttribute(NAME_ATTRIBUTE);
        boolean is_maintainer = metadata_name.equals(COLLECTIONMETADATA_MAINTAINER_STR);
        // (COLLECTIONMETADATA_BETA_STR used to be one of the special names too.)
        boolean special = is_maintainer
            || metadata_name.equals(COLLECTIONMETADATA_CREATOR_STR)
            || metadata_name.equals(COLLECTIONMETADATA_PUBLIC_STR);
        if (special) {
            // If the name is one of the special ones, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prettying' reasons, getting one tab where the others get two.
            text.append(metadata_name);
            text.append(TAB_CHARACTER);
            if (!is_maintainer) {
                text.append(TAB_CHARACTER);
            }
        }
        else {
            text.append(COLLECTIONMETADATA_STR);
            text.append(TAB_CHARACTER);
            text.append(metadata_name);
            text.append(SPACE_CHARACTER);
            // The language argument is always written, even for the first metadata with a given name.
            text.append(LBRACKET_CHARACTER);
            text.append(LANGUAGE_ARGUMENT);
            text.append(command_element.getAttribute(LANGUAGE_ATTRIBUTE));
            text.append(RBRACKET_CHARACTER);
            text.append(SPACE_CHARACTER);
            if (known_metadata != null) {
                known_metadata.add(metadata_name);
            }
        }
        // The value string we retrieve will be encoded for xml, so we now decode it - to text if text_value set. This parameter was originally show_extracted_namespace, but since this is only true for 'toString()' commands from within the CDM, it is good enough to determine if this toString() will be used to display on screen, or write to collect.cfg
        String value_str = XMLTools.getValue(command_element);
        if (text_value == CollectionMeta.TEXT) {
            value_str = Codec.transform(value_str, Codec.DOM_TO_TEXT);
        }
        else {
            value_str = Codec.transform(value_str, Codec.DOM_TO_GREENSTONE);
        }
        // We don't wrap the email addresses in quotes, nor the other special metadata
        if (!special) {
            text.append(SPEECH_CHARACTER);
        }
        text.append(value_str);
        if (!special) {
            text.append(SPEECH_CHARACTER);
        }
        return text.toString();
    }

    /** Parse a collect.cfg into a DOM model representation. */
    private void parse(File collect_config_file) {
        try {
            ArrayList acquired_collectionmeta_names = null;
            ArrayList obsolete_collectionmeta_names = null;
            HashMap changed_collectionmeta_names = null;
            // Life is made oh-so-more tricky by the existence of G2.39 config files. There are two ways to handle them:
            // 1. Notice that the file is G2.39 from the start, then as I parse it magic it into G2.4 standard
            // 2. Extend the parsing method to handle reading in G2.39, then afterwards go through the DOM changing it to G2.4 as appropriate.
            // As far as I can tell the second option is twice as much work, so I'll try option 1.
The problem here is that I have to determine if the 'buildtype' command is somewhere in the collect.cfg file, which means I'm going to have to read the file twice - once seaching for 'buildtype' and the second time to parse it. // Search for 'buildtype mgpp' InputStream input_stream_one = new FileInputStream(collect_config_file); Reader reader_one = new InputStreamReader(input_stream_one, ENCODING); BufferedReader buffered_reader_one = new BufferedReader(reader_one); String search_line_str = null; while(!is_twopointthreenine && (search_line_str = buffered_reader_one.readLine()) != null) { if(search_line_str.toLowerCase().indexOf(BUILDTYPE_STR) != -1) { is_twopointthreenine = true; acquired_collectionmeta_names = new ArrayList(); changed_collectionmeta_names = new HashMap(); obsolete_collectionmeta_names = new ArrayList(); } } buffered_reader_one.close(); reader_one.close(); input_stream_one.close(); buffered_reader_one = null; reader_one = null; input_stream_one = null; Element collect_cfg_element = document.getDocumentElement(); // Read in the file one command at a time. InputStream istream = new FileInputStream(collect_config_file); Reader in_reader = new InputStreamReader(istream, ENCODING); BufferedReader in = new BufferedReader(in_reader); String command_str = null; while((command_str = in.readLine()) != null) { boolean append_element = true; Element command_element = null; // A command may be broken over several lines. 
command_str = command_str.trim(); boolean eof = false; while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) { String next_line = in.readLine(); if(next_line != null) { next_line = next_line.trim(); if(next_line.length() > 0) { // Remove the new line character command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER)); // And append the next line, which due to the test above must be non-zero length command_str = command_str + next_line; } next_line = null; } // If we've reached the end of the file theres nothing more we can do else { eof = true; } } // If there is still a new line character, then we remove it and hope for the best if(command_str.endsWith(NEWLINE_CHARACTER)) { command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER)); } // Now we've either got a command to parse... if(command_str.length() != 0) { // Start trying to figure out what it is //StringTokenizer tokenizer = new StringTokenizer(command_str); // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines. CommandTokenizer tokenizer = new CommandTokenizer(command_str, in); String command_type = tokenizer.nextToken().toLowerCase(); // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created if(command_element == null && command_type.equals(BUILDTYPE_STR)) { DebugStream.println("G2.39 Buildtype command detected. 
Ignoring."); command_element = document.createElement(UNKNOWN_ELEMENT); append_element = false; } if(command_element == null && command_type.equals(CLASSIFY_STR)) { command_element = parseClassify(command_str); } if(command_element == null && command_type.equals(FORMAT_STR)) { command_element = parseFormat(tokenizer); // Revised to handle multiple lines } if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) { // If this was a G2.39 config file then we manipulate the command string a bit before we submit it to the parser. We start by adding allfields as the first index. We then space separate the remaining indexes, and remove duplicates when encountered. Of course before we do any of that we record the various space separated indexes so that we can remove the collection meta assigned to them. if(is_twopointthreenine) { DebugStream.println("G2.39 Index command detected. Modifying."); DebugStream.println("Before: " + command_str); StringBuffer new_command_str = new StringBuffer(command_type); new_command_str.append(SPACE_CHARACTER); new_command_str.append(ALLFIELDS_STR); new_command_str.append(SPACE_CHARACTER); ArrayList known_indexes = new ArrayList(); while(tokenizer.hasMoreTokens()) { String old_index_str = tokenizer.nextToken(); // If this index is a combination of sources, then we need to remove the old collectionmeta, split up the compound index, then request new metadata be added for each part if(old_index_str.indexOf(COMMA_CHARACTER) != -1) { obsolete_collectionmeta_names.add(STOP_CHARACTER + old_index_str); StringTokenizer string_tokenizer = new StringTokenizer(old_index_str, COMMA_CHARACTER); while(string_tokenizer.hasMoreTokens()) { String index_fragment_str = string_tokenizer.nextToken(); if(!known_indexes.contains(index_fragment_str)) { known_indexes.add(index_fragment_str); new_command_str.append(index_fragment_str); new_command_str.append(SPACE_CHARACTER); 
acquired_collectionmeta_names.add(STOP_CHARACTER + index_fragment_str); } index_fragment_str = null; } string_tokenizer = null; } // However if this was just a single index then a little choir of angels sing haleluja because we don't have to do -anything-. Nothing at all. Zip. Well no changes anyway. I obviously had to write this comment, and you can probably see, assuming you are not blind, that there are several lines of code below doing something, which is of course not nothing but something. And if we assume you are blind then you probably can't see the code, but then you probably didn't not see it doing the not nothing I said it would above. else { if(!known_indexes.contains(old_index_str)) { known_indexes.add(old_index_str); new_command_str.append(old_index_str); new_command_str.append(SPACE_CHARACTER); } else { // Use the collectionmeta for the single index instead of generating a default one acquired_collectionmeta_names.remove(STOP_CHARACTER + old_index_str); } } old_index_str = null; } known_indexes = null; command_str = new_command_str.toString(); new_command_str = null; DebugStream.println("After: " + command_str); } command_element = parseIndex(command_str); } if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) { command_element = parseIndexDefault(command_str); // If this was a G2.39 config file then we squelch the default index (no such thing in G2.4) if(is_twopointthreenine) { DebugStream.println("G2.39 Default Index command detected. 
Ignoring."); append_element = false; } } if(command_element == null && command_type.equals(LANGUAGES_STR)) { command_element = parseLanguage(command_str); } if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) { command_element = parseLanguageDefault(command_str); } if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) { // Again if this is G2.39 we have to do a tiny bit of magic to the levels command. We need to add document level, and change the remainder to lower case. if(is_twopointthreenine) { DebugStream.println("G2.39 Levels command detected. Modifying."); DebugStream.println("Before: " + command_str); StringBuffer new_command_str = new StringBuffer(command_type); new_command_str.append(SPACE_CHARACTER); new_command_str.append(DOCUMENT_STR); while(tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); // Generate a lower case version String token_lc = token.toLowerCase(); // If they are still the same then it is all good baby, otherwise we have to remember to transform their collectionmeta as well if(!token.equals(token_lc)) { changed_collectionmeta_names.put(STOP_CHARACTER + token, STOP_CHARACTER + token_lc); } new_command_str.append(SPACE_CHARACTER); new_command_str.append(token_lc); token_lc = null; token = null; } command_str = new_command_str.toString(); new_command_str = null; DebugStream.println("After: " + command_str); } command_element = parseLevels(command_str); } if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) { command_element = parseMetadata(tokenizer); // Revised to handle multiple lines } if(command_element == null && (/* command_type.equals(COLLECTIONMETADATA_BETA_STR) || */ command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) { command_element = parseMetadataSpecial(command_str); } if(command_element == null && 
command_type.equals(PLUGIN_STR)) { command_element = parsePlugIn(command_str); } if(command_element == null && command_type.equals(SEARCHTYPE_STR)) { command_element = parseSearchType(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) { command_element = parseSubCollection(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) { command_element = parseSubCollectionDefaultIndex(command_str); } if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) { command_element = parseSubCollectionIndex(command_str); } if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) { command_element = parseSuperCollection(command_str); } // Doesn't match any known type command_type = null; if(command_element == null) { // No-one knows what to do with this command, so we create an Unknown command element command_element = document.createElement(UNKNOWN_ELEMENT); XMLTools.setValue(command_element, command_str); } } // Or an empty line to remember for later else { command_element = document.createElement(NEWLINE_ELEMENT); } // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it if(append_element) { collect_cfg_element.appendChild(command_element); } } // We have completed parsing the collect configuration file. Now, if we are dealing with the G2.39 nightmare scenario, it's time to add the SearchType command and modify the collectionmeta commands as necessary. 
if(is_twopointthreenine) { Element search_type_element = getSearchType(); search_type_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); while(search_type_element.hasChildNodes()) { search_type_element.removeChild(search_type_element.getFirstChild()); } Element plain_search_type_element = document.createElement(CONTENT_ELEMENT); plain_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[1]); search_type_element.appendChild(plain_search_type_element); plain_search_type_element = null; Element form_search_type_element = document.createElement(CONTENT_ELEMENT); form_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]); search_type_element.appendChild(form_search_type_element); form_search_type_element = null; search_type_element = null; // Search through the existing collectionmeta Element document_element = document.getDocumentElement(); NodeList collectionmeta_elements = document_element.getElementsByTagName(COLLECTIONMETADATA_ELEMENT); DebugStream.println("There are " + obsolete_collectionmeta_names.size() + " collectionmeta to remove."); DebugStream.println("There are " + changed_collectionmeta_names.size() + " collectionmeta to change."); for(int z = collectionmeta_elements.getLength(); z > 0; z--) { Element collectionmeta_element = (Element) collectionmeta_elements.item(z - 1); String name = collectionmeta_element.getAttribute(NAME_ATTRIBUTE); DebugStream.println("Checking " + name); // Remove any obsolete metadata if(obsolete_collectionmeta_names.contains(name)) { DebugStream.println("G2.39 CollectMeta detected. Removing: " + name); document_element.removeChild(collectionmeta_element); } // We may have been asked to change the index name to lower case else if(changed_collectionmeta_names.containsKey(name)) { String new_name = (String) changed_collectionmeta_names.get(name); DebugStream.println("G2.39 CollectMeta detected. 
Changing: " + name + " -> " + new_name); collectionmeta_element.setAttribute(NAME_ATTRIBUTE, new_name); new_name = null; } name = null; } // Finally add any newly acquired collectionmeta. This general defaults to the collectionmeta name less the full stop DebugStream.println("There are " + acquired_collectionmeta_names.size() + " collectionmeta to add."); for(int y = 0; y < acquired_collectionmeta_names.size(); y++) { String name = (String) acquired_collectionmeta_names.get(y); String value = name.substring(1); DebugStream.println("G2.39 CollectMeta missing. Adding: " + name + " [l=" + Configuration.getLanguage() + "] \"" + value + "\""); Element element = document.createElement(COLLECTIONMETADATA_ELEMENT); element.setAttribute(NAME_ATTRIBUTE, name); element.setAttribute(LANGUAGE_ATTRIBUTE, Configuration.getLanguage()); element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); XMLTools.setValue(element, value); document_element.appendChild(element); element = null; value = null; name = null; } document_element = null; } } catch(Exception exception) { DebugStream.println("Error in CollectionConfiguration.parse(java.io.File): " + exception); DebugStream.printStackTrace(exception); } } private Element parseClassify(String command_str) { Element command_element = null; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument). 
if(tokenizer.countTokens() >= 2) { // Must support "classify Phind" (no args) command_element = document.createElement(CLASSIFY_ELEMENT); // First token is classify tokenizer.nextToken(); // The next token is the classifier type command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken()); // Now we parse out the remaining arguments into a hashmapping from name to value HashMap arguments = parseArguments(tokenizer); // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently Iterator names = arguments.keySet().iterator(); while(names.hasNext()) { String name = (String) names.next(); String value = (String) arguments.get(name); // Can be null // The metadata argument gets added as the content attribute if (name.equals(METADATA_ARGUMENT) && value != null) { // Add the extracted namespace onto un-namespaced metadata names StringTokenizer string_tokenizer = new StringTokenizer(value, ","); value = ""; while (string_tokenizer.hasMoreElements()) { String token = (String) string_tokenizer.nextElement(); if (token.indexOf(StaticStrings.NS_SEP) == -1) { token = StaticStrings.EXTRACTED_NAMESPACE + token; } else { MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(token); if (metadata_element != null) { token = metadata_element.getDisplayName(); } } if (!value.equals("")) { value = value + ","; } value = value + token; } } // Everything else is an Option Element Element option_element = document.createElement(OPTION_ELEMENT); option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1)); if(value != null) { // Remove any speech marks appended in strings containing whitespace if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) { value = value.substring(1, value.length() - 1); } XMLTools.setValue(option_element, value); } option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); 
command_element.appendChild(option_element); option_element = null; name = null; value = null; } names = null; arguments = null; } tokenizer = null; } catch(Exception error) { } return command_element; } private Element parseFormat(CommandTokenizer tokenizer) { Element command_element = null; try { command_element = document.createElement(FORMAT_ELEMENT); String name_str = tokenizer.nextToken(); String value_str = tokenizer.nextToken(); if(name_str != null && value_str != null) { command_element.setAttribute(NAME_ATTRIBUTE, name_str); // If the value is true or false we add it as an attribute if(value_str.equalsIgnoreCase(TRUE_STR) || value_str.equalsIgnoreCase(FALSE_STR)) { command_element.setAttribute(VALUE_ATTRIBUTE, value_str.toLowerCase()); } // Otherwise it gets added as a text node else { // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM); XMLTools.setValue(command_element, value_str); } } else { command_element = null; } name_str = null; value_str = null; } catch (Exception exception) { DebugStream.printStackTrace(exception); command_element = null; } return command_element; } private Element parseIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); String command = tokenizer.nextToken(); command_element = document.createElement(INDEXES_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? 
TRUE_STR : FALSE_STR)); command = null; if(!tokenizer.hasMoreTokens()) { // there are no indexes command_element.setAttribute(ASSIGNED_ATTRIBUTE, FALSE_STR); command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); // for now tokenizer = null; return command_element; } while(tokenizer.hasMoreTokens()) { Element index_element = document.createElement(INDEX_ELEMENT); String index_str = tokenizer.nextToken(); // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important. boolean old_index; if(index_str.indexOf(COLON_CHARACTER) != -1) { old_index = true; index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER))); index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1); command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); } else { command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR); old_index = false; } StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); String content_str = content_tokenizer.nextToken(); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(content_str.indexOf(StaticStrings.NS_SEP) == -1) { if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) { // Our special strings are OK. 
} else { content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str; } } content_element.setAttribute(NAME_ATTRIBUTE, content_str); index_element.appendChild(content_element); content_element = null; } content_tokenizer = null; index_str = null; command_element.appendChild(index_element); index_element = null; } tokenizer = null; } catch (Exception exception) { exception.printStackTrace(); } return command_element; } private Element parseIndexDefault(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { command_element = document.createElement(INDEX_DEFAULT_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR)); String index_str = tokenizer.nextToken(); command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER))); String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1); StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); command_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; content_str = null; index_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseLanguage(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(LANGUAGES_ELEMENT); while(tokenizer.hasMoreTokens()) { Element language_element = document.createElement(LANGUAGE_ELEMENT); 
language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(language_element); language_element = null; } } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseLanguageDefault(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT); tokenizer.nextToken(); String default_language_str = tokenizer.nextToken(); command_element.setAttribute(NAME_ATTRIBUTE, default_language_str); command_element.setAttribute(ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); default_language_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseLevels(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); // First token is command type (levels) String command = tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(LEVELS_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? 
TRUE_STR : FALSE_STR)); while(tokenizer.hasMoreTokens()) { Element level_element = document.createElement(CONTENT_ELEMENT); level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(level_element); level_element = null; } } command = null; } catch(Exception exception) { } return command_element; } private Element parseMetadata(CommandTokenizer tokenizer) { Element command_element = null; try { command_element = document.createElement(COLLECTIONMETADATA_ELEMENT); String name_str = tokenizer.nextToken(); String value_str = tokenizer.nextToken(); if(name_str != null && value_str != null) { String language_str = Configuration.getLanguage(); // Check if the value string is actually a language string if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) { language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1); value_str = tokenizer.nextToken(); } if(value_str != null) { // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM); command_element.setAttribute(NAME_ATTRIBUTE, name_str); command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); XMLTools.setValue(command_element, value_str); } else { command_element = null; } language_str = null; } else { command_element = null; } name_str = null; value_str = null; } catch (Exception exception) { DebugStream.printStackTrace(exception); command_element = null; } return command_element; } private Element parseMetadataSpecial(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 2) { String name_str = tokenizer.nextToken(); String value_str = tokenizer.nextToken(); // if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) { // command_element = 
document.createElement(COLLECTIONMETADATA_BETA_ELEMENT); // } if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) { command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT); } else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) { command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT); } else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) { command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT); } if(command_element != null) { command_element.setAttribute(NAME_ATTRIBUTE, name_str); command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR); command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) { value_str = value_str.substring(1, value_str.length() - 1); } XMLTools.setValue(command_element, value_str); } value_str = null; name_str = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parsePlugIn(String command_str) { Element command_element = null; boolean use_metadata_files = false; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument). 
if(tokenizer.countTokens() >= 2) { command_element = document.createElement(PLUGIN_ELEMENT); // First token is plugin tokenizer.nextToken(); // The next token is the type String type = tokenizer.nextToken(); command_element.setAttribute(TYPE_ATTRIBUTE, type); // Now we parse out the remaining arguments into a hashmapping from name to value HashMap arguments = parseArguments(tokenizer); // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently Iterator names = arguments.keySet().iterator(); while(names.hasNext()) { String name = (String) names.next(); String value = (String) arguments.get(name); // Can be null Element option_element = document.createElement(OPTION_ELEMENT); if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) { use_metadata_files = true; } option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1)); option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments if(value != null) { // Remove any speech marks appended in strings containing whitespace if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) { value = value.substring(1, value.length() - 1); } if(name.equals(METADATA_ARGUMENT)) { // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace. 
if(value.indexOf(StaticStrings.NS_SEP) == -1) { value = StaticStrings.EXTRACTED_NAMESPACE + value; } } XMLTools.setValue(option_element, value); } command_element.appendChild(option_element); option_element = null; name = null; value = null; } // We must have some RecPlug options: use_metadata_files, and show_progress if (type.equals(RECPLUG_STR)) { if (!use_metadata_files) { Element option_element = document.createElement(OPTION_ELEMENT); option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT); option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); command_element.appendChild(option_element); option_element = null; } } type = null; names = null; arguments = null; } tokenizer = null; } catch(Exception exception) { } return command_element; } private Element parseSearchType(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); // First token is command type (levels) tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(SEARCHTYPE_ELEMENT); command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); while(tokenizer.hasMoreTokens()) { Element search_element = document.createElement(CONTENT_ELEMENT); search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(search_element); search_element = null; } } } catch(Exception exception) { } return command_element; } private Element parseSubCollection(String command_str) { Element command_element = null; try { CommandTokenizer tokenizer = new CommandTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(SUBCOLLECTION_ELEMENT); // First token is command type tokenizer.nextToken(); // Then subcollection identifier command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); // Then finally the pattern used to build the subcollection partition String 
full_pattern_str = tokenizer.nextToken(); // Set inclusion/exclusion flag and remove any exclamation mark boolean exclusion = full_pattern_str.startsWith(EXCLAMATION_CHARACTER); if (exclusion) { full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length()); command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR); } else { command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR); } StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER); if(pattern_tokenizer.countTokens() >= 2) { String content_str = pattern_tokenizer.nextToken(); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(StaticStrings.NS_SEP) == -1) { content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str; } command_element.setAttribute(CONTENT_ATTRIBUTE, content_str); XMLTools.setValue(command_element, pattern_tokenizer.nextToken()); if(pattern_tokenizer.hasMoreTokens()) { command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken()); } } pattern_tokenizer = null; } } catch(Exception exception) { exception.printStackTrace(); } return command_element; } private Element parseSubCollectionDefaultIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() == 2) { command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT); tokenizer.nextToken(); //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken()); String content_str = tokenizer.nextToken(); StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); 
content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); command_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; } tokenizer = null; } catch(Exception exception) { } return command_element; } private Element parseSubCollectionIndex(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); tokenizer.nextToken(); if(tokenizer.hasMoreTokens()) { command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT); } while(tokenizer.hasMoreTokens()) { Element subcollectionindex_element = document.createElement(INDEX_ELEMENT); //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken()); String content_str = tokenizer.nextToken(); StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens()) { Element content_element = document.createElement(CONTENT_ELEMENT); content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); subcollectionindex_element.appendChild(content_element); content_element = null; } content_tokenizer = null; content_str = null; command_element.appendChild(subcollectionindex_element); subcollectionindex_element = null; } tokenizer = null; } catch (Exception exception) { } return command_element; } private Element parseSuperCollection(String command_str) { Element command_element = null; try { StringTokenizer tokenizer = new StringTokenizer(command_str); if(tokenizer.countTokens() >= 3) { command_element = document.createElement(SUPERCOLLECTION_ELEMENT); tokenizer.nextToken(); while(tokenizer.hasMoreTokens()) { Element collection_element = document.createElement(COLLECTION_ELEMENT); collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken()); command_element.appendChild(collection_element); collection_element = null; } } tokenizer = null; } catch(Exception exception) { } return 
command_element; } private String pluginToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(); if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) { text.append(PLUGIN_STR); text.append(TAB_CHARACTER); text.append(TAB_CHARACTER); text.append(command_element.getAttribute(TYPE_ATTRIBUTE)); // Retrieve, and output, the arguments NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT); int option_elements_length = option_elements.getLength(); if(option_elements_length > 0) { text.append(SPACE_CHARACTER); for(int j = 0; j < option_elements_length; j++) { Element option_element = (Element) option_elements.item(j); if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { text.append(StaticStrings.MINUS_CHARACTER); text.append(option_element.getAttribute(NAME_ATTRIBUTE)); String value_str = XMLTools.getValue(option_element); if(!show_extracted_namespace && value_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) { value_str = value_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length()); } if(value_str.length() > 0) { text.append(SPACE_CHARACTER); if(value_str.indexOf(SPACE_CHARACTER) == -1) { text.append(value_str); } else { text.append(SPEECH_CHARACTER); text.append(value_str); text.append(SPEECH_CHARACTER); } } value_str = null; if(j < option_elements_length - 1) { text.append(SPACE_CHARACTER); } } option_element = null; } } option_elements = null; } return text.toString(); } private String searchtypeToString(Element command_element) { if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) { StringBuffer text = new StringBuffer(SEARCHTYPE_STR); text.append(TAB_CHARACTER); NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int search_elements_length = search_elements.getLength(); for(int i = 0; i < search_elements_length; i++) { Element search_element = (Element) search_elements.item(i); 
text.append(search_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); } return text.substring(0, text.length() - 1); } else { return null; } } private String subcollectionToString(Element command_element, boolean show_extracted_namespace) { StringBuffer text = new StringBuffer(SUBCOLLECTION_STR); text.append(SPACE_CHARACTER); text.append(command_element.getAttribute(NAME_ATTRIBUTE)); text.append(SPACE_CHARACTER); text.append(TAB_CHARACTER); text.append(SPEECH_CHARACTER); if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) { text.append(EXCLAMATION_CHARACTER); } String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE); if(!show_extracted_namespace && content_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) { content_str = content_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length()); } text.append(content_str); content_str = null; text.append(SEPARATOR_CHARACTER); text.append(XMLTools.getValue(command_element)); text.append(SEPARATOR_CHARACTER); String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE); if(options_str.length() > 0) { text.append(options_str); } options_str = null; text.append(SPEECH_CHARACTER); return text.toString(); } private String subcollectionDefaultIndexToString(Element command_element) { StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR); text.append(TAB_CHARACTER); NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); for(int j = 0; j < content_elements_length; j++) { Element content_element = (Element) content_elements.item(j); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(j < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } } return text.toString(); } private String subcollectionIndexesToString(Element command_element) { StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR); text.append(TAB_CHARACTER); // Retrieve all 
of the subcollection index partitions NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT); int subcollectionindex_elements_length = subcollectionindex_elements.getLength(); if(subcollectionindex_elements_length == 0) { return null; } for(int j = 0; j < subcollectionindex_elements_length; j++) { Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j); NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT); int content_elements_length = content_elements.getLength(); for(int k = 0; k < content_elements_length; k++) { Element content_element = (Element) content_elements.item(k); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(k < content_elements_length - 1) { text.append(StaticStrings.COMMA_CHARACTER); } } if(j < subcollectionindex_elements_length - 1) { text.append(SPACE_CHARACTER); } } return text.toString(); } private String supercollectionToString(Element command_element) { NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT); int content_elements_length = content_elements.getLength(); if(content_elements_length > 1) { StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR); text.append(TAB_CHARACTER); for(int j = 0; j < content_elements_length; j++) { Element content_element = (Element) content_elements.item(j); text.append(content_element.getAttribute(NAME_ATTRIBUTE)); if(j < content_elements_length - 1) { text.append(SPACE_CHARACTER); } } return text.toString(); } return null; } private String unknownToString(Element command_element) { return XMLTools.getValue(command_element); } /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset. 
* @param writer the BufferedWriter to which the str will be written * @param str the String to be written */ private void write(BufferedWriter writer, String str) throws IOException { writer.write(str, 0, str.length()); } }