/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Methods to read collectionConfig.xml files into internal XML form, and write * them back out again. * * Copyright (C) 1999 New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */ package org.greenstone.gatherer.cdm; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.StringTokenizer; import org.greenstone.gatherer.DebugStream; import org.greenstone.gatherer.metadata.MetadataElement; import org.greenstone.gatherer.metadata.MetadataTools; import org.greenstone.gatherer.util.XMLTools; import org.greenstone.gatherer.util.StaticStrings; import org.greenstone.gatherer.util.Utility; import org.w3c.dom.*; public class CollectionConfigXMLReadWrite { static final private String PLUGOUT_ELEMENT = "plugout";//used by building flax collections /** ******************************************************************************************************** The code from this point below are used for greenstone 3 collection configuration, i.e., read ColletionConfig.xml * into the internal DOM tree, and convert the internal DOM tree back to CollectionConfig.xml. * Methods named 'doXXXX' are for convert collectionConfig.xml into the internal configuration xml structure; Methods named 'convertXXXX' are for convert the internal configuration xml structure back to collectionConfig.xml. ************************************************************************************************************ */ /**Arguments: metadataListNode->the 'displayItemList' element in collectionConfig.xml name_value->the value of the 'name' attribute of 'index' element; att_value->the value of the 'name' attribute of 'displayItem' element return: an ArrayList of the contructed 'CollectionMetadata' elements */ static private ArrayList doDisplayItemList (Document to, Node displayListNode, String att_value, String name_value) { Element toElement = to.getDocumentElement (); ArrayList display_item_list = new ArrayList (); ArrayList item_list = XMLTools.getNamedElementList ((Element)displayListNode, StaticStrings.DISPLAYITEM_STR, StaticStrings.NAME_ATTRIBUTE, att_value); if (item_list == null) { return null; } for (int i=0; i indexes_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); indexes_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); NodeList index_children = ((Element)searchNode).getElementsByTagName (StaticStrings.INDEX_LOW_STR);//index int num_nodes = index_children.getLength (); for (int i=0; i // For mg, it's the 'Old G2.38 and earlier' that use level:source tuplets, but we double check it anyway boolean old_index = true; if(index_str.indexOf (StaticStrings.COLON_CHARACTER) == -1) { // It doesn't contain ':' character System.err.println ("Something is wrong! the index should be level:source tuplets."); old_index = false; } else { // Handling 'index' element index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER))); index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1); //Each index may have a list of comma-separated strings. //split them into 'content' elements in the internal structure StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER); //index_str = ""; while(content_tokenizer.hasMoreTokens ()) { // Replace index_str to be qualified name, eg. dc.Subject and keywords insread of dc.Subject. Element content_element = to.createElement (StaticStrings.CONTENT_ELEMENT); String content_str = content_tokenizer.nextToken (); // Since the contents of indexes have to be certain keywords, or metadata elements, //if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(content_str.indexOf (StaticStrings.NS_SEP) == -1) { if(content_str.equals (StaticStrings.TEXT_STR) || (!old_index && content_str.equals (StaticStrings.ALLFIELDS_STR))) { // in this case, do nothing } else { content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str; } } content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str); index_element.appendChild (content_element); content_element = null; } // while ends indexes_element.appendChild (index_element); // Handling 'displayItem' elements and Constructing 'collectionmetadata' elements // Use the fully qualified index names ArrayList collectionmetadata_list = doDisplayItemList (to, e, StaticStrings.NAME_ATTRIBUTE, index_str_display); appendArrayList (toElement, collectionmetadata_list); } //else ends } //for loop ends appendProperly (toElement, indexes_element); //***// // create another set of which will be used when user switches to MGPP/LUCENE // i.e. we build a default index set for a start String []index_strs = {StaticStrings.TEXT_STR, StaticStrings.EXTRACTED_NAMESPACE + StaticStrings.TITLE_ELEMENT, StaticStrings.EXTRACTED_NAMESPACE + StaticStrings.SOURCE_ELEMENT}; Element mgpp_indexes = to.createElement (StaticStrings.INDEXES_ELEMENT);// mgpp_indexes.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR); mgpp_indexes.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR); for (int i=0; i Element content_element = to.createElement (StaticStrings.CONTENT_ELEMENT); content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, index_strs[i]); index_element.appendChild (content_element); mgpp_indexes.appendChild (index_element); // Contructing 'collectionmetadata' elements for 'mgpp' indexes Element collectionmetadata = to.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT); collectionmetadata.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); collectionmetadata.setAttribute (StaticStrings.NAME_ATTRIBUTE, index_strs[i]); collectionmetadata.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR); if (index_strs[i].indexOf (StaticStrings.NS_SEP) != -1) { index_strs[i] = index_strs[i].substring (index_strs[i].indexOf (StaticStrings.NS_SEP) + 1); } XMLTools.setNodeText (collectionmetadata, index_strs[i]); appendProperly (toElement, collectionmetadata); } appendProperly (toElement, mgpp_indexes); } //This is actually doing indexes for both mgpp and lucene static private void doMGPPIndexes (Document to, Node searchNode) { Element toElement = to.getDocumentElement (); Element indexes_element = to.createElement (StaticStrings.INDEXES_ELEMENT);// indexes_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); indexes_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR); NodeList index_children = ((Element)searchNode).getElementsByTagName (StaticStrings.INDEX_LOW_STR);//index int num_nodes = index_children.getLength (); for (int i=0; i Element e = (Element)index_children.item (i); String index_str = e.getAttribute (StaticStrings.NAME_ATTRIBUTE); String index_str_display = index_str;//for creating collectionmetadata for this index // Handling 'index' element // Double check to make sure it's not colon separated style index. boolean old_index = false; if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) { System.err.println ("Something is wrong! the index should NOT be level:source tuplets style."); old_index = true; } //Each index may have a list of comma-separated strings. //split them into 'content' elements in the internal structure StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER); //index_str = ""; while(content_tokenizer.hasMoreTokens ()) { // Replace index_str to be qualified name, eg. dc.Subject and keywords insread of dc.Subject. Element content_element = to.createElement (StaticStrings.CONTENT_ELEMENT); String content_str = content_tokenizer.nextToken (); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(content_str.indexOf (StaticStrings.NS_SEP) == -1) { if(content_str.equals (StaticStrings.TEXT_STR)) { // in this case, do nothing } else { content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str; } } content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str); index_element.appendChild (content_element); content_element = null; } //while ends indexes_element.appendChild (index_element); index_element = null; // Handling 'displayItem' element of this 'index' element // 'e' is the parent element 'index' of 'displayItem' element ArrayList collectionmetadata_list = doDisplayItemList (to, e, StaticStrings.NAME_ATTRIBUTE, index_str_display); appendArrayList (toElement, collectionmetadata_list); } // for loop ends toElement.appendChild (indexes_element); // create another set of which will be used when user switches to MG // i.e. we build a default index set for a start Element mg_indexes = to.createElement (StaticStrings.INDEXES_ELEMENT);// mg_indexes.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR); mg_indexes.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); //put the namespace '.ex' as prefix to the indexes String []index_strs = {StaticStrings.TEXT_STR, StaticStrings.EXTRACTED_NAMESPACE + StaticStrings.TITLE_ELEMENT, StaticStrings.EXTRACTED_NAMESPACE + StaticStrings.SOURCE_ELEMENT}; for (int i=0; i index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, StaticStrings.DOCUMENT_STR); Element content_element = to.createElement (StaticStrings.CONTENT_ELEMENT); content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, index_strs[i]); index_element.appendChild (content_element); mg_indexes.appendChild (index_element); // Contructing 'collectionmetadata' elements for 'mg' indexes Element collectionmetadata = to.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT); collectionmetadata.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); String temp = StaticStrings.DOCUMENT_STR.concat (StaticStrings.COLON_CHARACTER).concat (index_strs[i]); collectionmetadata.setAttribute (StaticStrings.NAME_ATTRIBUTE, temp); collectionmetadata.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR); if (index_strs[i].indexOf (StaticStrings.NS_SEP) != -1) { index_strs[i] = index_strs[i].substring (index_strs[i].indexOf (StaticStrings.NS_SEP) + 1); } XMLTools.setNodeText (collectionmetadata, index_strs[i]); appendProperly (toElement, collectionmetadata); } toElement.appendChild (mg_indexes); } static private void doDisplayFormat (Document to, Element from) { //display element in the xml file Element de = (Element)XMLTools.getChildByTagName (from, StaticStrings.DISPLAY_STR); if (de == null) { return; } //format element in the display element Element fe = (Element)XMLTools.getChildByTagName (de, StaticStrings.FORMAT_STR); to.getDocumentElement ().appendChild (doFormat(to, fe, StaticStrings.DISPLAY_STR)); } //construct 'DefaultIndex' element in the internal structure from collectionConfig.xml static private void doDefaultIndex (Document to, Node searchNode) { Element toElement = to.getDocumentElement (); Element default_index_element = to.createElement (StaticStrings.INDEX_DEFAULT_ELEMENT); default_index_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); Element e = (Element)XMLTools.getChildByTagName (searchNode, StaticStrings.INDEX_DEFAULT_ELEMENT_LOWERCASE);//defaultIndex if (e == null) { return; } String index_str = e.getAttribute (StaticStrings.NAME_ATTRIBUTE); boolean old_index = false; if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) { //The index is 'level:source tuplets' which is for mg. Take out 'level' old_index = true; default_index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER))); index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1); } else { default_index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, ""); } //Each index may have a list of comma-separated strings. //split them into 'content' elements in the internal structure StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER); while(content_tokenizer.hasMoreTokens ()) { Element content_element = to.createElement (StaticStrings.CONTENT_ELEMENT); String content_str = content_tokenizer.nextToken (); // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace. if(content_str.indexOf (StaticStrings.NS_SEP) == -1) { if(content_str.equals (StaticStrings.TEXT_STR) || (!old_index && content_str.equals (StaticStrings.ALLFIELDS_STR))) { // in this case, do nothing } else { content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str; } } content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str); default_index_element.appendChild (content_element); content_element = null; } appendProperly (toElement, default_index_element); } // For mg, this method is still called, but make it 'assigned=false' static private void doDefaultLevel (Document to, Node searchNode) { Element toElement = to.getDocumentElement (); Element default_index_option = to.createElement (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT); default_index_option.setAttribute (StaticStrings.NAME_STR, StaticStrings.LEVEL_DEFAULT_STR); Element e = (Element)XMLTools.getChildByTagName (searchNode, StaticStrings.LEVEL_DEFAULT_ELEMENT); if (e != null) { default_index_option.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); String level = e.getAttribute (StaticStrings.NAME_ATTRIBUTE); default_index_option.setAttribute (StaticStrings.VALUE_ATTRIBUTE, level); } else { //In the case of mg, there's no level! build a default one using 'assigned=false value=document' default_index_option.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR); default_index_option.setAttribute (StaticStrings.VALUE_ATTRIBUTE, StaticStrings.DOCUMENT_STR); } appendProperly (toElement, default_index_option); } // Transform plugins (pluginListNode) of collectionConfig.xml into the internal structure (i.e. Document to) static private void doPlugin (Document to, Node pluginListNode) { Element toElement = to.getDocumentElement (); NodeList plugin_children = ((Element)pluginListNode).getElementsByTagName (StaticStrings.PLUGIN_STR); int plugin_nodes = plugin_children.getLength (); if (plugin_nodes < 1) { return; } for (int i=0; i (i.e. format has no child nodes) // as this will end up embedded in another format statement as // This doubling up of format stmts will then prevent GLI from opening the collection again. if (format != null && format.hasChildNodes()) { // not an empty format statement String gsf_text = XMLTools.xmlNodeToStringWithoutIndenting(format); if (gsf_text.startsWith("<") && (gsf_text.indexOf("<") != gsf_text.lastIndexOf("<"))) { gsf_text = gsf_text.substring(gsf_text.indexOf("//' int pos = filter_str.indexOf (StaticStrings.SEPARATOR_CHARACTER); String meta_str = ""; String meta_value_str = ""; String clude_str = ""; String flag_str = ""; if (pos == -1) { meta_str = meta_value_str = filter_str; clude_str = StaticStrings.INCLUDE_STR; } else { clude_str = StaticStrings.INCLUDE_STR; if (filter_str.startsWith (StaticStrings.EXCLAMATION_CHARACTER)) { clude_str = StaticStrings.EXCLUDE_STR; // Peel off "!" filter_str = filter_str.substring (StaticStrings.EXCLAMATION_CHARACTER.length ()); } String[] strs = filter_str.split (StaticStrings.SEPARATOR_CHARACTER); if (strs[0] != null && strs[0] != "") { meta_str = strs[0]; } if(!meta_str.equals (StaticStrings.FILENAME_STR) && meta_str.indexOf (StaticStrings.NS_SEP) == -1) { meta_str = StaticStrings.EXTRACTED_NAMESPACE + meta_str; } if (strs[1] != null && strs[1] != "") { meta_value_str = strs[1]; } if (strs.length > 2) { //This means there has been set a flag if (strs[2] != null && strs[2] != "") { flag_str = strs[2]; } } } Element subcollection_element = to.createElement (StaticStrings.SUBCOLLECTION_ELEMENT); subcollection_element.setAttribute (StaticStrings.NAME_STR, name_str); subcollection_element.setAttribute (StaticStrings.CONTENT_ATTRIBUTE, meta_str); subcollection_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, clude_str); if (flag_str != "") { subcollection_element.setAttribute (StaticStrings.OPTIONS_ATTRIBUTE, flag_str); } XMLTools.setNodeText (subcollection_element, meta_value_str); toElement.appendChild (subcollection_element); } } //Handle levels (document, section). In the internal structure, the element is called 'IndexOption' static private void doLevel (Document to, Node searchNode) { Element toElement = to.getDocumentElement (); NodeList level_children = ((Element)searchNode).getElementsByTagName (StaticStrings.LEVEL_ATTRIBUTE); int level_nodes = level_children.getLength (); // it's mg, there's no level. So we construct a default 'indexOption' in the internal structure if (level_nodes < 1) { Element index_option = to.createElement (StaticStrings.INDEXOPTIONS_ELEMENT); index_option.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR); index_option.setAttribute (StaticStrings.NAME_STR, StaticStrings.LEVELS_STR); Element option_element = to.createElement (StaticStrings.OPTION_ELEMENT); option_element.setAttribute (StaticStrings.NAME_STR, StaticStrings.DOCUMENT_STR); index_option.appendChild (option_element); appendProperly (toElement, index_option); return; } Element index_option = to.createElement (StaticStrings.INDEXOPTIONS_ELEMENT); index_option.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR); index_option.setAttribute (StaticStrings.NAME_STR, StaticStrings.LEVELS_STR); for (int i=0; i0) { searchtype_str += ","; } searchtype_str += t; } } // pretend its a format statement Element search_type_element = to.createElement (StaticStrings.FORMAT_STR); search_type_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, StaticStrings.SEARCHTYPE_ELEMENT); XMLTools.setNodeText(search_type_element, searchtype_str); appendProperly (to.getDocumentElement (), search_type_element); } // Handling search format statement static private void doSearchFormat (Document to, Node searchNode) { // THere is currently just one format element for search. HOwever, need to check for old config files which used to have NodeList format_children = ((Element)searchNode).getElementsByTagName (StaticStrings.FORMAT_STR); int format_nodes = format_children.getLength (); if (format_nodes < 1) { return; } Element format = null; for(int i=0; i 0) { index_value.append (level_str).append (StaticStrings.COLON_CHARACTER); //index_value = index_value.StaticStrings.COLON_CHARACTER; } } NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT); int content_elements_length = content_elements.getLength (); for(int k = 0; k < content_elements_length; k++) { Element content_element = (Element) content_elements.item (k); if (content_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) { continue; } String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE); full_index_name = full_index_name + name_str; if (k < content_elements_length - 1) { full_index_name = full_index_name + StaticStrings.COMMA_CHARACTER; } if(name_str.startsWith (StaticStrings.EXTRACTED_NAMESPACE) && name_str.indexOf(StaticStrings.NS_SEP, StaticStrings.EXTRACTED_NAMESPACE.length()) == -1) { name_str = name_str.substring (StaticStrings.EXTRACTED_NAMESPACE.length ()); } index_value.append (name_str); name_str = null; // Make it comma separated string if(k < content_elements_length - 1) { index_value.append (StaticStrings.COMMA_CHARACTER); } content_element = null; }//for loop ends String temp_str = index_value.toString (); index_ele.setAttribute (StaticStrings.NAME_ATTRIBUTE, temp_str); // Now constructing 'displayItem' element for this 'index_ele' element // The index names in the collectionmetadata elements in the internal structure are not the names that // are used in the content elements (i.e. ex.Source or dc.Subject and keywords), but the names that are // in the configuration files (i.e. Source or dc.Subject) ArrayList collectionmetadata_list = XMLTools.getNamedElementList (source, StaticStrings.COLLECTIONMETADATA_ELEMENT, StaticStrings.NAME_ATTRIBUTE, temp_str); if (collectionmetadata_list == null) { //try the full name, i.e. with 'ex.' if (mg_indexer == true) { // but first append level info if we are mg full_index_name = level_str+StaticStrings.COLON_CHARACTER+full_index_name; } collectionmetadata_list = XMLTools.getNamedElementList (source, StaticStrings.COLLECTIONMETADATA_ELEMENT, StaticStrings.NAME_ATTRIBUTE, full_index_name); } if (collectionmetadata_list != null) { for(int k = 0; k < collectionmetadata_list.size (); k++) { Element collectionmetadata = (Element)collectionmetadata_list.get (k); if (collectionmetadata.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) { continue; } Element displayItem = constructDisplayItem (collectionmetadata, to); index_ele.appendChild (displayItem); } } search.appendChild (index_ele); } //for loop ends //Convert default index convertDefaultIndex (from, to, search); convertIndexOptions(from, to, search); } // Convert levels for mgpp/lucene. This method is called by converIndex() when mgpp indexer is detected. static private void convertLevels (Document from, Document to, Element search) { Element source = from.getDocumentElement (); Element index_option = XMLTools.getNamedElement (source, StaticStrings.INDEXOPTIONS_ELEMENT, StaticStrings.NAME_ATTRIBUTE, StaticStrings.LEVELS_STR); if (index_option == null) { return; } //Debugging purposes if (index_option.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) { DebugStream.println ("For mgpp, there should be an IndexOption element for levels which is assigned 'true': possible bug."); } NodeList option_elements = index_option.getElementsByTagName (StaticStrings.OPTION_ELEMENT); int num_elements = option_elements.getLength (); // Don't output anything if no indexes are set if(num_elements == 0) { return ;// } for(int k = 0; k < num_elements; k++) { Element e = (Element) option_elements.item (k); String name_str = e.getAttribute (StaticStrings.NAME_ATTRIBUTE); Element level_element = to.createElement (StaticStrings.LEVEL_ELEMENT); level_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str); //Now construct displayItem for this level element from collectionmetadata ArrayList collectionmetadata_list = XMLTools.getNamedElementList (source, StaticStrings.COLLECTIONMETADATA_ELEMENT, StaticStrings.NAME_ATTRIBUTE, name_str); if (collectionmetadata_list != null) { for(int j = 0; j < collectionmetadata_list.size (); j++) { Element collectionmetadata = (Element)collectionmetadata_list.get (j); Element displayItem = constructDisplayItem (collectionmetadata, to); level_element.appendChild (displayItem); } } search.appendChild (level_element); } //Convert default level Element default_index_option = XMLTools.getNamedElement (source, StaticStrings.INDEXOPTION_DEFAULT_ELEMENT, StaticStrings.NAME_ATTRIBUTE, StaticStrings.LEVEL_DEFAULT_STR); if (default_index_option == null) { return; } Element default_level = to.createElement (StaticStrings.LEVEL_DEFAULT_ELEMENT); String default_level_str = default_index_option.getAttribute (StaticStrings.VALUE_ATTRIBUTE); default_level.setAttribute (StaticStrings.NAME_ATTRIBUTE, default_level_str); search.appendChild (default_level); } // Convert indexoptions for mg/mgpp/lucene. This method is called by convertIndex(). static private void convertIndexOptions (Document from, Document to, Element search) { Element source = from.getDocumentElement (); Element index_option = XMLTools.getNamedElement (source, StaticStrings.INDEXOPTIONS_ELEMENT, StaticStrings.NAME_ATTRIBUTE, StaticStrings.INDEXOPTIONS_STR); if (index_option == null) { return; } //Debugging purposes if (index_option.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) { DebugStream.println ("There should be an IndexOption element which is assigned 'true': possible bug."); } Element indexOptionEl = to.createElement(StaticStrings.INDEXOPTION_STR); NodeList option_elements = index_option.getElementsByTagName (StaticStrings.OPTION_ELEMENT); int num_elements = option_elements.getLength (); // Don't output anything if no index if(num_elements == 0) { return ;// } search.appendChild (indexOptionEl); for(int k = 0; k < num_elements; k++) { Element e = (Element) option_elements.item (k); String name_att = e.getAttribute(StaticStrings.NAME_ATTRIBUTE); Element optionEl = to.createElement(StaticStrings.OPTION_STR); optionEl.setAttribute(StaticStrings.NAME_ATTRIBUTE, name_att); indexOptionEl.appendChild(optionEl); } } // Append the element son to the element mother in the appropriate position. static public void appendProperly (Element mother, Element son) { if (son == null) return; Node reference_node = findInsertionPoint (mother, son); if(reference_node != null) { mother.insertBefore (son, reference_node); } else { mother.appendChild (son); } } /** Find the best insertion position for the given DOM Element 'target_element' in the DOM Element 'document_element'. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg Plugins). Failing a command match it will check against the command order for the best insertion location. * @param target_element the command Element to be inserted * @return the Element which the given command should be inserted before, or null to append to end of list */ static public Node findInsertionPoint (Element document_element, Element target_element) { ///ystem.err.println("Find insertion point: " + target_element.getNodeName()); String target_element_name = target_element.getNodeName (); // Try to find commands with the same tag. NodeList matching_elements = document_element.getElementsByTagName (target_element_name); // If we found matching elements, then we have our most likely insertion location, so check within for groupings if(matching_elements.getLength () != 0) { ///ystem.err.println("Found matching elements."); // Only CollectionMeta are grouped. if(target_element_name.equals (StaticStrings.COLLECTIONMETADATA_ELEMENT)) { ///ystem.err.println("Dealing with collection metadata"); // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end. // So if the command to be added is special add it immediately after any other special command if(target_element.getAttribute (StaticStrings.SPECIAL_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) { int index = 0; Element matched_element = (Element) matching_elements.item (index); Element sibling_element = (Element) matched_element.getNextSibling (); while(sibling_element.getAttribute (StaticStrings.SPECIAL_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) { index++; matched_element = (Element) matching_elements.item (index); sibling_element = (Element) matched_element.getNextSibling (); } if(sibling_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT)) { Element newline_element = document_element.getOwnerDocument().createElement (CollectionConfiguration.NEWLINE_ELEMENT); document_element.insertBefore (newline_element, sibling_element); } return sibling_element; } // Otherwise try to find a matching 'name' and add after the last one in that group. else { int index = 0; target_element_name = target_element.getAttribute (StaticStrings.NAME_ATTRIBUTE); boolean found = false; // Skip all of the special metadata Element matched_element = (Element) matching_elements.item (index); while(matched_element.getAttribute (StaticStrings.SPECIAL_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) { index++; matched_element = (Element) matching_elements.item (index); } // Begin search while(!found && matched_element != null) { if(matched_element.getAttribute (StaticStrings.NAME_ATTRIBUTE).equals (target_element_name)) { found = true; } else { index++; matched_element = (Element) matching_elements.item (index); } } // If we found a match, we need to continue checking until we find the last name match. if(found) { index++; Element previous_sibling = matched_element; Element sibling_element = (Element) matching_elements.item (index); while(sibling_element != null && sibling_element.getAttribute (StaticStrings.NAME_ATTRIBUTE).equals (target_element_name)) { previous_sibling = sibling_element; index++; sibling_element = (Element) matching_elements.item (index); } // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines! return previous_sibling.getNextSibling (); } // If not found we just add after last metadata element else { Element last_element = (Element) matching_elements.item (matching_elements.getLength () - 1); return last_element.getNextSibling (); } } } else { ///ystem.err.println("Not dealing with collection meta."); Element matched_element = (Element) matching_elements.item (matching_elements.getLength () - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling (); if(sibling_element != null && sibling_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT)) { Element newline_element = document_element.getOwnerDocument().createElement (CollectionConfiguration.NEWLINE_ELEMENT); document_element.insertBefore (newline_element, sibling_element); } return sibling_element; // Note that this may be null } } ///ystem.err.println("No matching elements found."); // Locate where this command is in the ordering int command_index = -1; for(int i = 0; command_index == -1 && i < CollectionConfiguration.COMMAND_ORDER.length; i++) { if(CollectionConfiguration.COMMAND_ORDER[i].equals (target_element_name)) { command_index = i; } } ///ystem.err.println("Command index is: " + command_index); // Now move forward, checking for existing elements in each of the preceeding command orders. int preceeding_index = command_index - 1; ///ystem.err.println("Searching before the target command."); while(preceeding_index >= 0) { matching_elements = document_element.getElementsByTagName (CollectionConfiguration.COMMAND_ORDER[preceeding_index]); // If we've found a match if(matching_elements.getLength () > 0) { // We add after the last element Element matched_element = (Element) matching_elements.item (matching_elements.getLength () - 1); // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getNextSibling (); if(sibling_element != null && sibling_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT)) { Element newline_element = document_element.getOwnerDocument().createElement (CollectionConfiguration.NEWLINE_ELEMENT); document_element.insertBefore (newline_element, sibling_element); } return sibling_element; // Note that this may be null } preceeding_index--; } // If all that fails, we now move backwards through the commands int susceeding_index = command_index + 1; ///ystem.err.println("Searching after the target command."); while(susceeding_index < CollectionConfiguration.COMMAND_ORDER.length) { matching_elements = document_element.getElementsByTagName (CollectionConfiguration.COMMAND_ORDER[susceeding_index]); // If we've found a match if(matching_elements.getLength () > 0) { // We add before the first element Element matched_element = (Element) matching_elements.item (0); // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines) Node sibling_element = matched_element.getPreviousSibling (); if(sibling_element != null && sibling_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT)) { Element newline_element = document_element.getOwnerDocument().createElement (CollectionConfiguration.NEWLINE_ELEMENT); document_element.insertBefore (newline_element, sibling_element); } return sibling_element; // Note that this may be null } susceeding_index++; } // Well. Apparently there are no other commands in this collection configuration. So append away... return null; } // From collectionConfig.xml to internal structure:add 'ex.' namespace (if none). // From internal structure to collectionConfig.xml:always peel off 'ex.' namespace (if any), except for format statement //This method parses 'xml_file_doc' into 'dOc' static public void parse(File xml_file, Document dOc) { Document xml_file_doc = XMLTools.parseXMLFile (xml_file); Element fromElement = xml_file_doc.getDocumentElement (); Element toElement = dOc.getDocumentElement (); // It's deliberately set that 'creator', 'maintainer', and 'public' are only in English (as they are just names). // So the following ArrayList have only one element. Node metadataListNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.METADATALIST_STR, 0); if (metadataListNode != null) { ArrayList creator = doMetadataList (dOc, metadataListNode, StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_CREATOR_STR); ArrayList maintainer = doMetadataList (dOc, metadataListNode, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR); ArrayList is_public = doMetadataList (dOc, metadataListNode, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_STR); appendArrayList (toElement, creator); appendArrayList (toElement, maintainer); appendArrayList (toElement, is_public); } Node databaseNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.INFODB_STR, 0); String databasetype_value = "gdbm"; if(databaseNode != null) { databasetype_value = ((Element)databaseNode).getAttribute (StaticStrings.TYPE_ATTRIBUTE);//might be gdbm|jdbm|sqlite OR not yet set (in which case it should default to gdbm) } Element databasetype = doDatabaseType(dOc, databasetype_value); appendProperly (toElement, databasetype); Node searchNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.SEARCH_STR, 0); String buildtype_value = ((Element)searchNode).getAttribute (StaticStrings.TYPE_ATTRIBUTE);//might be mg|mgpp|lucene Element buildtype = doBuildType (dOc, buildtype_value); appendProperly (toElement, buildtype); Node importNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.IMPORT_STR, 0); if (importNode == null) { System.out.println ("There is no content in the 'import' block."); } if (importNode != null) { //do plugin list nodes Node pluginListNode = XMLTools.getChildByTagNameIndexed ((Element)importNode, StaticStrings.PLUGINLIST_STR, 0); if (pluginListNode == null) { System.out.println ("There is no pluginlist set."); } if (pluginListNode != null) { doPlugin (dOc, pluginListNode); } //do the plugout element (used by building flax collections) Node plugout = XMLTools.getChildByTagNameIndexed ((Element)importNode, PLUGOUT_ELEMENT, 0); if (plugout != null) { Element to_element = XMLTools.duplicateElement (dOc, (Element)plugout, true); toElement.appendChild (to_element); } } Node browseNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.BROWSE_STR, 0); if (browseNode != null) { if (browseNode == null) { System.out.println ("There is no classifier."); } doClassifier (dOc, browseNode); } Node displayItemListNode = XMLTools.getChildByTagNameIndexed (fromElement, StaticStrings.DISPLAYITEMLIST_STR, 0); if (displayItemListNode != null) { ArrayList description = doDisplayItemList (dOc, displayItemListNode, StaticStrings.DESCRIPTION_STR, StaticStrings.COLLECTIONMETADATA_COLLECTIONEXTRA_STR); ArrayList smallicon = doDisplayItemList (dOc, displayItemListNode, StaticStrings.SMALLICON_STR, StaticStrings.COLLECTIONMETADATA_ICONCOLLECTIONSMALL_STR); ArrayList icon = doDisplayItemList (dOc, displayItemListNode, StaticStrings.ICON_STR, StaticStrings.COLLECTIONMETADATA_ICONCOLLECTION_STR); ArrayList name = doDisplayItemList (dOc, displayItemListNode, StaticStrings.NAME_STR, StaticStrings.COLLECTIONMETADATA_COLLECTIONNAME_STR); appendArrayList (toElement, description); appendArrayList (toElement, smallicon); appendArrayList (toElement, icon); appendArrayList (toElement, name); } if (buildtype_value.equalsIgnoreCase ("mg")) { doMGIndexes (dOc, searchNode); } else { doMGPPIndexes (dOc, searchNode); } doDefaultIndex (dOc, searchNode); doDefaultLevel (dOc, searchNode); doLevel (dOc, searchNode); doIndexOption (dOc, searchNode); doSubcollection (dOc, searchNode); doIndexSubcollection (dOc, searchNode); doIndexLanguage (dOc, searchNode); doDefaultIndexLanguage (dOc, searchNode); doLanguageMetadata (dOc, searchNode); doSearchType (dOc, searchNode); doSearchFormat (dOc, searchNode); doDisplayFormat (dOc, fromElement); doReplaceListRef (dOc, fromElement); doServiceRackList (dOc, fromElement); } static public String generateStringVersion(Document doc) { return XMLTools.xmlNodeToString(doc); } static public void save (File collect_config_xml_file, Document doc) { Document collection_config_xml_document = convertInternalToCollectionConfig (doc); String[] nonEscapingTagNames = {StaticStrings.FORMAT_STR}; XMLTools.writeXMLFile (collect_config_xml_file, collection_config_xml_document, nonEscapingTagNames); } //Convert the internal XML DOM tree (dOc) into that of collectionConfig.xml (skeleton) static private Document convertInternalToCollectionConfig (Document dOc) { //first parse an empty skeleton of xml config file //The aim is to convert the internal structure into this skeleton Document skeleton = XMLTools.parseXMLFile ("xml/CollectionConfig.xml", true); //Element internal = dOc.getDocumentElement(); convertMetadataList (dOc, skeleton); convertDisplayItemList (dOc, skeleton); convertBuildType (dOc, skeleton); convertDatabaseType (dOc, skeleton); convertIndex (dOc, skeleton); convertPlugin (dOc, skeleton);//also do the plugout element convertClassifier (dOc, skeleton); convertSubcollectionIndexes (dOc, skeleton); convertLanguages (dOc, skeleton); convertSubcollection (dOc, skeleton); convertSearchType (dOc, skeleton); convertSearchFormat (dOc, skeleton); convertDisplayFormat (dOc, skeleton); convertReplaceListRef (dOc, skeleton); convertServiceRackList(dOc, skeleton); return skeleton; } // Append the elements, which are of Element type, in 'list' to Element 'to' static private void appendArrayList (Element to, ArrayList list) { if (list == null) return; for (int i=0; i