Ignore:
Timestamp:
2003-07-16T15:56:23+12:00 (21 years ago)
Author:
jmt12
Message:

Major changes to CDM - MGPP extension.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java

    r4932 r4967  
    218218        return self.languageDefaultToString(command_element);
    219219    }
     220    else if(command_element_name.equals(LEVELS_ELEMENT)) {
     221        return self.levelsToString(command_element);
     222    }
    220223    else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
    221224        return self.metadataToString(command_element);
     
    224227        return self.pluginToString(command_element, show_extracted_namespace);
    225228    }
    226     //else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
    227     //    return self.searchtypeToString(command_element);
    228     //}
     229    else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
     230        return self.searchtypeToString(command_element);
     231    }
    229232    else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
    230233        return self.subcollectionToString(command_element, show_extracted_namespace);
     
    287290    static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP;
    288291    /** Gives the preferred ordering of commands */
    289     static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
     292    static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
    290293
    291294    /** ************************** Public Data Members ***************************/
     
    340343    }
    341344
    342     /** Retrieve or create the indexes Element. */
    343     public Element getIndexes() {
    344     return getOrCreateElementByTagName(INDEXES_ELEMENT);
    345     }
    346 
    347345    /** Retrieve or create the languages Element. */
    348346    public Element getLanguages() {
    349     return getOrCreateElementByTagName(LANGUAGES_ELEMENT);
     347    return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
     348    }
     349
     350    public Element getLevels() {
     351    return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
     352    }
     353
     354    /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
     355    public Element getMGIndexes() {
     356    return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
     357    }
     358
     359    public Element getMGPPIndexes() {
     360    return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
     361    }
     362
     363    /** Retrieve or create the searchtype element. */
     364    public Element getSearchType() {
     365    return getOrCreateElementByTagName(SEARCHTYPE_ELEMENT, null, null);
    350366    }
    351367
    352368    /** Retrieve or create the subindexes Element. */
    353369    public Element getSubIndexes() {
    354     return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT);
     370    return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
    355371    }
    356372
    357373    /** Retrieve or create the supercollections Element. */
    358374    public Element getSuperCollection() {
    359     return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT);
     375    return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
    360376    }
    361377
     
    481497
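    482498    /** Retrieve the first Element with the given tag name, or create it if none matches. When a conditional attribute is supplied, only an existing Element whose attribute equals the required value is returned. */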
    483     private Element getOrCreateElementByTagName(String name) {
     499    private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
    484500    ///ystem.err.println("Get or create element by tag name: " + name);
    485501    Element document_element = document.getDocumentElement();
    486502    NodeList elements = document_element.getElementsByTagName(name);
    487     if(elements.getLength() > 0) {
    488         ///ystem.err.println("Found element.");
    489         document_element = null;
    490         return (Element) elements.item(0);
     503    int elements_length = elements.getLength();
     504    if(elements_length > 0) {
     505        if(conditional_attribute == null) {
     506        document_element = null;
     507        return (Element) elements.item(0);
     508        }
     509        else {
     510        for(int i = 0; i < elements_length; i++) {
     511            Element element = (Element) elements.item(i);
     512            if(element.getAttribute(conditional_attribute).equals(required_value)) {
     513            document_element = null;
     514            return element;
     515            }
     516            element = null;
     517        }
     518        }
     519    }
     520    // Create the element
     521    Element element = document.createElement(name);
     522    Node target_node = findInsertionPoint(element);
     523    if(target_node != null) {
     524        document_element.insertBefore(element, target_node);
    491525    }
    492526    else {
    493         ///ystem.err.println("Creating element.");
    494         Element element = document.createElement(name);
    495         Node target_node = findInsertionPoint(element);
    496         if(target_node != null) {
    497         document_element.insertBefore(element, target_node);
    498         }
    499         else {
    500         document_element.appendChild(element);
    501         }
    502         document_element = null;
    503         return element;
    504     }
     527        document_element.appendChild(element);
     528    }
     529    document_element = null;
     530    return element;
    505531    }   
    506532
    507533    private String indexesToString(Element command_element, boolean show_extracted_namespace) {
    508     StringBuffer text = new StringBuffer(INDEX_STR);
     534    StringBuffer text = new StringBuffer("");
     535    if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
     536        text.append("#");
     537    }
     538    text.append(INDEX_STR);
    509539    text.append(TAB_CHARACTER);
    510540    text.append(TAB_CHARACTER);
     
    514544    for(int j = 0; j < index_elements_length; j++) {
    515545        Element index_element = (Element) index_elements.item(j);
    516         text.append(index_element.getAttribute(LEVEL_ATTRIBUTE));
    517         text.append(StaticStrings.COLON_CHARACTER);
     546        String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
     547        if(level_str.length() > 0) {
     548        text.append(level_str);
     549        text.append(StaticStrings.COLON_CHARACTER);
     550        }
    518551        NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
    519552        int content_elements_length = content_elements.getLength();
     
    591624    }
    592625
     626    private String levelsToString(Element command_element) {
     627    if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
     628        StringBuffer text = new StringBuffer(LEVELS_STR);
     629        text.append(TAB_CHARACTER);
     630        text.append(TAB_CHARACTER);
     631        NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
     632        int content_elements_length = content_elements.getLength();
     633        for(int i = 0; i < content_elements_length; i++) {
     634        Element content_element = (Element) content_elements.item(i);
     635        text.append(content_element.getAttribute(NAME_ATTRIBUTE));
     636        text.append(SPACE_CHARACTER);
     637        }
     638        return text.substring(0, text.length() - 1);
     639    }
     640    else {
     641        return null;
     642    }
     643    }
     644
    593645    static public String metadataToString(Element command_element) {
    594646    // If there is no value attribute, then we don't write anything
     
    614666        text.append(TAB_CHARACTER);
    615667        text.append(name_str);
     668        text.append(SPACE_CHARACTER);
    616669        }
    617670        String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
    618         text.append(SPACE_CHARACTER);
    619671        // If this element is in English, and it is the first one found, we don't need to write the language argument.
    620672        if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
     
    703755            command_element = parseLanguageDefault(command_str);
    704756            }
     757            if(command_element == null && command_type.equals(LEVELS_STR)) {
     758            command_element = parseLevels(command_str);
     759            }
    705760            if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
    706761            command_element = parseMetadata(command_str);
     
    711766            if(command_element == null && command_type.equals(PLUGIN_STR)) {
    712767            command_element = parsePlugIn(command_str);
     768            }
     769            if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
     770            command_element = parseSearchType(command_str);
    713771            }
    714772            if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
     
    834892        command_element = document.createElement(INDEXES_ELEMENT);
    835893        }
     894        boolean first_index = true;
    836895        while(tokenizer.hasMoreTokens()) {
    837896        Element index_element = document.createElement(INDEX_ELEMENT);
    838897        String index_str = tokenizer.nextToken();
    839         index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
    840         String raw_content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
    841         StringTokenizer content_tokenizer = new StringTokenizer(raw_content_str, StaticStrings.COMMA_CHARACTER);
     898        // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
     899        boolean old_index;
     900        if(index_str.indexOf(COLON_CHARACTER) != -1) {
     901            old_index = true;
     902            index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
     903            index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
     904            if(first_index) {
     905            command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
     906            first_index = false;
     907            }
     908        }
     909        else {
     910            if(first_index) {
     911            command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
     912            first_index = false;
     913            }
     914            old_index = false;
     915        }
     916        StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
    842917        while(content_tokenizer.hasMoreTokens()) {
    843918            Element content_element = document.createElement(CONTENT_ELEMENT);
    844919            String content_str = content_tokenizer.nextToken();
    845920            // Since the contents of indexes have to be certain keywords or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, prepend the extracted metadata namespace.
    846             if(!content_str.equals(StaticStrings.TEXT_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) {
    847             content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
     921            if(content_str.indexOf(MSMUtils.NS_SEP) == -1) {
     922            if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
     923                // Our special strings are OK.
     924            }
     925            else {
     926                content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
     927            }
    848928            }
    849929            content_element.setAttribute(NAME_ATTRIBUTE, content_str);
     
    852932        }
    853933        content_tokenizer = null;
    854         raw_content_str = null;
    855934        index_str = null;
    856935        command_element.appendChild(index_element);
     
    901980        if(tokenizer.hasMoreTokens()) {
    902981        command_element = document.createElement(LANGUAGES_ELEMENT);
    903         }
    904         while(tokenizer.hasMoreTokens()) {
    905         Element language_element = document.createElement(LANGUAGE_ELEMENT);
    906         language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
    907         command_element.appendChild(language_element);
    908         language_element = null;
     982        while(tokenizer.hasMoreTokens()) {
     983            Element language_element = document.createElement(LANGUAGE_ELEMENT);
     984            language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
     985            command_element.appendChild(language_element);
     986            language_element = null;
     987        }
    909988        }
    910989        tokenizer = null;
     
    9291008    }
    9301009    catch (Exception exception) {
     1010    }
     1011    return command_element;
     1012    }
     1013
     1014    private Element parseLevels(String command_str) {
     1015    Element command_element = null;
     1016    try {
     1017        StringTokenizer tokenizer = new StringTokenizer(command_str);
     1018        // First token is command type (levels)
     1019        tokenizer.nextToken();
     1020        if(tokenizer.hasMoreTokens()) {
     1021        command_element = document.createElement(LEVELS_ELEMENT);
     1022        command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
     1023        while(tokenizer.hasMoreTokens()) {
     1024            Element level_element = document.createElement(CONTENT_ELEMENT);
     1025            level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
     1026            command_element.appendChild(level_element);
     1027            level_element = null;
     1028        }
     1029        }
     1030    }
     1031    catch(Exception exception) {
    9311032    }
    9321033    return command_element;
     
    10341135        }
    10351136        tokenizer = null;
     1137    }
     1138    catch(Exception exception) {
     1139    }
     1140    return command_element;
     1141    }
     1142
     1143    private Element parseSearchType(String command_str) {
     1144    Element command_element = null;
     1145    try {
     1146        StringTokenizer tokenizer = new StringTokenizer(command_str);
     1147        // First token is command type (searchtype)
     1148        tokenizer.nextToken();
     1149        if(tokenizer.hasMoreTokens()) {
     1150        command_element = document.createElement(SEARCHTYPE_ELEMENT);
     1151        command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
     1152        while(tokenizer.hasMoreTokens()) {
     1153            Element search_element = document.createElement(CONTENT_ELEMENT);
     1154            search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
     1155            command_element.appendChild(search_element);
     1156            search_element = null;
     1157        }
     1158        }   
    10361159    }
    10371160    catch(Exception exception) {
     
    12081331    }
    12091332
     1333    private String searchtypeToString(Element command_element) {
     1334    if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
     1335        StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
     1336        text.append(TAB_CHARACTER);
     1337        NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
     1338        int search_elements_length = search_elements.getLength();
     1339        for(int i = 0; i < search_elements_length; i++) {
     1340        Element search_element = (Element) search_elements.item(i);
     1341        text.append(search_element.getAttribute(NAME_ATTRIBUTE));
     1342        text.append(SPACE_CHARACTER);
     1343        }
     1344        return text.substring(0, text.length() - 1);
     1345    }
     1346    else {
     1347        return null;
     1348    }
     1349    }
     1350
    12101351    private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
    12111352    StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
Note: See TracChangeset for help on using the changeset viewer.