Changeset 9908


Ignore:
Timestamp:
2005-05-18T17:15:28+12:00 (19 years ago)
Author:
kjdon
Message:

Removed all 2.39 compatibility code. Removed the commented-out beta code. Added buildtype handling - we now use it for Lucene.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java

    r9036 r9908  
    242242        return self.metadataToString(command_element, show_extracted_namespace);
    243243    }
    244     // else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
    245     //     return self.metadataToString(command_element, show_extracted_namespace);
    246     // }
     244    else if (command_element_name.equals(BUILDTYPE_ELEMENT)) {
     245        return self.metadataToString(command_element, show_extracted_namespace);
     246    }
    247247    else if(command_element_name.equals(PLUGIN_ELEMENT)) {
    248248        return self.pluginToString(command_element, show_extracted_namespace);
     
    310310
    311311    /** Gives the preferred ordering of commands */
    312     static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, /* StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, */ StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
     312    static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT,  StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.BUILDTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
    313313
    314314    /** ************************** Public Data Members ***************************/
     
    316316    /** ************************** Private Data Members ***************************/
    317317   
    318     /** Is the configuration file currently being read in one of the infamous G2.39 ones. */
    319     private boolean is_twopointthreenine = false;
    320318    private File collect_config_file;
    321319
     
    358356    }
    359357
    360 //      public Element getBeta() {
    361 //      Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
    362 //      element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
    363 //      element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
    364 //      return element;
    365 //      }
    366358
    367359    public Element getCreator() {
     
    412404    }
    413405
     406    public Element getBuildType() {
     407    Element element = getOrCreateElementByTagName(BUILDTYPE_ELEMENT, null, null);
     408    element.setAttribute(NAME_ATTRIBUTE, BUILDTYPE_STR);
     409    element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
     410    return element;
     411
     412    }
    414413    /** Retrieve or create the searchtype element. */
    415414    public Element getSearchType() {
     
    479478        //FileWriter file_writer = new FileWriter(collect_config_file, false);
    480479        BufferedWriter buffered_writer = new BufferedWriter(file_writer);
    481         // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
    482         // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata??
    483         // Yes we can. Lets see..
    484         // known_metadata = new ArrayList(); 'blamo'
    485480        Element collect_config_element = document.getDocumentElement();
    486481        NodeList command_elements = collect_config_element.getChildNodes();
     
    513508        }
    514509        buffered_writer.close();
    515         // known_metadata = null; 'poof'
    516510        }
    517511        catch (Exception exception) {
     
    782776    String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
    783777    // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
    784     if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
     778    if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals(BUILDTYPE_STR) ) {
    785779        text.append(name_str);
    786780        text.append(TAB_CHARACTER);
    787781        special = true;
    788782    }
    789     else if (/* name_str.equals(COLLECTIONMETADATA_BETA_STR) || */ name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
     783    else if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR) ) {
    790784        text.append(name_str);
    791785        text.append(TAB_CHARACTER);
     
    799793        text.append(SPACE_CHARACTER);
    800794        String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
    801         // If this is element is in english, and it is the first one found, we don't need to write the language argument.
    802         //if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
    803         // changed so that we always write the language string
    804795        text.append(LBRACKET_CHARACTER);
    805796        text.append(LANGUAGE_ARGUMENT);
     
    807798        text.append(RBRACKET_CHARACTER);
    808799        text.append(SPACE_CHARACTER);
    809         //}
     800
    810801        if(known_metadata != null) {
    811802        known_metadata.add(name_str);
     
    837828    }
    838829
    839     /** Parse a collect.cfg into a DOM model representation. */
     830    /** Parse a collect.cfg into a DOM model representation.
     831     * note we are ignoring 2.39 compatibility now. */
    840832    private void parse(File collect_config_file) {
    841833    try {
    842         ArrayList acquired_collectionmeta_names = null;
    843         ArrayList obsolete_collectionmeta_names = null;
    844         HashMap changed_collectionmeta_names = null;
    845        
    846         // Life is made oh-so-more tricky by the existance of G2.39 config files. There are two ways to handle them:
    847         // 1. Notice that the file is G2.39 from the start, then as I parse it magic it into G2.4 standard
    848         // 2. Extend the parsing method to handle reading in G2.39, then afterwards go through the DOM changing it to G2.4 as appropriate.
    849         // As far as I can tell the second option is twice as much work, so I'll try option 1. The problem here is that I have to determine if the 'buildtype' command is somewhere in the collect.cfg file, which means I'm going to have to read the file twice - once seaching for 'buildtype' and the second time to parse it.
    850 
    851         // Search for 'buildtype mgpp'
    852         InputStream input_stream_one = new FileInputStream(collect_config_file);
    853         Reader reader_one = new InputStreamReader(input_stream_one, ENCODING);
    854         BufferedReader buffered_reader_one = new BufferedReader(reader_one);
    855         String search_line_str = null;
    856         while(!is_twopointthreenine && (search_line_str = buffered_reader_one.readLine()) != null) {
    857         if(search_line_str.toLowerCase().indexOf(BUILDTYPE_STR) != -1) {
    858             is_twopointthreenine = true;
    859             acquired_collectionmeta_names = new ArrayList();
    860             changed_collectionmeta_names = new HashMap();
    861             obsolete_collectionmeta_names = new ArrayList();
    862         }
    863         }
    864         buffered_reader_one.close();
    865         reader_one.close();
    866         input_stream_one.close();
    867         buffered_reader_one = null;
    868         reader_one = null;
    869         input_stream_one = null;
    870834
    871835        Element collect_cfg_element = document.getDocumentElement();
     
    910874            String command_type = tokenizer.nextToken().toLowerCase();
    911875            // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
    912             if(command_element == null && command_type.equals(BUILDTYPE_STR)) {
    913             DebugStream.println("G2.39 Buildtype command detected. Ignoring.");
    914             command_element = document.createElement(UNKNOWN_ELEMENT);
    915             append_element = false;
    916             }
    917876            if(command_element == null && command_type.equals(CLASSIFY_STR)) {
    918877            command_element = parseClassify(command_str);
     
    922881            }
    923882            if(command_element == null && (command_type.equals(INDEX_STR)  || command_type.equals(COMMENTED_INDEXES_STR))) {
    924             // If this was a G2.39 config file then we manipulate the command string a bit before we submit it to the parser. We start by adding allfields as the first index. We then space separate the remaining indexes, and remove duplicates when encountered. Of course before we do any of that we record the various space separated indexes so that we can remove the collection meta assigned to them.
    925             if(is_twopointthreenine) {
    926                 DebugStream.println("G2.39 Index command detected. Modifying.");
    927                 DebugStream.println("Before: " + command_str);
    928                 StringBuffer new_command_str = new StringBuffer(command_type);
    929                 new_command_str.append(SPACE_CHARACTER);
    930                 new_command_str.append(ALLFIELDS_STR);
    931                 new_command_str.append(SPACE_CHARACTER);
    932                 ArrayList known_indexes = new ArrayList();
    933                 while(tokenizer.hasMoreTokens()) {
    934                 String old_index_str = tokenizer.nextToken();
    935                 // If this index is a combination of sources, then we need to remove the old collectionmeta, split up the compound index, then request new metadata be added for each part
    936                 if(old_index_str.indexOf(COMMA_CHARACTER) != -1) {
    937                     obsolete_collectionmeta_names.add(STOP_CHARACTER + old_index_str);
    938                     StringTokenizer string_tokenizer = new StringTokenizer(old_index_str, COMMA_CHARACTER);
    939                     while(string_tokenizer.hasMoreTokens()) {
    940                     String index_fragment_str = string_tokenizer.nextToken();
    941                     if(!known_indexes.contains(index_fragment_str)) {
    942                         known_indexes.add(index_fragment_str);
    943                         new_command_str.append(index_fragment_str);
    944                         new_command_str.append(SPACE_CHARACTER);
    945                         acquired_collectionmeta_names.add(STOP_CHARACTER + index_fragment_str);
    946                     }
    947                     index_fragment_str = null;
    948                     }
    949                     string_tokenizer = null;
    950                 }
    951                 // However if this was just a single index then a little choir of angels sing haleluja because we don't have to do -anything-. Nothing at all. Zip. Well no changes anyway. I obviously had to write this comment, and you can probably see, assuming you are not blind, that there are several lines of code below doing something, which is of course not nothing but something. And if we assume you are blind then you probably can't see the code, but then you probably didn't not see it doing the not nothing I said it would above.
    952                 else {
    953                     if(!known_indexes.contains(old_index_str)) {
    954                     known_indexes.add(old_index_str);
    955                     new_command_str.append(old_index_str);
    956                     new_command_str.append(SPACE_CHARACTER);
    957                     }
    958                     else {
    959                     // Use the collectionmeta for the single index instead of generating a default one
    960                     acquired_collectionmeta_names.remove(STOP_CHARACTER + old_index_str);
    961                     }
    962                 }
    963                 old_index_str = null;
    964                 }
    965                 known_indexes = null;
    966                 command_str = new_command_str.toString();
    967                 new_command_str = null;
    968                 DebugStream.println("After: " + command_str);
    969             }
    970883            command_element = parseIndex(command_str);
    971884            }
     
    973886           
    974887            command_element = parseIndexDefault(command_str);
    975             // If this was a G2.39 config file then we squelch the default index (no such thing in G2.4)
    976             if(is_twopointthreenine) {
    977                 DebugStream.println("G2.39 Default Index command detected. Ignoring.");
    978                 append_element = false;
    979             }
    980888            }
    981889            if(command_element == null && command_type.equals(LANGUAGES_STR)) {
     
    986894            }
    987895            if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
    988             // Again if this is G2.39 we have to do a tiny bit of magic to the levels command. We need to add document level, and change the remainder to lower case.
    989             if(is_twopointthreenine) {
    990                 DebugStream.println("G2.39 Levels command detected. Modifying.");
    991                 DebugStream.println("Before: " + command_str);
    992                 StringBuffer new_command_str = new StringBuffer(command_type);
    993                 new_command_str.append(SPACE_CHARACTER);
    994                 new_command_str.append(DOCUMENT_STR);
    995                 while(tokenizer.hasMoreTokens()) {
    996                 String token = tokenizer.nextToken();
    997                 // Generate a lower case version
    998                 String token_lc = token.toLowerCase();
    999                 // If they are still the same then it is all good baby, otherwise we have to remember to transform their collectionmeta as well
    1000                 if(!token.equals(token_lc)) {
    1001                     changed_collectionmeta_names.put(STOP_CHARACTER + token, STOP_CHARACTER + token_lc);
    1002                 }
    1003                 new_command_str.append(SPACE_CHARACTER);
    1004                 new_command_str.append(token_lc);
    1005                 token_lc = null;
    1006                 token = null;
    1007                 }
    1008                 command_str = new_command_str.toString();
    1009                 new_command_str = null;
    1010                 DebugStream.println("After: " + command_str);
    1011             }
    1012896            command_element = parseLevels(command_str);
    1013897            }
     
    1015899            command_element = parseMetadata(tokenizer); // Revised to handle multiple lines
    1016900            }
    1017             if(command_element == null && (/* command_type.equals(COLLECTIONMETADATA_BETA_STR) || */ command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
     901            if(command_element == null && (command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals(BUILDTYPE_STR))) {
    1018902            command_element = parseMetadataSpecial(command_str);
    1019903            }
     
    1049933        }
    1050934        // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
    1051         if(append_element) {
     935        //if(append_element) {
    1052936            collect_cfg_element.appendChild(command_element);
    1053         }
    1054         }
    1055 
    1056         // We have completed parsing the collect configuration file. Now, if we are dealing with the G2.39 nightmare scenario, it's time to add the SearchType command and modify the collectionmeta commands as necessary.
    1057         if(is_twopointthreenine) {
    1058         Element search_type_element = getSearchType();
    1059         search_type_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
    1060         while(search_type_element.hasChildNodes()) {
    1061             search_type_element.removeChild(search_type_element.getFirstChild());
    1062         }
    1063         Element plain_search_type_element = document.createElement(CONTENT_ELEMENT);
    1064         plain_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[1]);
    1065         search_type_element.appendChild(plain_search_type_element);
    1066         plain_search_type_element = null;
    1067         Element form_search_type_element = document.createElement(CONTENT_ELEMENT);
    1068         form_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
    1069         search_type_element.appendChild(form_search_type_element);
    1070         form_search_type_element = null;
    1071         search_type_element = null;
    1072        
    1073         // Search through the existing collectionmeta
    1074         Element document_element = document.getDocumentElement();
    1075         NodeList collectionmeta_elements = document_element.getElementsByTagName(COLLECTIONMETADATA_ELEMENT);
    1076         DebugStream.println("There are " + obsolete_collectionmeta_names.size() + " collectionmeta to remove.");
    1077         DebugStream.println("There are " + changed_collectionmeta_names.size() + " collectionmeta to change.");
    1078         for(int z = collectionmeta_elements.getLength(); z > 0; z--) {
    1079             Element collectionmeta_element = (Element) collectionmeta_elements.item(z - 1);
    1080             String name = collectionmeta_element.getAttribute(NAME_ATTRIBUTE);
    1081             DebugStream.println("Checking " + name);
    1082             // Remove any obsolete metadata
    1083             if(obsolete_collectionmeta_names.contains(name)) {
    1084             DebugStream.println("G2.39 CollectMeta detected. Removing: " + name);
    1085             document_element.removeChild(collectionmeta_element);
    1086             }
    1087             // We may have been asked to change the index name to lower case
    1088             else if(changed_collectionmeta_names.containsKey(name)) {
    1089             String new_name = (String) changed_collectionmeta_names.get(name);
    1090             DebugStream.println("G2.39 CollectMeta detected. Changing: " + name + " -> " + new_name);
    1091             collectionmeta_element.setAttribute(NAME_ATTRIBUTE, new_name);
    1092             new_name = null;
    1093             }
    1094             name = null;
    1095         }
    1096 
    1097         // Finally add any newly acquired collectionmeta. This general defaults to the collectionmeta name less the full stop
    1098         DebugStream.println("There are " + acquired_collectionmeta_names.size() + " collectionmeta to add.");
    1099         for(int y = 0; y < acquired_collectionmeta_names.size(); y++) {
    1100             String name = (String) acquired_collectionmeta_names.get(y);
    1101             String value = name.substring(1);
    1102             DebugStream.println("G2.39 CollectMeta missing. Adding: " + name + " [l=" + Configuration.getLanguage() + "] \"" + value + "\"");
    1103             Element element = document.createElement(COLLECTIONMETADATA_ELEMENT);
    1104             element.setAttribute(NAME_ATTRIBUTE, name);
    1105             element.setAttribute(LANGUAGE_ATTRIBUTE, Configuration.getLanguage());
    1106             element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
    1107             XMLTools.setValue(element, value);
    1108             document_element.appendChild(element);
    1109             element = null;
    1110             value = null;
    1111             name = null;
    1112         }
    1113 
    1114         document_element = null;
    1115         }
     937            //}
     938        }
     939
    1116940    }
    1117941    catch(Exception exception) {
     
    13121136    return command_element;
    13131137    }
     1138    private Element parseBuildType(String command_str) {
     1139    Element command_element = null;
     1140    try {
     1141        StringTokenizer tokenizer = new StringTokenizer(command_str);
     1142        if(tokenizer.countTokens() >= 2) {
     1143        command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
     1144        command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
     1145        String index_str = tokenizer.nextToken();
     1146        command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
     1147        String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
     1148        StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
     1149        while(content_tokenizer.hasMoreTokens()) {
     1150            Element content_element = document.createElement(CONTENT_ELEMENT);
     1151            content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
     1152            command_element.appendChild(content_element);
     1153            content_element = null;
     1154        }
     1155        content_tokenizer = null;
     1156        content_str = null;
     1157        content_str = null;
     1158        index_str = null;
     1159        }
     1160        tokenizer = null;
     1161    }
     1162    catch (Exception exception) {
     1163    }
     1164    return command_element;
     1165    }
    13141166
    13151167    private Element parseLanguage(String command_str) {
     
    14221274        String name_str = tokenizer.nextToken();
    14231275        String value_str = tokenizer.nextToken();
    1424         // if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
    1425         //     command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
    1426         // }
    14271276        if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
    14281277            command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
     
    14331282        else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
    14341283            command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
     1284        }
     1285        else if (name_str.equals(BUILDTYPE_STR)) {
     1286            command_element = document.createElement(BUILDTYPE_ELEMENT);
    14351287        }
    14361288        if(command_element != null) {
Note: See TracChangeset for help on using the changeset viewer.