Changeset 9908
- Timestamp: 2005-05-18T17:15:28+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java
r9036 r9908 242 242 return self.metadataToString(command_element, show_extracted_namespace); 243 243 } 244 // else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {245 //return self.metadataToString(command_element, show_extracted_namespace);246 //}244 else if (command_element_name.equals(BUILDTYPE_ELEMENT)) { 245 return self.metadataToString(command_element, show_extracted_namespace); 246 } 247 247 else if(command_element_name.equals(PLUGIN_ELEMENT)) { 248 248 return self.pluginToString(command_element, show_extracted_namespace); … … 310 310 311 311 /** Gives the preferred ordering of commands */ 312 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, /* StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, */ StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};312 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.BUILDTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, 
StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT}; 313 313 314 314 /** ************************** Public Data Members ***************************/ … … 316 316 /** ************************** Private Data Members ***************************/ 317 317 318 /** Is the configuration file currently being read in one of the infamous G2.39 ones. */319 private boolean is_twopointthreenine = false;320 318 private File collect_config_file; 321 319 … … 358 356 } 359 357 360 // public Element getBeta() {361 // Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);362 // element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);363 // element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);364 // return element;365 // }366 358 367 359 public Element getCreator() { … … 412 404 } 413 405 406 public Element getBuildType() { 407 Element element = getOrCreateElementByTagName(BUILDTYPE_ELEMENT, null, null); 408 element.setAttribute(NAME_ATTRIBUTE, BUILDTYPE_STR); 409 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR); 410 return element; 411 412 } 414 413 /** Retrieve or create the searchtype element. */ 415 414 public Element getSearchType() { … … 479 478 //FileWriter file_writer = new FileWriter(collect_config_file, false); 480 479 BufferedWriter buffered_writer = new BufferedWriter(file_writer); 481 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).482 // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata??483 // Yes we can. 
Lets see..484 // known_metadata = new ArrayList(); 'blamo'485 480 Element collect_config_element = document.getDocumentElement(); 486 481 NodeList command_elements = collect_config_element.getChildNodes(); … … 513 508 } 514 509 buffered_writer.close(); 515 // known_metadata = null; 'poof'516 510 } 517 511 catch (Exception exception) { … … 782 776 String name_str = command_element.getAttribute(NAME_ATTRIBUTE); 783 777 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons. 784 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR) ) {778 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals(BUILDTYPE_STR) ) { 785 779 text.append(name_str); 786 780 text.append(TAB_CHARACTER); 787 781 special = true; 788 782 } 789 else if ( /* name_str.equals(COLLECTIONMETADATA_BETA_STR) || */ name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {783 else if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR) ) { 790 784 text.append(name_str); 791 785 text.append(TAB_CHARACTER); … … 799 793 text.append(SPACE_CHARACTER); 800 794 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE); 801 // If this is element is in english, and it is the first one found, we don't need to write the language argument.802 //if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {803 // changed so that we always write the language string804 795 text.append(LBRACKET_CHARACTER); 805 796 text.append(LANGUAGE_ARGUMENT); … … 807 798 text.append(RBRACKET_CHARACTER); 808 799 text.append(SPACE_CHARACTER); 809 //} 800 810 801 if(known_metadata != null) { 811 802 known_metadata.add(name_str); … … 837 828 } 838 829 839 /** Parse a collect.cfg into a DOM model representation. */ 830 /** Parse a collect.cfg into a DOM model representation. 
831 * note we are ignoring 2.39 compatibility now. */ 840 832 private void parse(File collect_config_file) { 841 833 try { 842 ArrayList acquired_collectionmeta_names = null;843 ArrayList obsolete_collectionmeta_names = null;844 HashMap changed_collectionmeta_names = null;845 846 // Life is made oh-so-more tricky by the existance of G2.39 config files. There are two ways to handle them:847 // 1. Notice that the file is G2.39 from the start, then as I parse it magic it into G2.4 standard848 // 2. Extend the parsing method to handle reading in G2.39, then afterwards go through the DOM changing it to G2.4 as appropriate.849 // As far as I can tell the second option is twice as much work, so I'll try option 1. The problem here is that I have to determine if the 'buildtype' command is somewhere in the collect.cfg file, which means I'm going to have to read the file twice - once seaching for 'buildtype' and the second time to parse it.850 851 // Search for 'buildtype mgpp'852 InputStream input_stream_one = new FileInputStream(collect_config_file);853 Reader reader_one = new InputStreamReader(input_stream_one, ENCODING);854 BufferedReader buffered_reader_one = new BufferedReader(reader_one);855 String search_line_str = null;856 while(!is_twopointthreenine && (search_line_str = buffered_reader_one.readLine()) != null) {857 if(search_line_str.toLowerCase().indexOf(BUILDTYPE_STR) != -1) {858 is_twopointthreenine = true;859 acquired_collectionmeta_names = new ArrayList();860 changed_collectionmeta_names = new HashMap();861 obsolete_collectionmeta_names = new ArrayList();862 }863 }864 buffered_reader_one.close();865 reader_one.close();866 input_stream_one.close();867 buffered_reader_one = null;868 reader_one = null;869 input_stream_one = null;870 834 871 835 Element collect_cfg_element = document.getDocumentElement(); … … 910 874 String command_type = tokenizer.nextToken().toLowerCase(); 911 875 // Why can't you switch on strings eh? 
We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created 912 if(command_element == null && command_type.equals(BUILDTYPE_STR)) {913 DebugStream.println("G2.39 Buildtype command detected. Ignoring.");914 command_element = document.createElement(UNKNOWN_ELEMENT);915 append_element = false;916 }917 876 if(command_element == null && command_type.equals(CLASSIFY_STR)) { 918 877 command_element = parseClassify(command_str); … … 922 881 } 923 882 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) { 924 // If this was a G2.39 config file then we manipulate the command string a bit before we submit it to the parser. We start by adding allfields as the first index. We then space separate the remaining indexes, and remove duplicates when encountered. Of course before we do any of that we record the various space separated indexes so that we can remove the collection meta assigned to them.925 if(is_twopointthreenine) {926 DebugStream.println("G2.39 Index command detected. 
Modifying.");927 DebugStream.println("Before: " + command_str);928 StringBuffer new_command_str = new StringBuffer(command_type);929 new_command_str.append(SPACE_CHARACTER);930 new_command_str.append(ALLFIELDS_STR);931 new_command_str.append(SPACE_CHARACTER);932 ArrayList known_indexes = new ArrayList();933 while(tokenizer.hasMoreTokens()) {934 String old_index_str = tokenizer.nextToken();935 // If this index is a combination of sources, then we need to remove the old collectionmeta, split up the compound index, then request new metadata be added for each part936 if(old_index_str.indexOf(COMMA_CHARACTER) != -1) {937 obsolete_collectionmeta_names.add(STOP_CHARACTER + old_index_str);938 StringTokenizer string_tokenizer = new StringTokenizer(old_index_str, COMMA_CHARACTER);939 while(string_tokenizer.hasMoreTokens()) {940 String index_fragment_str = string_tokenizer.nextToken();941 if(!known_indexes.contains(index_fragment_str)) {942 known_indexes.add(index_fragment_str);943 new_command_str.append(index_fragment_str);944 new_command_str.append(SPACE_CHARACTER);945 acquired_collectionmeta_names.add(STOP_CHARACTER + index_fragment_str);946 }947 index_fragment_str = null;948 }949 string_tokenizer = null;950 }951 // However if this was just a single index then a little choir of angels sing haleluja because we don't have to do -anything-. Nothing at all. Zip. Well no changes anyway. I obviously had to write this comment, and you can probably see, assuming you are not blind, that there are several lines of code below doing something, which is of course not nothing but something. 
And if we assume you are blind then you probably can't see the code, but then you probably didn't not see it doing the not nothing I said it would above.952 else {953 if(!known_indexes.contains(old_index_str)) {954 known_indexes.add(old_index_str);955 new_command_str.append(old_index_str);956 new_command_str.append(SPACE_CHARACTER);957 }958 else {959 // Use the collectionmeta for the single index instead of generating a default one960 acquired_collectionmeta_names.remove(STOP_CHARACTER + old_index_str);961 }962 }963 old_index_str = null;964 }965 known_indexes = null;966 command_str = new_command_str.toString();967 new_command_str = null;968 DebugStream.println("After: " + command_str);969 }970 883 command_element = parseIndex(command_str); 971 884 } … … 973 886 974 887 command_element = parseIndexDefault(command_str); 975 // If this was a G2.39 config file then we squelch the default index (no such thing in G2.4)976 if(is_twopointthreenine) {977 DebugStream.println("G2.39 Default Index command detected. Ignoring.");978 append_element = false;979 }980 888 } 981 889 if(command_element == null && command_type.equals(LANGUAGES_STR)) { … … 986 894 } 987 895 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) { 988 // Again if this is G2.39 we have to do a tiny bit of magic to the levels command. We need to add document level, and change the remainder to lower case.989 if(is_twopointthreenine) {990 DebugStream.println("G2.39 Levels command detected. 
Modifying.");991 DebugStream.println("Before: " + command_str);992 StringBuffer new_command_str = new StringBuffer(command_type);993 new_command_str.append(SPACE_CHARACTER);994 new_command_str.append(DOCUMENT_STR);995 while(tokenizer.hasMoreTokens()) {996 String token = tokenizer.nextToken();997 // Generate a lower case version998 String token_lc = token.toLowerCase();999 // If they are still the same then it is all good baby, otherwise we have to remember to transform their collectionmeta as well1000 if(!token.equals(token_lc)) {1001 changed_collectionmeta_names.put(STOP_CHARACTER + token, STOP_CHARACTER + token_lc);1002 }1003 new_command_str.append(SPACE_CHARACTER);1004 new_command_str.append(token_lc);1005 token_lc = null;1006 token = null;1007 }1008 command_str = new_command_str.toString();1009 new_command_str = null;1010 DebugStream.println("After: " + command_str);1011 }1012 896 command_element = parseLevels(command_str); 1013 897 } … … 1015 899 command_element = parseMetadata(tokenizer); // Revised to handle multiple lines 1016 900 } 1017 if(command_element == null && ( /* command_type.equals(COLLECTIONMETADATA_BETA_STR) || */ command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {901 if(command_element == null && (command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals(BUILDTYPE_STR))) { 1018 902 command_element = parseMetadataSpecial(command_str); 1019 903 } … … 1049 933 } 1050 934 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it 1051 if(append_element) {935 //if(append_element) { 1052 936 collect_cfg_element.appendChild(command_element); 1053 } 1054 } 1055 1056 // We have completed parsing the collect configuration file. 
Now, if we are dealing with the G2.39 nightmare scenario, it's time to add the SearchType command and modify the collectionmeta commands as necessary. 1057 if(is_twopointthreenine) { 1058 Element search_type_element = getSearchType(); 1059 search_type_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); 1060 while(search_type_element.hasChildNodes()) { 1061 search_type_element.removeChild(search_type_element.getFirstChild()); 1062 } 1063 Element plain_search_type_element = document.createElement(CONTENT_ELEMENT); 1064 plain_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[1]); 1065 search_type_element.appendChild(plain_search_type_element); 1066 plain_search_type_element = null; 1067 Element form_search_type_element = document.createElement(CONTENT_ELEMENT); 1068 form_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]); 1069 search_type_element.appendChild(form_search_type_element); 1070 form_search_type_element = null; 1071 search_type_element = null; 1072 1073 // Search through the existing collectionmeta 1074 Element document_element = document.getDocumentElement(); 1075 NodeList collectionmeta_elements = document_element.getElementsByTagName(COLLECTIONMETADATA_ELEMENT); 1076 DebugStream.println("There are " + obsolete_collectionmeta_names.size() + " collectionmeta to remove."); 1077 DebugStream.println("There are " + changed_collectionmeta_names.size() + " collectionmeta to change."); 1078 for(int z = collectionmeta_elements.getLength(); z > 0; z--) { 1079 Element collectionmeta_element = (Element) collectionmeta_elements.item(z - 1); 1080 String name = collectionmeta_element.getAttribute(NAME_ATTRIBUTE); 1081 DebugStream.println("Checking " + name); 1082 // Remove any obsolete metadata 1083 if(obsolete_collectionmeta_names.contains(name)) { 1084 DebugStream.println("G2.39 CollectMeta detected. 
Removing: " + name); 1085 document_element.removeChild(collectionmeta_element); 1086 } 1087 // We may have been asked to change the index name to lower case 1088 else if(changed_collectionmeta_names.containsKey(name)) { 1089 String new_name = (String) changed_collectionmeta_names.get(name); 1090 DebugStream.println("G2.39 CollectMeta detected. Changing: " + name + " -> " + new_name); 1091 collectionmeta_element.setAttribute(NAME_ATTRIBUTE, new_name); 1092 new_name = null; 1093 } 1094 name = null; 1095 } 1096 1097 // Finally add any newly acquired collectionmeta. This general defaults to the collectionmeta name less the full stop 1098 DebugStream.println("There are " + acquired_collectionmeta_names.size() + " collectionmeta to add."); 1099 for(int y = 0; y < acquired_collectionmeta_names.size(); y++) { 1100 String name = (String) acquired_collectionmeta_names.get(y); 1101 String value = name.substring(1); 1102 DebugStream.println("G2.39 CollectMeta missing. Adding: " + name + " [l=" + Configuration.getLanguage() + "] \"" + value + "\""); 1103 Element element = document.createElement(COLLECTIONMETADATA_ELEMENT); 1104 element.setAttribute(NAME_ATTRIBUTE, name); 1105 element.setAttribute(LANGUAGE_ATTRIBUTE, Configuration.getLanguage()); 1106 element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR); 1107 XMLTools.setValue(element, value); 1108 document_element.appendChild(element); 1109 element = null; 1110 value = null; 1111 name = null; 1112 } 1113 1114 document_element = null; 1115 } 937 //} 938 } 939 1116 940 } 1117 941 catch(Exception exception) { … … 1312 1136 return command_element; 1313 1137 } 1138 private Element parseBuildType(String command_str) { 1139 Element command_element = null; 1140 try { 1141 StringTokenizer tokenizer = new StringTokenizer(command_str); 1142 if(tokenizer.countTokens() >= 2) { 1143 command_element = document.createElement(INDEX_DEFAULT_ELEMENT); 1144 command_element.setAttribute(ASSIGNED_ATTRIBUTE, 
((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR)); 1145 String index_str = tokenizer.nextToken(); 1146 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER))); 1147 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1); 1148 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER); 1149 while(content_tokenizer.hasMoreTokens()) { 1150 Element content_element = document.createElement(CONTENT_ELEMENT); 1151 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken()); 1152 command_element.appendChild(content_element); 1153 content_element = null; 1154 } 1155 content_tokenizer = null; 1156 content_str = null; 1157 content_str = null; 1158 index_str = null; 1159 } 1160 tokenizer = null; 1161 } 1162 catch (Exception exception) { 1163 } 1164 return command_element; 1165 } 1314 1166 1315 1167 private Element parseLanguage(String command_str) { … … 1422 1274 String name_str = tokenizer.nextToken(); 1423 1275 String value_str = tokenizer.nextToken(); 1424 // if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {1425 // command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);1426 // }1427 1276 if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) { 1428 1277 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT); … … 1433 1282 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) { 1434 1283 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT); 1284 } 1285 else if (name_str.equals(BUILDTYPE_STR)) { 1286 command_element = document.createElement(BUILDTYPE_ELEMENT); 1435 1287 } 1436 1288 if(command_element != null) {
Note: See TracChangeset for help on using the changeset viewer.