Changeset 5817


Ignore:
Timestamp:
2003-11-11T11:17:34+13:00 (20 years ago)
Author:
kjdon
Message:

Changed the metadata parsing to read across multiple lines if a value starts with a quote but doesn't end with one - needed for legacy collections. Also made it always write out a [l=en] qualifier for metadata, even if it's English.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java

    r5757 r5817  
    757757        String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
    758758        // If this is element is in english, and it is the first one found, we don't need to write the language argument.
    759         if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
    760             text.append(LBRACKET_CHARACTER);
    761             text.append(LANGUAGE_ARGUMENT);
    762             text.append(language_str);
    763             text.append(RBRACKET_CHARACTER);
    764             text.append(SPACE_CHARACTER);
    765         }
     759        // we should always write the language string
     760        //if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
     761        text.append(LBRACKET_CHARACTER);
     762        text.append(LANGUAGE_ARGUMENT);
     763        text.append(language_str);
     764        text.append(RBRACKET_CHARACTER);
     765        text.append(SPACE_CHARACTER);
     766            //}
    766767        if(known_metadata != null) {
    767768            known_metadata.add(name_str);
     
    797798    try {
    798799        Element collect_cfg_element = document.getDocumentElement();
    799         // Read in the file command at a time.
     800        // Read in the file one command at a time.
    800801        FileReader in_reader = new FileReader(collect_config_file);
    801802        BufferedReader in = new BufferedReader(in_reader);
     
    856857            }
    857858            if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
    858             command_element = parseMetadata(command_str);
     859            // collectionmeta may go over more than one line, so
     860            // pass in the reader
     861            command_element = parseMetadata(command_str, in);
    859862            }
    860863            if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) || command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
     
    11351138    }
    11361139
    1137     private Element parseMetadata(String command_str) {
     1140    private Element parseMetadata(String command_str, BufferedReader in) {
    11381141    Element command_element = null;
    11391142    try {
     
    11451148        String name_str = tokenizer.nextToken();
    11461149        String value_str = tokenizer.nextToken();
    1147         String language_str = "en"; // By default
     1150        String language_str = "en"; // By default - why do we assume English???
    11481151        // Check if the value string is actually a language string
    11491152        if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
     
    11511154            value_str = tokenizer.nextToken();
    11521155        }
    1153         // Remove any speech marks from value
    1154         if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) {
    1155             value_str = value_str.substring(1, value_str.length() - 1);
    1156         }
     1156
     1157        // now we need to handle the case where the value is enclosed in quotes (single or double) and may extend across multiple lines
     1158        String start_string = value_str.substring(0,1);
     1159        if (start_string.equals("\"") || start_string.equals("\'")) {
     1160
     1161            if (value_str.endsWith(start_string) && value_str.length()!=1) {
     1162            // we remove the quotes from the ends
     1163            value_str = value_str.substring(1, value_str.length() - 1);
     1164            } else {
     1165           
     1166            Gatherer.println("cdm.CollectionConfiguration.parseMEtadata: we have found a starting quote but not an ending quote, so now read lines until we get to an end quote");
     1167            StringBuffer value_raw = new StringBuffer(value_str.substring(1));
     1168            int pos = value_raw.indexOf(start_string);
     1169            int old_pos = 0;
     1170            while (pos != -1 && value_raw.charAt(pos-1)=='\\') {
     1171                old_pos = pos+1;
     1172                pos = value_raw.indexOf(start_string, old_pos);
     1173            }
     1174            while(pos == -1) {
     1175                String next_line = in.readLine();
     1176                if(next_line != null) {
     1177                value_raw.append(next_line);
     1178                value_raw.append(StaticStrings.NEW_LINE_CHAR);
     1179                }
     1180                next_line = null;
     1181                pos = value_raw.indexOf(start_string, old_pos);
     1182                while (pos != -1 && value_raw.charAt(pos-1)=='\\') {
     1183                old_pos = pos+1;
     1184                pos = value_raw.indexOf(start_string, old_pos);
     1185                }
     1186            }
     1187           
     1188            value_str = value_raw.substring(0, value_raw.lastIndexOf(start_string));
     1189            value_raw = null;
     1190           
     1191            } // else
     1192        } // if starts with a quote
     1193                   
    11571194        if(value_str != null) {
    11581195            // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
Note: See TracChangeset for help on using the changeset viewer.