Changeset 33730 for main


Ignore:
Timestamp:
2019-11-29T23:40:14+13:00 (4 years ago)
Author:
ak19
Message:

Finally got the code back to achieving the same thing as yesterday's partial bugfix for the issue of non-ASCII meta filenames not retaining any metadata assigned at file level. The difference in today's commit is that the reworking does it Dr Bainbridge's way: instead of hex-encoding one value in order to compare it with a hex-encoded value, I am now ensuring that both values are hex-decoded before they are compared. Also adding a bugfix to the recently introduced FilenameEncoding.decodeStringContainingHexEntities(), since I forgot to test the most basic case of all: when a string is not hex-encoded, the method should return the same string. (The bug caused it to return an empty string, which is why my rewrite of yesterday's work looked like it wasn't working for so long.)

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33728 r33730  
    311311    public static String decodeStringContainingHexEntities(String str) {
    312312        String result = "";
    313         boolean done = false;       
    314313        Pattern hexPattern = Pattern.compile("(&#x[0-9a-zA-Z]{1,4}+;)");
    315314        Matcher matcher = hexPattern.matcher(str);
     
    345344        }
    346345       
    347         if(endMatchIndex != -1) {
     346        if(endMatchIndex != -1) { // attach any suffix once we finished processing all the hex codes
    348347            result += str.substring(endMatchIndex);
    349348            //System.err.println("suffix: " + str.substring(endMatchIndex));
    350349        }
     350        else { // there were no hex codes to decode, return string as is
     351            result = str;
     352        }
    351353       
    352354        return result;
     
    355357    /** Attempting to produce the equivalent method fileToURLEncoding() above, but taking a String as input parameter */
    356358     public static String fileNameToHex(String filename) {
    357         /*String filename_url_encoded = "";
    358         try {
    359             URI filename_uri = new URI(filename);
    360             String filename_ascii = filename_uri.toASCIIString();
    361             String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
    362             filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
    363             return filename_url_encoded;
    364         } catch (Exception e) {
    365             e.printStackTrace();
    366             // Give up trying to convert
    367             filename_url_encoded = filename;
    368         }
    369         return filename_url_encoded;
    370         */
    371        
     359
    372360        String hexFilename = "";
    373361        for(int i = 0; i < filename.length(); i++) {
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33727 r33730  
    3535import org.w3c.dom.*;
    3636
     37import org.greenstone.gatherer.util.Utility;
    3738
    3839/** This class represents one metadata.xml file */
     
    262263        String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
    263264
    264         String regexed_file_relative_path = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
    265         //System.err.println("Looking in meta.xml for regexed version of filename: " + regexed_file_relative_path);
     265        //System.err.println("\n  Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
     266        String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
     267        //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
     268            //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));     
    266269   
    267270        // Does this fileset specify metadata for one file only?
     
    272275        }
    273276
    274         String current_filename_element_value_hex = FilenameEncoding.fileNameToHex(current_filename_element_value);
     277        String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
     278        //System.err.println("   Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
     279            //System.err.println("   Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
    275280       
    276281        // This fileset specifies metadata for the file
    277         // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex, so need to compare with the same
    278         if (file_relative_path.matches(current_filename_element_value_hex)) { //if (file_relative_path.matches(current_filename_element_value)) {
    279             //System.err.println("Found a match in meta.xml for file name: " + regexed_file_relative_path);
     282        // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
     283        // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples     
     284        if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
     285            //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path);
    280286            current_fileset_matches = true;
    281287            if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
     
    283289            }
    284290            break;
    285         }       
     291        } //else {
     292            //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
     293            //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
     294        //}
    286295       
    287296        // This fileset specifies metadata for the folder the file is in
    288         if (regexed_file_relative_path.startsWith(current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
     297        if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
    289298            current_fileset_matches = true;
    290299            folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
     
    616625    // If we have a file loaded into memory and it has been modified, save it now
    617626    if (loaded_file != null && loaded_file_changed == true) {
     627        //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
     628       
    618629        XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
    619630   
     631        Document doc = XMLTools.parseXMLFile(loaded_file);
     632        //System.err.println("AT END saveLoadedFile(), PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true));
    620633       
    621634        loaded_file_changed = false;
Note: See TracChangeset for help on using the changeset viewer.