Changeset 33730

Show
Ignore:
Timestamp:
29.11.2019 23:40:14 (7 days ago)
Author:
ak19
Message:

Finally got the code back to achieving the same thing as yesterday's partial bugfix for the issue of non-ASCII meta filenames not retaining any meta assigned at file level. The difference in today's commit is that the reworking does it Dr Bainbridge's way: instead of hex encoding one value to compare it to a hex encoded value, I am now ensuring both values are hex-decoded when comparing. Also adding a bugfix to the recently introduced FilenameEncoding.decodeStringContainingHexEntities(), since I forgot to test for the most basic case of all: when a string is not hex encoded, it should return the same string (the bug caused it to return nothing, which in turn made my rewrite of yesterday's work look like it wasn't working for so long).

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33728 r33730  
    311311    public static String decodeStringContainingHexEntities(String str) { 
    312312        String result = ""; 
    313         boolean done = false;        
    314313        Pattern hexPattern = Pattern.compile("(&#x[0-9a-zA-Z]{1,4}+;)"); 
    315314        Matcher matcher = hexPattern.matcher(str); 
     
    345344        } 
    346345         
    347         if(endMatchIndex != -1) { 
     346        if(endMatchIndex != -1) { // attach any suffix once we finished processing all the hex codes 
    348347            result += str.substring(endMatchIndex); 
    349348            //System.err.println("suffix: " + str.substring(endMatchIndex)); 
    350349        } 
     350        else { // there were no hex codes to decode, return string as is 
     351            result = str; 
     352        } 
    351353         
    352354        return result; 
     
    355357    /** Attempting to produce the equivalent method fileToURLEncoding() above, but taking a String as input parameter */ 
    356358     public static String fileNameToHex(String filename) { 
    357         /*String filename_url_encoded = ""; 
    358         try { 
    359             URI filename_uri = new URI(filename); 
    360             String filename_ascii = filename_uri.toASCIIString(); 
    361             String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1"); 
    362             filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);  
    363             return filename_url_encoded; 
    364         } catch (Exception e) { 
    365             e.printStackTrace(); 
    366             // Give up trying to convert 
    367             filename_url_encoded = filename;  
    368         } 
    369         return filename_url_encoded; 
    370         */ 
    371          
     359 
    372360        String hexFilename = ""; 
    373361        for(int i = 0; i < filename.length(); i++) { 
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33727 r33730  
    3535import org.w3c.dom.*; 
    3636 
     37import org.greenstone.gatherer.util.Utility; 
    3738 
    3839/** This class represents one metadata.xml file */ 
     
    262263        String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element); 
    263264 
    264         String regexed_file_relative_path = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path); 
    265         //System.err.println("Looking in meta.xml for regexed version of filename: " + regexed_file_relative_path); 
     265        //System.err.println("\n  Original TAIL filename was: " + Utility.debugUnicodeString(file.getName())); 
     266        String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path); 
     267        //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path 
     268            //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));       
    266269     
    267270        // Does this fileset specify metadata for one file only? 
     
    272275        } 
    273276 
    274         String current_filename_element_value_hex = FilenameEncoding.fileNameToHex(current_filename_element_value); 
     277        String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value); 
     278        //System.err.println("   Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value)); 
     279            //System.err.println("   Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value)); 
    275280         
    276281        // This fileset specifies metadata for the file 
    277         // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex, so need to compare with the same 
    278         if (file_relative_path.matches(current_filename_element_value_hex)) { //if (file_relative_path.matches(current_filename_element_value)) { 
    279             //System.err.println("Found a match in meta.xml for file name: " + regexed_file_relative_path); 
     282        // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex 
     283        // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples      
     284        if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) { 
     285            //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path); 
    280286            current_fileset_matches = true; 
    281287            if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { 
     
    283289            } 
    284290            break; 
    285         }        
     291        } //else { 
     292            //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value); 
     293            //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value)); 
     294        //} 
    286295         
    287296        // This fileset specifies metadata for the folder the file is in 
    288         if (regexed_file_relative_path.startsWith(current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) { 
     297        if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) { 
    289298            current_fileset_matches = true; 
    290299            folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value); 
     
    616625    // If we have a file loaded into memory and it has been modified, save it now 
    617626    if (loaded_file != null && loaded_file_changed == true) { 
     627        //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true)); 
     628         
    618629        XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements); 
    619630     
     631        Document doc = XMLTools.parseXMLFile(loaded_file); 
     632        //System.err.println("AT END saveLoadedFile(), PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true)); 
    620633         
    621634        loaded_file_changed = false;