Changeset 33738

Show
Ignore:
Timestamp:
02.12.2019 20:43:20 (4 days ago)
Author:
ak19
Message:

Got the filenameToURLEncoding(String) variant that reuses fileToURLEncoding(File) to work now. It just needed the current directory path (whatever "." resolves to) to be removed from the String filepath returned. Dr Bainbridge had anticipated that this could also happen with new URI(); it didn't happen there, but it does happen with file.toURI(), as he had also expected.

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33737 r33738  
    376376     
    377377    // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 
    378     public static String filenameToURLEncoding(String filename) { 
     378    public static String UNUSED_filenameToURLEncoding(String filename) { 
    379379        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { 
    380380            return filename; 
     
    424424     
    425425    // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter     
    426     public static String _filenameToURLEncoding(String filename) { 
     426    public static String filenameToURLEncoding(String filename) { 
    427427        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 
    428428            return filename; 
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33737 r33738  
    287287        // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples      
    288288        if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) { 
    289             //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path); 
     289            //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n"); 
    290290            current_fileset_matches = true; 
    291291            if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { 
     
    668668        } 
    669669         
     670        String metadata_xml_file_directory_path = FilenameEncoding.filenameToURLEncoding("."); 
     671        metadata_xml_file_directory_path = metadata_xml_file_directory_path.substring(0, metadata_xml_file_directory_path.length()-2); // cut off /. at end 
     672        System.err.println("@@@ metadata_xml_file_directory_path: " + metadata_xml_file_directory_path); 
     673         
    670674        //System.err.println("PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true)); 
    671675         
     
    681685                String filename = XMLTools.getElementTextValue(filename_element); 
    682686                if(!filename.equals(DIRECTORY_FILENAME)) { 
    683                     //System.err.println("Filename before reencoding was: " + filename); 
    684                     // reencode filename                     
    685                     // can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object 
    686                     // created by filenameToURLEncoding). 
     687                    // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements 
     688                     
     689                    //System.err.println("Filename before reencoding was: " + filename);                     
     690                     
     691                    // Can't convert to URI with backslash-escaped chars (backslashes used in regexed filenames are illegal in the URI object 
     692                    // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C. 
    687693                    String encoded_filename = filename.replace("\\", "%5C");  
    688694                    encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename); 
    689                     // escape chars for regex again 
    690                     encoded_filename = encoded_filename.replace("%5C", "\\"); 
     695 
     696                    // now lop off the metadataxml dir prefix the FilenameEncoding.filenameToURLEncoding(STRING) variant would have added 
     697                    encoded_filename = encoded_filename.substring(metadata_xml_file_directory_path.length()); 
     698                    if (encoded_filename.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { 
     699                        encoded_filename = encoded_filename.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); 
     700                    } 
     701     
     702                    // Reintroduce the backslash characters in place of their %5C hex placeholders 
     703                    encoded_filename = encoded_filename.replace("%5C", "\\");                
     704                     
     705                    // Update filename element in DOM 
    691706                    XMLTools.setElementTextValue(filename_element, encoded_filename); 
    692707                    //System.err.println("Filename after reencoding was: " + encoded_filename);