Changeset 33738


Ignore:
Timestamp:
2019-12-02T20:43:20+13:00 (4 years ago)
Author:
ak19
Message:

Got the filenameToURLEncoding(String) variant that reuses fileToURLEncoding(File) to work now. It just needed the current directory path (whatever "." resolves to) to be removed from the String filepath returned. Dr Bainbridge had anticipated that this could also happen with new URI(); it didn't happen there, but it does happen with file.toURI(), as he had also expected.

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33737 r33738  
    376376   
    377377    // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter
    378     public static String filenameToURLEncoding(String filename) {
     378    public static String UNUSED_filenameToURLEncoding(String filename) {
    379379        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
    380380            return filename;
     
    424424   
    425425    // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter   
    426     public static String _filenameToURLEncoding(String filename) {
     426    public static String filenameToURLEncoding(String filename) {
    427427        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
    428428            return filename;
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33737 r33738  
    287287        // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples     
    288288        if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
    289             //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path);
     289            //System.err.println("   @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n");
    290290            current_fileset_matches = true;
    291291            if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
     
    668668        }
    669669       
     670        String metadata_xml_file_directory_path = FilenameEncoding.filenameToURLEncoding(".");
     671        metadata_xml_file_directory_path = metadata_xml_file_directory_path.substring(0, metadata_xml_file_directory_path.length()-2); // cut off /. at end
     672        System.err.println("@@@ metadata_xml_file_directory_path: " + metadata_xml_file_directory_path);
     673       
    670674        //System.err.println("PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true));
    671675       
     
    681685                String filename = XMLTools.getElementTextValue(filename_element);
    682686                if(!filename.equals(DIRECTORY_FILENAME)) {
    683                     //System.err.println("Filename before reencoding was: " + filename);
    684                     // reencode filename                   
    685                     // can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
    686                     // created by filenameToURLEncoding).
     687                    // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
     688                   
     689                    //System.err.println("Filename before reencoding was: " + filename);                   
     690                   
     691                    // Can't convert to URI with backslash-escaped chars (backslashes used in a regexed filename are illegal in the URI object
     692                    // created by filenameToURLEncoding). So replace backslashes in the regex with the URL-encoded hex value of backslash, %5C.
    687693                    String encoded_filename = filename.replace("\\", "%5C");
    688694                    encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename);
    689                     // escape chars for regex again
    690                     encoded_filename = encoded_filename.replace("%5C", "\\");
     695
     696                    // now lop off the metadataxml dir prefix the FilenameEncoding.filenameToURLEncoding(STRING) variant would have added
     697                    encoded_filename = encoded_filename.substring(metadata_xml_file_directory_path.length());
     698                    if (encoded_filename.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     699                        encoded_filename = encoded_filename.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
     700                    }
     701   
     702                    // Reintroduce the backslash characters in place of their %5C hex placeholders
     703                    encoded_filename = encoded_filename.replace("%5C", "\\");               
     704                   
     705                    // Update filename element in DOM
    691706                    XMLTools.setElementTextValue(filename_element, encoded_filename);
    692707                    //System.err.println("Filename after reencoding was: " + encoded_filename);
Note: See TracChangeset for help on using the changeset viewer.