Ignore:
Timestamp:
2019-12-03T17:31:17+13:00 (4 years ago)
Author:
ak19
Message:
  1. Bugfix for dealing with + in filenames: file-level metadata now sticks and also ends up in doc.xml on build, as should happen. 2. Better (more optimal) bugfix for & in filenames, to get metadata to still stick after yesterday's first bugfix for this. Sadly, the improved code no longer needs the new function I introduced yesterday (escapeAllCharWithHexEntity). Leaving the function in, in case it ever comes in handy or as an idea. 3. Refactoring some code. 4. Removed some debugging statements. But some things are still largely commented out. Will remove hereafter.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33744 r33746  
    668668        }
    669669       
    670         String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding(".");
    671        
    672         //String curr_directory_path = FilenameEncoding.filenameToURLEncoding(".");
    673         //curr_directory_path = curr_directory_path.substring(0, curr_directory_path.length()-2); // cut off /. at end
    674         System.err.println("@@@ curr_directory_path: " + curr_directory_path);
     670        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); // returns the curr dir path after removing the /./ at end       
     671        //System.err.println("@@@ curr_directory_path: " + curr_directory_path);
    675672       
    676673        //System.err.println("PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true));
     
    689686                    // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
    690687                   
    691                     System.err.println("Filename before reencoding was: " + filename);                 
     688                    //System.err.println("Filename before reencoding was: " + filename);                   
    692689                   
    693690                    // Can't convert to URI with backslash-escaped chars (backslashes used in a regexed filename are illegal in the URI object
    694691                    // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C.
    695                     String encoded_filename = filename.replace("\\", "%5C");
    696                    
    697                     /*
    698                     encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename);
    699                     // now lop off the metadataxml dir prefix the FilenameEncoding.filenameToURLEncoding(STRING) variant would have added
    700                     encoded_filename = encoded_filename.substring(curr_directory_path.length());
    701                     if (encoded_filename.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
    702                         encoded_filename = encoded_filename.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
    703                     }
    704                     */
     692                    String encoded_filename = filename.replace("\\", "%5C");
    705693                   
    706694                    // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
     
    708696                   
    709697                    // Reintroduce the backslash characters in place of their %5C hex placeholders
    710                     encoded_filename = encoded_filename.replace("%5C", "\\");               
     698                    encoded_filename = encoded_filename.replace("%5C", "\\");
    711699                   
    712700                    // Update filename element in DOM
    713701                    XMLTools.setElementTextValue(filename_element, encoded_filename);
    714                     System.err.println("Filename after reencoding was: " + encoded_filename);
     702                    //System.err.println("Filename after reencoding was: " + encoded_filename);
    715703                }
    716704            }
Note: See TracChangeset for help on using the changeset viewer.