Changeset 33738
- Timestamp:
- 2019-12-02T20:43:20+13:00 (4 years ago)
- Location:
- main/trunk/gli/src/org/greenstone/gatherer/metadata
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java
r33737 r33738 376 376 377 377 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 378 public static String filenameToURLEncoding(String filename) {378 public static String UNUSED_filenameToURLEncoding(String filename) { 379 379 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { 380 380 return filename; … … 424 424 425 425 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 426 public static String _filenameToURLEncoding(String filename) {426 public static String filenameToURLEncoding(String filename) { 427 427 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 428 428 return filename; -
main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java
r33737 r33738 287 287 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples 288 288 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) { 289 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path );289 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n"); 290 290 current_fileset_matches = true; 291 291 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) { … … 668 668 } 669 669 670 String metadata_xml_file_directory_path = FilenameEncoding.filenameToURLEncoding("."); 671 metadata_xml_file_directory_path = metadata_xml_file_directory_path.substring(0, metadata_xml_file_directory_path.length()-2); // cut off /. at end 672 System.err.println("@@@ metadata_xml_file_directory_path: " + metadata_xml_file_directory_path); 673 670 674 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true)); 671 675 … … 681 685 String filename = XMLTools.getElementTextValue(filename_element); 682 686 if(!filename.equals(DIRECTORY_FILENAME)) { 683 //System.err.println("Filename before reencoding was: " + filename); 684 // reencode filename 685 // can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object 686 // created by filenameToURLEncoding). 687 // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements 688 689 //System.err.println("Filename before reencoding was: " + filename); 690 691 // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object 692 // created by filenameToURLEncoding). 
So replace backslashes in regex with url-encoded hex-value of backslash, %5C. 687 693 String encoded_filename = filename.replace("\\", "%5C"); 688 694 encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename); 689 // escape chars for regex again 690 encoded_filename = encoded_filename.replace("%5C", "\\"); 695 696 // now lop off the metadataxml dir prefix the FilenameEncoding.filenameToURLEncoding(STRING) variant would have added 697 encoded_filename = encoded_filename.substring(metadata_xml_file_directory_path.length()); 698 if (encoded_filename.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { 699 encoded_filename = encoded_filename.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); 700 } 701 702 // Reintroduce the backslash characters in place of their %5C hex placeholders 703 encoded_filename = encoded_filename.replace("%5C", "\\"); 704 705 // Update filename element in DOM 691 706 XMLTools.setElementTextValue(filename_element, encoded_filename); 692 707 //System.err.println("Filename after reencoding was: " + encoded_filename);
Note:
See TracChangeset
for help on using the changeset viewer.