- Timestamp: 2019-12-07T01:40:13+13:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java
r33756 r33757 34 34 import org.greenstone.gatherer.util.Utility; 35 35 36 import org.apache.commons.codec.binary.Base64; 37 38 //import org.greenstone.gatherer.feedback.Base64; 36 //import org.greenstone.gatherer.feedback.Base64; // decode() from Base64 didn't work 37 import org.apache.commons.codec.binary.Base64; // decoding from Base64 works 39 38 40 39 /** This class represents one doc.xml file */ … … 58 57 } 59 58 59 /** 60 * Checks if various versions of the file object's filename, denoted relatively by file_relative_path, 61 * occur in the source_file_name_to_description_elements_mapping map 62 */ 63 private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) { 64 ArrayList description_elements_list = null; 65 66 System.err.println("Looking for key " + file_relative_path); 67 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 68 if(description_elements_list != null) { 69 System.err.println(" Found key matching REGULAR filepath: " + file_relative_path); 70 return description_elements_list; 71 } 72 else if(!Utility.isWindows()) { // couldn't find a matching key, we're done 73 System.err.println("Unable to find meta for regular file path form " + file_relative_path); 74 return null; 75 } 76 77 // Now we can try windows short filename as map key 78 79 String win_short_file_relative_path = ""; 80 try{ 81 win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath()); 82 //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path); 83 } catch(Exception e) { // we're done trying to find a matching key 84 System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path); 85 return null; 86 } 87 88 // Got a windows short file name, lop off import folder again 89 int import_index = win_short_file_relative_path.indexOf("import"); 90 if (import_index != -1) { 91 win_short_file_relative_path = 
win_short_file_relative_path.substring(import_index + "import".length() + 1); 92 } 93 94 System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path + "| in map of sourcefilenames to doc.xml's ex meta."); 95 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path); 96 if (description_elements_list != null) { 97 System.err.println(" Found key matching FULL win shortfile path: " + win_short_file_relative_path); 98 return description_elements_list; // found 99 } 100 101 // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path: 102 // - windows shortfilename's rel-dir-path with regular tailname 103 // - and regular rel-dir-path with windows shortfilename's tailname 104 105 String shortFileTailName = win_short_file_relative_path; 106 String shortFileRelDirPath = ""; 107 int lastSep = win_short_file_relative_path.lastIndexOf(File.separator); 108 if(lastSep != -1) { 109 shortFileTailName = win_short_file_relative_path.substring(lastSep+1); 110 shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash 111 } 112 113 String fileTailName = file_relative_path; 114 String fileRelDirPath = ""; 115 lastSep = file_relative_path.lastIndexOf(File.separator); 116 if(lastSep != -1) { 117 fileTailName = file_relative_path.substring(lastSep+1); 118 fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash 119 } 120 121 String path = shortFileRelDirPath + fileTailName; 122 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 123 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 124 125 if(description_elements_list != null) { 126 System.err.println(" Found key matching MIX of win shortfile path and regular path: " + path); 127 return 
description_elements_list; // found 128 } 129 130 // try the other combination 131 path = fileRelDirPath + shortFileTailName; 132 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 133 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 134 135 if(description_elements_list != null) { 136 System.err.println(" Found key matching MIX of regular path and win shortfile path: " + path); 137 return description_elements_list; // found 138 } 139 140 return description_elements_list; 141 } 142 60 143 61 144 public ArrayList getMetadataExtractedFromFile(File file) … … 70 153 } 71 154 72 ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {73 /// System.err.println("@@@ relFilename: " + relFilename);74 ///}155 for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) { 156 System.err.println("\n@@@ relFilename: " + relFilename); 157 } 75 158 76 159 // Check whether this file (i.e. 
doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file 77 ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 160 //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 161 ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path); 78 162 if (description_elements_list == null) { 79 // ...it doesn't 80 return metadata_values; 163 // ...it doesn't 164 System.err.println("Unable to find meta for (regular file path form) " + file_relative_path); 165 if(Utility.isWindows()) { 166 System.err.println(" Or for windows shortFile path form, or for combinations with regular file path form"); 167 } 168 return metadata_values; // we're done 81 169 } 82 170 … … 275 363 } 276 364 277 ///System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value);365 System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value); 278 366 // Remember this for quick access later 279 367 if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) { … … 326 414 gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path); 327 415 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list); 328 } 416 } 329 417 } 330 418 catch (FileNotFoundException exception) { … … 379 467 // General info: https://stackoverflow.com/questions/43089541/difference-between-basic-and-url-base64-encoding-in-java-8 380 468 byte[] bytes = Base64.decodeBase64(importFilePathParts[i].getBytes()); 381 ///System.err.println("Got base64 string: " + importFilePathParts[i]);382 ///System.err.println("Decoded from base64 to bytes: " + bytes);469 System.err.println("Got base64 string: " + importFilePathParts[i]); 470 System.err.println("Decoded from base64 to bytes: " + new 
String(bytes, System.getProperty("file.encoding"))); 383 471 // Using system file.encoding to interpret the resulting bytestring as a String, 384 472 // just as we always did with URL decoding method … … 397 485 decoded_gsdlsourcefilename += file_ext; 398 486 399 ///System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename));487 System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename)); 400 488 401 489 return decoded_gsdlsourcefilename;
Note: See TracChangeset for help on using the changeset viewer.