- Timestamp: 2020-09-15T20:26:19+12:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java
r33758 r34394 41 41 public abstract class DocXMLFile extends File 42 42 { 43 static boolean isWin = Utility.isWindows(); 44 // For Linux, we continue using gsdlsourcefilename as key to the metadata mapping 45 // For Windows, we use the hex encoded long file paths as key 46 static String GSDL_SOURCE_FILE_METANAME = isWin ? "gsdlfullsourcepath" : "gsdlsourcefilename"; 47 43 48 protected HashMap source_file_name_to_description_elements_mapping = new HashMap(); 44 49 45 50 protected final String MetadataWrap; 46 51 protected final String MetadataItem; … … 57 62 } 58 63 59 /** 60 * Checks if various versions of the file object's filename, denoted relatively by file_relative_path, 61 * occur in the source_file_name_to_description_elements_mapping map 62 */ 63 private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) { 64 ArrayList description_elements_list = null; 65 66 ///System.err.println("Looking for key " + file_relative_path); 67 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 68 if(description_elements_list != null) { 69 ///System.err.println(" Found key matching REGULAR filepath: " + file_relative_path); 70 return description_elements_list; 71 } 72 else if(!Utility.isWindows()) { // couldn't find a matching key, we're done 73 ///System.err.println("Unable to find ex.meta for regular file path form " + file_relative_path); 74 return null; 75 } 76 77 // Now we can try windows short filename as map key 78 79 String win_short_file_relative_path = ""; 80 try{ 81 win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath()); 82 //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path); 83 } catch(Exception e) { // we're done trying to find a matching key 84 System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path); 85 return null; 86 } 87 88 // Got a windows short file name, lop off 
import folder again 89 int import_index = win_short_file_relative_path.indexOf("import"); 90 if (import_index != -1) { 91 win_short_file_relative_path = win_short_file_relative_path.substring(import_index + "import".length() + 1); 92 } 93 94 ///System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path + "| in map of sourcefilenames to doc.xml's ex meta."); 95 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path); 96 if (description_elements_list != null) { 97 ///System.err.println(" Found key matching FULL win shortfile path: " + win_short_file_relative_path); 98 return description_elements_list; // found 99 } 100 101 // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path: 102 // - windows shortfilename's rel-dir-path with regular tailname 103 // - and regular rel-dir-path with windows shortfilename's tailname 104 105 String shortFileTailName = win_short_file_relative_path; 106 String shortFileRelDirPath = ""; 107 int lastSep = win_short_file_relative_path.lastIndexOf(File.separator); 108 if(lastSep != -1) { 109 shortFileTailName = win_short_file_relative_path.substring(lastSep+1); 110 shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash 111 } 112 113 String fileTailName = file_relative_path; 114 String fileRelDirPath = ""; 115 lastSep = file_relative_path.lastIndexOf(File.separator); 116 if(lastSep != -1) { 117 fileTailName = file_relative_path.substring(lastSep+1); 118 fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash 119 } 120 121 String path = shortFileRelDirPath + fileTailName; 122 ///System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 123 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 124 125 
if(description_elements_list != null) { 126 ///System.err.println(" Found key matching MIX of win shortfile path and regular path: " + path); 127 return description_elements_list; // found 128 } 129 130 // try the other combination 131 path = fileRelDirPath + shortFileTailName; 132 ///System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 133 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 134 135 if(description_elements_list != null) { 136 ///System.err.println(" Found key matching MIX of regular path and win shortfile path: " + path); 137 return description_elements_list; // found 138 } 139 140 // could not find gsdlsourcefilename in map 141 ///System.err.println("Unable to find ex.meta for regular file path form " + file_relative_path); 142 ///System.err.println(" Or for windows shortFile path form, or for combinations with regular file path form"); 143 144 return description_elements_list; // returns null at this point 145 } 146 147 148 public ArrayList getMetadataExtractedFromFile(File file) 64 /** On Windows, file_relative_path will be hex-encoded for codepts beyond ASCII. 
65 * But keys into the source_file_name_to_description_elements_mapping will then also match on Windows */ 66 public ArrayList getMetadataExtractedFromFile(File file, String file_relative_path) 149 67 { 150 68 // Build up a list of metadata extracted from this file 151 69 ArrayList metadata_values = new ArrayList(); 152 153 String file_relative_path = file.getAbsolutePath(); 154 int import_index = file_relative_path.indexOf("import"); 155 if (import_index != -1) { 156 file_relative_path = file_relative_path.substring(import_index + "import".length() + 1); 157 } 158 70 159 71 ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) { 160 72 /// System.err.println("\n@@@ relFilename: " + relFilename); … … 162 74 163 75 // Check whether this file (i.e. doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file 164 //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 165 ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path); 76 ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 166 77 if (description_elements_list == null) { 167 78 // ...it doesn't 79 ///System.err.println("Unable to find meta for file path form " + file_relative_path); 168 80 return metadata_values; // we're done 169 81 } … … 337 249 338 250 // Note which file this is for 339 else if (metadata_element_name.equals("gsdlsourcefilename")) { 251 //else if (metadata_element_name.equals("gsdlsourcefilename")) { 252 else if (metadata_element_name.equals(GSDL_SOURCE_FILE_METANAME)) { 253 // On Unix, GSDL_SOURCE_FILE_METANAME is the gsdlsourcefilename metadata field 254 // which may be encoded by the encoding denoted in fileRenameMethod (and will need decoding) 255 // On Windows, GSDL_SOURCE_FILE_METANAME is a different metadata field that 256 // will be hex encoded 
for non-ASCII chars 257 340 258 // Extract the gsdlsourcefilename element value 341 259 int value_index = line.indexOf(">", name_index) + ">".length(); … … 354 272 355 273 // Make sure the path matches the OS that is running 356 if (is_unix_path && Utility.isWindows()) {274 if (is_unix_path && isWin) { 357 275 // Convert path from Unix to Windows 358 276 gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\/", "\\\\"); 359 277 } 360 else if (!is_unix_path && ! Utility.isWindows()) {278 else if (!is_unix_path && !isWin) { 361 279 // Convert path from Windows to Unix 362 280 gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", "/"); … … 369 287 } 370 288 289 // Would be better to store hex src file name decoded? But how do we know what encoding the filename is in 290 // https://stackoverflow.com/questions/13990941/how-to-convert-hex-string-to-java-string 291 292 371 293 ((ArrayList) source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value)).add(new Integer(description_element_start)); 372 294 } … … 383 305 } 384 306 } 385 307 386 308 // Ignore metadata starting with gsdl (used to be lowercase metadata and /metadata) 387 309 if (metadata_element_name.startsWith("gsdl")) { … … 399 321 buffered_reader.close(); 400 322 401 // Now that we're done skimming, we actually need to decode gsdlsourcefilename 402 // based on whatever fileRenameMethod was used to encode it, so that we can 403 // at last properly compare properly against filenames on the file system 404 // in order to load the correct ex.meta for the file. 405 // Now that we should have both gsdlsourcefilename AND fileRenameMethod set, 406 // we can finally perform the decoding of gsdlsourcefilename. 
407 if(fileRenameMethod == null) { 408 fileRenameMethod = FILE_RENAME_METHOD_URL; // default for building 323 // ON WINDOWS, we're working with hex encoded full file path instead of with gsdlsourcefilename, 324 // so needn't bother decoding gsdlsourcefilename as it's unused. 325 // On UNIX, continue decoding gsdlsourcefilename as before 326 if(!isWin) { 327 // Now that we're done skimming, we actually need to decode gsdlsourcefilename 328 // based on whatever fileRenameMethod was used to encode it, so that we can 329 // at last properly compare properly against filenames on the file system 330 // in order to load the correct ex.meta for the file. 331 // Now that we should have both gsdlsourcefilename AND fileRenameMethod set, 332 // we can finally perform the decoding of gsdlsourcefilename. 333 if(fileRenameMethod == null) { 334 fileRenameMethod = FILE_RENAME_METHOD_URL; // default for building 335 } 336 337 // If gsdlsourcefilename was encoded, we remove it from the map under its encoded 338 // filename, decode it and add it back into map using its decoded filename. 339 if(!fileRenameMethod.equals(FILE_RENAME_METHOD_NONE)) { 340 ArrayList value_list = (ArrayList) source_file_name_to_description_elements_mapping.remove(gsdlsourcefilename_value); 341 gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path); 342 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list); 343 } 409 344 } 410 // If gsdlsourcefilename was encoded, we remove it from the map under its encoded 411 // filename, decode it and add it back into map using its decoded filename. 
412 if(!fileRenameMethod.equals(FILE_RENAME_METHOD_NONE)) { 413 ArrayList value_list = (ArrayList) source_file_name_to_description_elements_mapping.remove(gsdlsourcefilename_value); 414 gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path); 415 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list); 416 } 345 417 346 } 418 347 catch (FileNotFoundException exception) { … … 708 637 709 638 // Make sure the path matches the OS that is running 710 if (is_unix_path && Utility.isWindows()) {639 if (is_unix_path && isWin) { 711 640 // Convert path from Unix to Windows 712 641 gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\/", "\\\\"); 713 642 } 714 else if (!is_unix_path && ! Utility.isWindows()) {643 else if (!is_unix_path && !isWin) { 715 644 // Convert path from Windows to Unix 716 645 gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", "/");
Note: See TracChangeset for help on using the changeset viewer.