Changeset 33757 for main/trunk/gli/src/org/greenstone/gatherer
- Timestamp:
- 2019-12-07T01:40:13+13:00 (4 years ago)
- Location:
- main/trunk/gli/src/org/greenstone/gatherer
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java
r33756 r33757 34 34 import org.greenstone.gatherer.util.Utility; 35 35 36 import org.apache.commons.codec.binary.Base64; 37 38 //import org.greenstone.gatherer.feedback.Base64; 36 //import org.greenstone.gatherer.feedback.Base64; // decode() from Base64 didn't work 37 import org.apache.commons.codec.binary.Base64; // decoding from Base64 works 39 38 40 39 /** This class represents one doc.xml file */ … … 58 57 } 59 58 59 /** 60 * Checks if various versions of the file object's filename, denoted relatively by file_relative_path, 61 * occur in the source_file_name_to_description_elements_mapping map 62 */ 63 private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) { 64 ArrayList description_elements_list = null; 65 66 System.err.println("Looking for key " + file_relative_path); 67 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 68 if(description_elements_list != null) { 69 System.err.println(" Found key matching REGULAR filepath: " + file_relative_path); 70 return description_elements_list; 71 } 72 else if(!Utility.isWindows()) { // couldn't find a matching key, we're done 73 System.err.println("Unable to find meta for regular file path form " + file_relative_path); 74 return null; 75 } 76 77 // Now we can try windows short filename as map key 78 79 String win_short_file_relative_path = ""; 80 try{ 81 win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath()); 82 //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path); 83 } catch(Exception e) { // we're done trying to find a matching key 84 System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path); 85 return null; 86 } 87 88 // Got a windows short file name, lop off import folder again 89 int import_index = win_short_file_relative_path.indexOf("import"); 90 if (import_index != -1) { 91 win_short_file_relative_path = 
win_short_file_relative_path.substring(import_index + "import".length() + 1); 92 } 93 94 System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path + "| in map of sourcefilenames to doc.xml's ex meta."); 95 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path); 96 if (description_elements_list != null) { 97 System.err.println(" Found key matching FULL win shortfile path: " + win_short_file_relative_path); 98 return description_elements_list; // found 99 } 100 101 // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path: 102 // - windows shortfilename's rel-dir-path with regular tailname 103 // - and regular rel-dir-path with windows shortfilename's tailname 104 105 String shortFileTailName = win_short_file_relative_path; 106 String shortFileRelDirPath = ""; 107 int lastSep = win_short_file_relative_path.lastIndexOf(File.separator); 108 if(lastSep != -1) { 109 shortFileTailName = win_short_file_relative_path.substring(lastSep+1); 110 shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash 111 } 112 113 String fileTailName = file_relative_path; 114 String fileRelDirPath = ""; 115 lastSep = file_relative_path.lastIndexOf(File.separator); 116 if(lastSep != -1) { 117 fileTailName = file_relative_path.substring(lastSep+1); 118 fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash 119 } 120 121 String path = shortFileRelDirPath + fileTailName; 122 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 123 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 124 125 if(description_elements_list != null) { 126 System.err.println(" Found key matching MIX of win shortfile path and regular path: " + path); 127 return 
description_elements_list; // found 128 } 129 130 // try the other combination 131 path = fileRelDirPath + shortFileTailName; 132 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 133 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 134 135 if(description_elements_list != null) { 136 System.err.println(" Found key matching MIX of regular path and win shortfile path: " + path); 137 return description_elements_list; // found 138 } 139 140 return description_elements_list; 141 } 142 60 143 61 144 public ArrayList getMetadataExtractedFromFile(File file) … … 70 153 } 71 154 72 ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {73 /// System.err.println("@@@ relFilename: " + relFilename);74 ///}155 for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) { 156 System.err.println("\n@@@ relFilename: " + relFilename); 157 } 75 158 76 159 // Check whether this file (i.e. 
doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file 77 ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 160 //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 161 ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path); 78 162 if (description_elements_list == null) { 79 // ...it doesn't 80 return metadata_values; 163 // ...it doesn't 164 System.err.println("Unable to find meta for (regular file path form) " + file_relative_path); 165 if(Utility.isWindows()) { 166 System.err.println(" Or for windows shortFile path form, or for combinations with regular file path form"); 167 } 168 return metadata_values; // we're done 81 169 } 82 170 … … 275 363 } 276 364 277 ///System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value);365 System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value); 278 366 // Remember this for quick access later 279 367 if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) { … … 326 414 gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path); 327 415 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list); 328 } 416 } 329 417 } 330 418 catch (FileNotFoundException exception) { … … 379 467 // General info: https://stackoverflow.com/questions/43089541/difference-between-basic-and-url-base64-encoding-in-java-8 380 468 byte[] bytes = Base64.decodeBase64(importFilePathParts[i].getBytes()); 381 ///System.err.println("Got base64 string: " + importFilePathParts[i]);382 ///System.err.println("Decoded from base64 to bytes: " + bytes);469 System.err.println("Got base64 string: " + importFilePathParts[i]); 470 System.err.println("Decoded from base64 to bytes: " + new 
String(bytes, System.getProperty("file.encoding"))); 383 471 // Using system file.encoding to interpret the resulting bytestring as a String, 384 472 // just as we always did with URL decoding method … … 397 485 decoded_gsdlsourcefilename += file_ext; 398 486 399 ///System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename));487 System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename)); 400 488 401 489 return decoded_gsdlsourcefilename; -
main/trunk/gli/src/org/greenstone/gatherer/util/Utility.java
r33729 r33757 114 114 115 115 return result; 116 } 117 118 /** 119 * The following calls a method in WindowsNativeFunctions.java to retrieve Windows short file names 120 * taken from http://dolf.trieschnigg.nl/eightpointthree/eightpointthree.html 121 * which uses the the non-JNI NativeCall jar file for which WindowsNativeFunctions imports com.eaio.nativecall.* 122 * 123 * returns the short filename (8.3) for a file in Windows 124 * 125 * @param longFileName - must be the full path to an actual existing file 126 * @return a string with the short filename, or null if an error occurred or the 127 * file does not exist. 128 */ 129 public static String getWindowsShortFileName(String longFileName) throws Exception { 130 if(!Utility.isWindows()) { 131 return longFileName; 132 } else { 133 //return WindowsNativeFunctions.getEightPointThree(longFileName); 134 return getMSDOSName(longFileName); 135 } 136 } 137 138 /** 139 * getMSDOSName() and its helper function getAbsolutePath(fileName) 140 * are from https://stackoverflow.com/questions/18893284/how-to-get-short-filenames-in-windows-using-java 141 * getMSDOSName() modified to use our SafeProcess class. 142 * 143 * @param fileName - the regular fileName to be converted. Must be the full path to an actual existing file 144 * @return Windows shortfile name for the fileName parameter given. 
145 */ 146 public static String getMSDOSName(String fileName) 147 throws IOException, InterruptedException { 148 149 /* 150 String path = getAbsolutePath(fileName); 151 152 changed "+ fileName.toUpperCase() +" to "path" 153 Process process = 154 Runtime.getRuntime().exec( 155 "cmd /c for %I in (\"" + path + "\") do @echo %~fsI"); 156 157 process.waitFor(); 158 159 byte[] data = new byte[65536]; 160 int size = process.getInputStream().read(data); 161 162 if (size <= 0) { 163 return null; 164 } 165 166 return new String(data, 0, size).replaceAll("\\r\\n", ""); 167 */ 168 String path = getAbsolutePath(fileName); 169 170 SafeProcess process = new SafeProcess("cmd /c for %I in (\"" + path + "\") do @echo %~fsI"); 171 int returnVal = process.runProcess(); 172 if(returnVal != 0) { 173 return null; 174 } 175 176 String data = process.getStdOutput(); 177 if(data == null) { 178 return null; 179 } 180 else return data.replaceAll("\\r\\n", ""); 181 } 182 public static String getAbsolutePath(String fileName) 183 throws IOException { 184 File file = new File(fileName); 185 String path = file.getAbsolutePath(); 186 187 if (file.exists() == false) 188 file = new File(path); 189 190 path = file.getCanonicalPath(); 191 192 if (file.isDirectory() && (path.endsWith(File.separator) == false)) 193 path += File.separator; 194 195 return path; 116 196 } 117 197
Note: See TracChangeset for help on using the changeset viewer.