Changeset 33757
- Timestamp:
- 2019-12-07T01:40:13+13:00 (4 years ago)
- Location:
- main/trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java
r33756 r33757 34 34 import org.greenstone.gatherer.util.Utility; 35 35 36 import org.apache.commons.codec.binary.Base64; 37 38 //import org.greenstone.gatherer.feedback.Base64; 36 //import org.greenstone.gatherer.feedback.Base64; // decode() from Base64 didn't work 37 import org.apache.commons.codec.binary.Base64; // decoding from Base64 works 39 38 40 39 /** This class represents one doc.xml file */ … … 58 57 } 59 58 59 /** 60 * Checks if various versions of the file object's filename, denoted relatively by file_relative_path, 61 * occur in the source_file_name_to_description_elements_mapping map 62 */ 63 private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) { 64 ArrayList description_elements_list = null; 65 66 System.err.println("Looking for key " + file_relative_path); 67 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 68 if(description_elements_list != null) { 69 System.err.println(" Found key matching REGULAR filepath: " + file_relative_path); 70 return description_elements_list; 71 } 72 else if(!Utility.isWindows()) { // couldn't find a matching key, we're done 73 System.err.println("Unable to find meta for regular file path form " + file_relative_path); 74 return null; 75 } 76 77 // Now we can try windows short filename as map key 78 79 String win_short_file_relative_path = ""; 80 try{ 81 win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath()); 82 //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path); 83 } catch(Exception e) { // we're done trying to find a matching key 84 System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path); 85 return null; 86 } 87 88 // Got a windows short file name, lop off import folder again 89 int import_index = win_short_file_relative_path.indexOf("import"); 90 if (import_index != -1) { 91 win_short_file_relative_path = 
win_short_file_relative_path.substring(import_index + "import".length() + 1); 92 } 93 94 System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path + "| in map of sourcefilenames to doc.xml's ex meta."); 95 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path); 96 if (description_elements_list != null) { 97 System.err.println(" Found key matching FULL win shortfile path: " + win_short_file_relative_path); 98 return description_elements_list; // found 99 } 100 101 // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path: 102 // - windows shortfilename's rel-dir-path with regular tailname 103 // - and regular rel-dir-path with windows shortfilename's tailname 104 105 String shortFileTailName = win_short_file_relative_path; 106 String shortFileRelDirPath = ""; 107 int lastSep = win_short_file_relative_path.lastIndexOf(File.separator); 108 if(lastSep != -1) { 109 shortFileTailName = win_short_file_relative_path.substring(lastSep+1); 110 shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash 111 } 112 113 String fileTailName = file_relative_path; 114 String fileRelDirPath = ""; 115 lastSep = file_relative_path.lastIndexOf(File.separator); 116 if(lastSep != -1) { 117 fileTailName = file_relative_path.substring(lastSep+1); 118 fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash 119 } 120 121 String path = shortFileRelDirPath + fileTailName; 122 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 123 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 124 125 if(description_elements_list != null) { 126 System.err.println(" Found key matching MIX of win shortfile path and regular path: " + path); 127 return 
description_elements_list; // found 128 } 129 130 // try the other combination 131 path = fileRelDirPath + shortFileTailName; 132 System.err.println("### Looking for Windows short file name |" + path + "| in map of sourcefilenames to doc.xml's ex meta."); 133 description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path); 134 135 if(description_elements_list != null) { 136 System.err.println(" Found key matching MIX of regular path and win shortfile path: " + path); 137 return description_elements_list; // found 138 } 139 140 return description_elements_list; 141 } 142 60 143 61 144 public ArrayList getMetadataExtractedFromFile(File file) … … 70 153 } 71 154 72 ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {73 /// System.err.println("@@@ relFilename: " + relFilename);74 ///}155 for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) { 156 System.err.println("\n@@@ relFilename: " + relFilename); 157 } 75 158 76 159 // Check whether this file (i.e. 
doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file 77 ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 160 //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path); 161 ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path); 78 162 if (description_elements_list == null) { 79 // ...it doesn't 80 return metadata_values; 163 // ...it doesn't 164 System.err.println("Unable to find meta for (regular file path form) " + file_relative_path); 165 if(Utility.isWindows()) { 166 System.err.println(" Or for windows shortFile path form, or for combinations with regular file path form"); 167 } 168 return metadata_values; // we're done 81 169 } 82 170 … … 275 363 } 276 364 277 ///System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value);365 System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value); 278 366 // Remember this for quick access later 279 367 if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) { … … 326 414 gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path); 327 415 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list); 328 } 416 } 329 417 } 330 418 catch (FileNotFoundException exception) { … … 379 467 // General info: https://stackoverflow.com/questions/43089541/difference-between-basic-and-url-base64-encoding-in-java-8 380 468 byte[] bytes = Base64.decodeBase64(importFilePathParts[i].getBytes()); 381 ///System.err.println("Got base64 string: " + importFilePathParts[i]);382 ///System.err.println("Decoded from base64 to bytes: " + bytes);469 System.err.println("Got base64 string: " + importFilePathParts[i]); 470 System.err.println("Decoded from base64 to bytes: " + new 
String(bytes, System.getProperty("file.encoding"))); 383 471 // Using system file.encoding to interpret the resulting bytestring as a String, 384 472 // just as we always did with URL decoding method … … 397 485 decoded_gsdlsourcefilename += file_ext; 398 486 399 ///System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename));487 System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename)); 400 488 401 489 return decoded_gsdlsourcefilename; -
main/trunk/gli/src/org/greenstone/gatherer/util/Utility.java
r33729 r33757 114 114 115 115 return result; 116 } 117 118 /** 119 * The following calls a method in WindowsNativeFunctions.java to retrieve Windows short file names 120 * taken from http://dolf.trieschnigg.nl/eightpointthree/eightpointthree.html 121 * which uses the non-JNI NativeCall jar file for which WindowsNativeFunctions imports com.eaio.nativecall.* 122 * 123 * returns the short filename (8.3) for a file in Windows 124 * 125 * @param longFileName - must be the full path to an actual existing file 126 * @return a string with the short filename, or null if an error occurred or the 127 * file does not exist. 128 */ 129 public static String getWindowsShortFileName(String longFileName) throws Exception { 130 if(!Utility.isWindows()) { 131 return longFileName; 132 } else { 133 //return WindowsNativeFunctions.getEightPointThree(longFileName); 134 return getMSDOSName(longFileName); 135 } 136 } 137 138 /** 139 * getMSDOSName() and its helper function getAbsolutePath(fileName) 140 * are from https://stackoverflow.com/questions/18893284/how-to-get-short-filenames-in-windows-using-java 141 * getMSDOSName() modified to use our SafeProcess class. 142 * 143 * @param fileName - the regular fileName to be converted. Must be the full path to an actual existing file 144 * @return Windows shortfile name for the fileName parameter given. 
145 */ 146 public static String getMSDOSName(String fileName) 147 throws IOException, InterruptedException { 148 149 /* 150 String path = getAbsolutePath(fileName); 151 152 changed "+ fileName.toUpperCase() +" to "path" 153 Process process = 154 Runtime.getRuntime().exec( 155 "cmd /c for %I in (\"" + path + "\") do @echo %~fsI"); 156 157 process.waitFor(); 158 159 byte[] data = new byte[65536]; 160 int size = process.getInputStream().read(data); 161 162 if (size <= 0) { 163 return null; 164 } 165 166 return new String(data, 0, size).replaceAll("\\r\\n", ""); 167 */ 168 String path = getAbsolutePath(fileName); 169 170 SafeProcess process = new SafeProcess("cmd /c for %I in (\"" + path + "\") do @echo %~fsI"); 171 int returnVal = process.runProcess(); 172 if(returnVal != 0) { 173 return null; 174 } 175 176 String data = process.getStdOutput(); 177 if(data == null) { 178 return null; 179 } 180 else return data.replaceAll("\\r\\n", ""); 181 } 182 public static String getAbsolutePath(String fileName) 183 throws IOException { 184 File file = new File(fileName); 185 String path = file.getAbsolutePath(); 186 187 if (file.exists() == false) 188 file = new File(path); 189 190 path = file.getCanonicalPath(); 191 192 if (file.isDirectory() && (path.endsWith(File.separator) == false)) 193 path += File.separator; 194 195 return path; 116 196 } 117 197 -
main/trunk/greenstone2/perllib/doc.pm
r33756 r33757 298 298 my @dirs = ($sep eq "\\") ? split(/[\\\/]+/, $dirname) : split(/\//, $dirname); 299 299 300 my $dirpath = ""; 300 my $dirpath = ""; 301 302 # Don't encode the first folder ("import" or "tmp"): GLI's DocXMLFile.java looks for literal "import" 303 # or "tmp" before it knows which file rename method is to be used to decode the rest of gsdlsourcefilename. 304 if(scalar (@dirs) > 1) { 305 $dirpath = shift(@dirs); 306 } 307 foreach my $subdir (@dirs) { 302 ##print STDERR "@@@@ Found subdir: $subdir\n"; 303 304 # The import folder can be called anything, including in non-ASCII encodings. 305 # Don't need to avoid encoding default import folder called "import", as it gets 306 # URL/base64 encoded to ITSELF. 307 # But can't encode (URL/base64 encode) any $ENV{'GSDLIMPORTDIR'} though if it's set, 308 # as BasePlugout::get_doc_dir() removes any $ENV{'GSDLIMPORTDIR'} prefix. Because if 309 # the $ENV{'GSDLIMPORTDIR'} part of gsdlsourcefilename is encoded here, the prefix 310 # won't match with $ENV{'GSDLIMPORTDIR'} 311 312 unless ($ENV{'GSDLIMPORTDIR'} && $subdir eq $ENV{'GSDLIMPORTDIR'}) { 313 $subdir = &util::rename_file($subdir, $rename_method); 314 } 308 print STDERR "@@@@ Found subdir: $subdir\n"; 309 310 $subdir = &util::rename_file($subdir, $rename_method); 311 #print STDERR "@@@@ encoded subdir: $subdir\n"; 312 315 313 $dirpath = &FileUtils::filenameConcatenate($dirpath, $subdir); 316 314 } -
main/trunk/greenstone2/perllib/unicode.pm
r33299 r33757 697 697 } 698 698 699 # Base64 encoding does not encode a pure ASCII string to itself. This is important to know. 700 # If the $force_encode parameter is true, then this method WILL base64 encode whatever 701 # string is passed in, including any plain ASCII string. 702 # That means this method could double encode an already encoded string. 703 # However, this method is necessary because on the GLI end, we can't detect whether a plain 704 # ASCII string has been encoded or not. And if gsdlsourcefilerenamemethod is set to base64, 705 # then gli will always attempt to decode all parts of the relative path gsdlsourcefilename 706 # (except the "import" prefix, which is special) or none of the parts. 707 sub force_base64_encode { 708 my ($text) = @_; 709 my $force_encode = 1; 710 return &base64_encode($text, $force_encode); 711 } 712 699 713 sub base64_encode { 700 my ($text) = @_; 701 if(!&conforms_to_mod_base64($text)) { 714 my ($text, $force_encode) = @_; 715 if($force_encode || !&conforms_to_mod_base64($text)) { 702 716 # return entity for underscore to underscore before encoding 703 717 $text =~ s/&\#095;/_/g; -
main/trunk/greenstone2/perllib/util.pm
r33013 r33757 1216 1216 } 1217 1217 elsif ($rename_method eq "base64") { 1218 $tailname = &unicode::base64_encode($tailname); 1218 $tailname = &unicode::force_base64_encode($tailname); 1219 1219 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle 1220 1220 }
Note:
See TracChangeset
for help on using the changeset viewer.