Ignore:
Timestamp:
2019-12-07T01:40:13+13:00 (4 years ago)
Author:
ak19
Message:
  1. Windows bugfix for loading exMeta into GLI when subdirectories are involved in the Gather pane, when there are non-ASCII filenames, or when the file rename method is set to base64. 2. Bugfix for Linux and Windows: using Base64 to rename files was still a problem despite the previous commit (which was supposed to have fixed all GLI exMeta loading issues on Linux) in the special case where a subfolder name was pure ASCII. The Perl code wouldn't base64-encode such subdirectories. However, GLI cannot tell from the file rename method alone which parts of a relative file path were encoded and which were not, so it decoded every part uniformly. As a result, ASCII-named subfolders that had not been base64-encoded (but contained files that were renamed with base64) were base64-decoded into garbage, and exMeta still did not get attached. 3. This commit contains debug statements.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java

    r33756 r33757  
    3434import org.greenstone.gatherer.util.Utility;
    3535
    36 import org.apache.commons.codec.binary.Base64;
    37 
    38 //import org.greenstone.gatherer.feedback.Base64;
     36//import org.greenstone.gatherer.feedback.Base64; // decode() from Base64 didn't work
     37import org.apache.commons.codec.binary.Base64; // decoding from Base64 works
    3938
    4039/** This class represents one doc.xml file */
     
    5857    }
    5958
     59    /**
     60     * Checks if various versions of the file object's filename, denoted relatively by file_relative_path,
     61     * occur in the source_file_name_to_description_elements_mapping map
     62    */
     63    private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) {
     64        ArrayList description_elements_list = null;
     65               
     66        System.err.println("Looking for key " + file_relative_path);
     67        description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
     68        if(description_elements_list != null) {
     69            System.err.println("   Found key matching REGULAR filepath: " + file_relative_path);
     70            return description_elements_list;           
     71        }
     72        else if(!Utility.isWindows()) { // couldn't find a matching key, we're done
     73            System.err.println("Unable to find meta for regular file path form " + file_relative_path);
     74            return null;
     75        }
     76       
     77        // Now we can try windows short filename as map key
     78       
     79        String win_short_file_relative_path = "";
     80        try{
     81            win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath());             
     82            //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path);
     83        } catch(Exception e) { // we're done trying to find a matching key
     84            System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path);           
     85            return null;
     86        }
     87       
     88        // Got a windows short file name, lop off import folder again
     89        int import_index = win_short_file_relative_path.indexOf("import");
     90        if (import_index != -1) {
     91            win_short_file_relative_path = win_short_file_relative_path.substring(import_index + "import".length() + 1);
     92        }
     93           
     94        System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path +  "| in map of sourcefilenames to doc.xml's ex meta.");
     95        description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path);
     96        if (description_elements_list != null) {
     97            System.err.println("   Found key matching FULL win shortfile path: " + win_short_file_relative_path);
     98            return description_elements_list; // found
     99        }
     100       
     101        // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path:
     102        // - windows shortfilename's rel-dir-path with regular tailname
     103        // - and regular rel-dir-path with windows shortfilename's tailname
     104               
     105        String shortFileTailName = win_short_file_relative_path;
     106        String shortFileRelDirPath = "";
     107        int lastSep = win_short_file_relative_path.lastIndexOf(File.separator);
     108        if(lastSep != -1) {         
     109            shortFileTailName = win_short_file_relative_path.substring(lastSep+1);
     110            shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash
     111        }
     112       
     113        String fileTailName = file_relative_path;
     114        String fileRelDirPath = "";
     115        lastSep = file_relative_path.lastIndexOf(File.separator);
     116        if(lastSep != -1) {         
     117            fileTailName = file_relative_path.substring(lastSep+1);
     118            fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash
     119        }
     120       
     121        String path = shortFileRelDirPath + fileTailName;
     122        System.err.println("### Looking for Windows short file name |" + path +  "| in map of sourcefilenames to doc.xml's ex meta.");
     123        description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path);
     124       
     125        if(description_elements_list != null) {
     126            System.err.println("   Found key matching MIX of win shortfile path and regular path: " + path);
     127            return description_elements_list; // found
     128        }
     129
     130        // try the other combination
     131        path = fileRelDirPath + shortFileTailName;
     132        System.err.println("### Looking for Windows short file name |" + path +  "| in map of sourcefilenames to doc.xml's ex meta.");
     133        description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path);
     134       
     135        if(description_elements_list != null) {
     136            System.err.println("   Found key matching MIX of regular path and win shortfile path: " + path);
     137            return description_elements_list; // found
     138        }       
     139       
     140        return description_elements_list;
     141    }
     142   
    60143
    61144    public ArrayList getMetadataExtractedFromFile(File file)
     
    70153    }
    71154
    72     ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {
    73     ///    System.err.println("@@@ relFilename: " + relFilename);
    74     ///}
     155    for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {
     156        System.err.println("\n@@@ relFilename: " + relFilename);
     157    }
    75158   
    76159    // Check whether this file (i.e. doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file
    77     ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
     160    //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
     161    ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path);
    78162    if (description_elements_list == null) {
    79         // ...it doesn't
    80         return metadata_values;
     163            // ...it doesn't
     164            System.err.println("Unable to find meta for (regular file path form) " + file_relative_path);
     165            if(Utility.isWindows()) {
     166                System.err.println("    Or for windows shortFile path form, or for combinations with regular file path form");
     167            }
     168            return metadata_values; // we're done
    81169    }
    82170
     
    275363            }
    276364           
    277             ///System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value);
     365            System.err.println("@@@@ Found gsdlsourcefilename: " + gsdlsourcefilename_value);
    278366            // Remember this for quick access later
    279367            if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) {
     
    326414        gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path);
    327415        source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list);
    328         }
     416        }       
    329417    }
    330418    catch (FileNotFoundException exception) {
     
    379467        // General info: https://stackoverflow.com/questions/43089541/difference-between-basic-and-url-base64-encoding-in-java-8
    380468        byte[] bytes = Base64.decodeBase64(importFilePathParts[i].getBytes());
    381         ///System.err.println("Got base64 string: " + importFilePathParts[i]);
    382         ///System.err.println("Decoded from base64 to bytes: " + bytes);
     469        System.err.println("Got base64 string: " + importFilePathParts[i]);
     470        System.err.println("Decoded from base64 to bytes: " + new String(bytes, System.getProperty("file.encoding")));
    383471        // Using system file.encoding to interpret the resulting bytestring as a String,
    384472        // just as we always did with URL decoding method
     
    397485    decoded_gsdlsourcefilename += file_ext;
    398486   
    399     ///System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename));
     487    System.err.println("@@@@ decoded_gsdlsourcefilename: " + Utility.debugUnicodeString(decoded_gsdlsourcefilename));
    400488
    401489    return decoded_gsdlsourcefilename;
Note: See TracChangeset for help on using the changeset viewer.