Ignore:
Timestamp:
2020-09-15T20:26:19+12:00 (4 years ago)
Author:
ak19
Message:

Bugfix 1 for GLI metadata slowdown: selecting multiple Gathered files in GLI became very slow. Kathy and Dr Bainbridge had tracked this down to code I had added to support non-basic ASCII filenames in GLI, which was making an expensive Windows operating system function call for each selected file, launching a Java Process for each. The speed of selecting multiple files is now back to being almost as fast as in 3.09. Tested on Windows and Linux. Had to treat Windows as a special case because I can't get the code modifications to work on Linux: the Perl code stores a hex-encoded string for the filename that GLI now uses when the OS is Windows and compares against the hex-encoded name of a selected file. But on Linux the hex-encoded value generated by Perl is not the same as that which Java generates and, after trying repeatedly, I've not been able to get it to work. So the code behaves as before for Linux.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java

    r33758 r34394  
    4141public abstract class DocXMLFile extends File
    4242{
     43    static boolean isWin = Utility.isWindows();
     44    // For Linux, we continue using gsdlsourcefilename as key to the metadata mapping
     45    // For Windows, we use the hex encoded long file paths as key
     46    static String GSDL_SOURCE_FILE_METANAME = isWin ? "gsdlfullsourcepath" : "gsdlsourcefilename";
     47   
    4348    protected HashMap source_file_name_to_description_elements_mapping = new HashMap();
    44 
     49   
    4550    protected final String MetadataWrap;
    4651    protected final String MetadataItem;
     
    5762    }
    5863
    59     /**
    60      * Checks if various versions of the file object's filename, denoted relatively by file_relative_path,
    61      * occur in the source_file_name_to_description_elements_mapping map
    62     */
    63     private ArrayList findSourceFileMapKeyMatch(File file, String file_relative_path) {
    64         ArrayList description_elements_list = null;
    65                
    66         ///System.err.println("Looking for key " + file_relative_path);
    67         description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
    68         if(description_elements_list != null) {
    69             ///System.err.println("   Found key matching REGULAR filepath: " + file_relative_path);
    70             return description_elements_list;           
    71         }
    72         else if(!Utility.isWindows()) { // couldn't find a matching key, we're done
    73             ///System.err.println("Unable to find ex.meta for regular file path form " + file_relative_path);
    74             return null;
    75         }
    76        
    77         // Now we can try windows short filename as map key
    78        
    79         String win_short_file_relative_path = "";
    80         try{
    81             win_short_file_relative_path = Utility.getWindowsShortFileName(file.getAbsolutePath());             
    82             //System.err.println("@@@ Searching for short file name: " + win_short_file_relative_path);
    83         } catch(Exception e) { // we're done trying to find a matching key
    84             System.err.println("Failed to convert to windows short file name: " + win_short_file_relative_path);           
    85             return null;
    86         }
    87        
    88         // Got a windows short file name, lop off import folder again
    89         int import_index = win_short_file_relative_path.indexOf("import");
    90         if (import_index != -1) {
    91             win_short_file_relative_path = win_short_file_relative_path.substring(import_index + "import".length() + 1);
    92         }
    93            
    94         ///System.err.println("### Looking for Windows short file name |" + win_short_file_relative_path +  "| in map of sourcefilenames to doc.xml's ex meta.");
    95         description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(win_short_file_relative_path);
    96         if (description_elements_list != null) {
    97             ///System.err.println("   Found key matching FULL win shortfile path: " + win_short_file_relative_path);
    98             return description_elements_list; // found
    99         }
    100        
    101         // else, check whether a map key is matched by any REMAINING combination of windows shortfile path and regular path:
    102         // - windows shortfilename's rel-dir-path with regular tailname
    103         // - and regular rel-dir-path with windows shortfilename's tailname
    104                
    105         String shortFileTailName = win_short_file_relative_path;
    106         String shortFileRelDirPath = "";
    107         int lastSep = win_short_file_relative_path.lastIndexOf(File.separator);
    108         if(lastSep != -1) {         
    109             shortFileTailName = win_short_file_relative_path.substring(lastSep+1);
    110             shortFileRelDirPath = win_short_file_relative_path.substring(0, lastSep+1); // include the slash
    111         }
    112        
    113         String fileTailName = file_relative_path;
    114         String fileRelDirPath = "";
    115         lastSep = file_relative_path.lastIndexOf(File.separator);
    116         if(lastSep != -1) {         
    117             fileTailName = file_relative_path.substring(lastSep+1);
    118             fileRelDirPath = file_relative_path.substring(0, lastSep+1); // include the slash
    119         }
    120        
    121         String path = shortFileRelDirPath + fileTailName;
    122         ///System.err.println("### Looking for Windows short file name |" + path +  "| in map of sourcefilenames to doc.xml's ex meta.");
    123         description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path);
    124        
    125         if(description_elements_list != null) {
    126             ///System.err.println("   Found key matching MIX of win shortfile path and regular path: " + path);
    127             return description_elements_list; // found
    128         }
    129 
    130         // try the other combination
    131         path = fileRelDirPath + shortFileTailName;
    132         ///System.err.println("### Looking for Windows short file name |" + path +  "| in map of sourcefilenames to doc.xml's ex meta.");
    133         description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(path);
    134        
    135         if(description_elements_list != null) {
    136             ///System.err.println("   Found key matching MIX of regular path and win shortfile path: " + path);
    137             return description_elements_list; // found
    138         }       
    139        
    140         // could not find gsdlsourcefilename in map
    141         ///System.err.println("Unable to find ex.meta for regular file path form " + file_relative_path);
    142         ///System.err.println("    Or for windows shortFile path form, or for combinations with regular file path form");       
    143        
    144         return description_elements_list; // returns null at this point
    145     }
    146    
    147 
    148     public ArrayList getMetadataExtractedFromFile(File file)
     64    /** On Windows, file_relative_path will be hex-encoded for codepts beyond ASCII.
     65     * But keys into the source_file_name_to_description_elements_mapping will then also match on Windows */
     66    public ArrayList getMetadataExtractedFromFile(File file, String file_relative_path)                                             
    14967    {
    15068    // Build up a list of metadata extracted from this file
    15169    ArrayList metadata_values = new ArrayList();
    152 
    153     String file_relative_path = file.getAbsolutePath();
    154     int import_index = file_relative_path.indexOf("import");
    155     if (import_index != -1) {
    156         file_relative_path = file_relative_path.substring(import_index + "import".length() + 1);
    157     }
    158 
     70   
    15971    ///for (Object relFilename : source_file_name_to_description_elements_mapping.keySet()) {
    16072    ///    System.err.println("\n@@@ relFilename: " + relFilename);
     
    16274   
    16375    // Check whether this file (i.e. doc.xml or docmets.xml on inheritance) file contains extracted metadata for the specified file
    164     //ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
    165     ArrayList description_elements_list = findSourceFileMapKeyMatch(file, file_relative_path);
     76    ArrayList description_elements_list = (ArrayList) source_file_name_to_description_elements_mapping.get(file_relative_path);
    16677    if (description_elements_list == null) {
    16778            // ...it doesn't
     79            ///System.err.println("Unable to find meta for file path form " + file_relative_path);
    16880            return metadata_values; // we're done
    16981    }
     
    337249       
    338250        // Note which file this is for
    339         else if (metadata_element_name.equals("gsdlsourcefilename")) {
     251        //else if (metadata_element_name.equals("gsdlsourcefilename")) {   
     252        else if (metadata_element_name.equals(GSDL_SOURCE_FILE_METANAME)) {
     253            // On Unix, GSDL_SOURCE_FILE_METANAME is the gsdlsourcefilename metadata field
     254            // which may be encoded by the encoding denoted in fileRenameMethod (and will need decoding)
     255            // On Windows, GSDL_SOURCE_FILE_METANAME is a different metadata field that
     256            // will be hex encoded for non-ASCII chars
     257           
    340258            // Extract the gsdlsourcefilename element value
    341259            int value_index = line.indexOf(">", name_index) + ">".length();
     
    354272
    355273            // Make sure the path matches the OS that is running
    356             if (is_unix_path && Utility.isWindows()) {
     274            if (is_unix_path && isWin) {
    357275                // Convert path from Unix to Windows
    358276                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\/", "\\\\");
    359277            }
    360             else if (!is_unix_path && !Utility.isWindows()) {
     278            else if (!is_unix_path && !isWin) {
    361279                // Convert path from Windows to Unix
    362280                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", "/");
     
    369287            }
    370288
     289            // Would be better to store hex src file name decoded? But how do we know what encoding the filename is in
     290            // https://stackoverflow.com/questions/13990941/how-to-convert-hex-string-to-java-string
     291           
     292           
    371293            ((ArrayList) source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value)).add(new Integer(description_element_start));
    372294            }
     
    383305            }
    384306        }
    385 
     307       
    386308        // Ignore metadata starting with gsdl (used to be lowercase metadata and /metadata)
    387309        if (metadata_element_name.startsWith("gsdl")) {
     
    399321        buffered_reader.close();
    400322
    401         // Now that we're done skimming, we actually need to decode gsdlsourcefilename
    402         // based on whatever fileRenameMethod was used to encode it, so that we can
    403         // at last properly compare properly against filenames on the file system
    404         // in order to load the correct ex.meta for the file.
    405         // Now that we should have both gsdlsourcefilename AND fileRenameMethod set,
    406         // we can finally perform the decoding of gsdlsourcefilename.       
    407         if(fileRenameMethod == null) {
    408         fileRenameMethod = FILE_RENAME_METHOD_URL; // default for building
     323        // ON WINDOWS, we're working with hex encoded full file path instead of with gsdlsourcefilename,
     324        // so needn't bother decoding gsdlsourcefilename as it's unused.
     325        // On UNIX, continue decoding gsdlsourcefilename as before
     326        if(!isWin) {
     327        // Now that we're done skimming, we actually need to decode gsdlsourcefilename
     328        // based on whatever fileRenameMethod was used to encode it, so that we can
     329        // at last properly compare properly against filenames on the file system
     330        // in order to load the correct ex.meta for the file.
     331        // Now that we should have both gsdlsourcefilename AND fileRenameMethod set,
     332        // we can finally perform the decoding of gsdlsourcefilename.       
     333        if(fileRenameMethod == null) {
     334            fileRenameMethod = FILE_RENAME_METHOD_URL; // default for building
     335        }
     336       
     337        // If gsdlsourcefilename was encoded, we remove it from the map under its encoded
     338        // filename, decode it and add it back into map using its decoded filename.
     339        if(!fileRenameMethod.equals(FILE_RENAME_METHOD_NONE)) {
     340            ArrayList value_list = (ArrayList) source_file_name_to_description_elements_mapping.remove(gsdlsourcefilename_value);
     341            gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path);
     342            source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list);
     343        }
    409344        }
    410         // If gsdlsourcefilename was encoded, we remove it from the map under its encoded
    411         // filename, decode it and add it back into map using its decoded filename.
    412         if(!fileRenameMethod.equals(FILE_RENAME_METHOD_NONE)) {     
    413         ArrayList value_list = (ArrayList) source_file_name_to_description_elements_mapping.remove(gsdlsourcefilename_value);       
    414         gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path);
    415         source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, value_list);
    416         }       
     345       
    417346    }
    418347    catch (FileNotFoundException exception) {
     
    708637
    709638            // Make sure the path matches the OS that is running
    710             if (is_unix_path && Utility.isWindows()) {
     639            if (is_unix_path && isWin) {
    711640                // Convert path from Unix to Windows
    712641                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\/", "\\\\");
    713642            }
    714             else if (!is_unix_path && !Utility.isWindows()) {
     643            else if (!is_unix_path && !isWin) {
    715644                // Convert path from Windows to Unix
    716645                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", "/");
Note: See TracChangeset for help on using the changeset viewer.