Changeset 34509


Ignore:
Timestamp:
2020-10-22T17:42:08+13:00 (4 years ago)
Author:
ak19
Message:

Related to previous commits 34506-34508. The metadata list for gsdlsourcefilename is now stored in the map under both its Win 8.3 short filename and its long filename, when the two differ.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/DocXMLFile.java

    r34508 r34509  
    7777            ///System.err.println("Unable to find meta for file path form " + file_relative_path);
    7878            return metadata_values; // we're done
    79     } /// else { System.err.println("@@@ file rel path: " + file_relative_path + " matched" ); }
     79    } ///else { System.err.println("@@@ file rel path: " + file_relative_path + " matched" ); }
    8080
    8181    MetadataSet extracted_metadata_set = MetadataSetManager.getMetadataSet(MetadataSetManager.EXTRACTED_METADATA_NAMESPACE);
     
    293293        buffered_reader.close();
    294294
     295        // Work out if is_unix_path
     296        int import_index = gsdlsourcefilename_value.indexOf("import");
     297        if (import_index != -1) {
     298            String tempStr = gsdlsourcefilename_value.substring(import_index + "import".length());
     299            is_unix_path = tempStr.startsWith("/");
     300        }
     301        // We're only interested in the path relative to the import folder
     302        // Lop off "import" folder prefix
     303        gsdlsourcefilename_value = adjustForRelativeToImportDir(gsdlsourcefilename_value);
    295304
    296305        // Now that we're done skimming, we actually need to decode gsdlsourcefilename
     
    303312            fileRenameMethod = FILE_RENAME_METHOD_URL; // default for building
    304313        }
    305        
    306         // If gsdlsourcefilename was encoded, we remove it from the map under its encoded
    307         // filename, decode it and add it back into map using its decoded filename.
    308314        if(!fileRenameMethod.equals(FILE_RENAME_METHOD_NONE)) {         
    309315            gsdlsourcefilename_value = decodeSourceFilename(gsdlsourcefilename_value, fileRenameMethod, is_unix_path);         
    310         }       
     316        }
     317
     318        // Now we can finally put the gsdlsourcefilename path relative to import dir into the hashmap
     319        ///System.err.println("@@@ into map: " + gsdlsourcefilename_value);
     320        if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) {
     321            source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, new ArrayList());
     322        }
     323        ((ArrayList) source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value)).add(new Integer(description_element_start_gsdlsourcefilename_value));     
    311324       
    312325        // Next, if Windows, check if dealing with Win 8.3 Short Filename
    313         // In that case, convert short file name to full name - works only if the file exists       
     326        // In that case, convert short file name to long filename - works only if the file exists       
    314327        if(isWin && gsdlsourcefilename_value.indexOf("~") != -1) {
     328           
     329            String long_gsdlsourcefilename = gsdlsourcefilename_value;
     330           
    315331            // gsdlsourcefilename is stored from import folder onwards: import/opt_subdir/filename.ext
    316332            // This may contain Win 8.3 shortening. To get Win Long filename, prefix current collection dir
    317333            // and if resulting file exists, getCanonicalPath() which produces Win Long filename.
    318334            File currentCollectionFolder = Gatherer.c_man.getCollection().getCollectionDirectory();
    319             File f = new File(currentCollectionFolder, /*"import" + File.separator +*/ gsdlsourcefilename_value);
     335            File f = new File(currentCollectionFolder, "import" + File.separator + gsdlsourcefilename_value); // should work even if linux style slashes in gsdlsourcefilename_value
    320336            ///System.err.println("### file: " + f.getAbsolutePath());
    321337           
    322338            if(f.exists()) {
    323                 gsdlsourcefilename_value = f.getCanonicalPath();
    324                 ///System.err.println("### canon: " + gsdlsourcefilename_value);
    325                
    326             } // else couldn't find a version of the filename stored in doc.xml that exists, giving up, leave gsdlsourcefilename_value as is   
     339                long_gsdlsourcefilename = f.getCanonicalPath();
     340                ///System.err.println("### canon: " + long_gsdlsourcefilename);             
     341            } // else couldn't find a version of the filename stored in doc.xml that exists, giving up, leave gsdlsourcefilename_value as is
     342           
     343            // Again, we're only interested in the path relative to the import folder
     344            long_gsdlsourcefilename = adjustForRelativeToImportDir(long_gsdlsourcefilename);
     345            if(!gsdlsourcefilename_value.equals(long_gsdlsourcefilename)) { // truly distinct Win long and short file names
     346                // Put a copy of the ref to gsdlsourcefilename's metadata list under the long filename as well
     347                ///System.err.println("@@@ long filename into map: " + long_gsdlsourcefilename);       
     348                Object arrList = source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value);
     349                source_file_name_to_description_elements_mapping.put(long_gsdlsourcefilename, arrList);
     350            }
    327351        }
    328352       
    329         // We're only interested in the path relative to the import folder
    330         int import_index = gsdlsourcefilename_value.indexOf("import");
     353    }
     354    catch (FileNotFoundException exception) {
     355        DebugStream.printStackTrace(exception);
     356    }
     357    catch (IOException exception) {
     358        DebugStream.printStackTrace(exception);
     359    } catch (Exception exception) { // e.g. exception decoding gsdlsourcefilename
     360        DebugStream.printStackTrace(exception);
     361    }
     362    }
     363   
     364    private String adjustForRelativeToImportDir(String gsdlsourcefilename_value) {
     365        int import_index = gsdlsourcefilename_value.indexOf("import");
    331366        if (import_index != -1) {
    332367            gsdlsourcefilename_value = gsdlsourcefilename_value.substring(import_index + "import".length());
    333368
    334             is_unix_path = gsdlsourcefilename_value.startsWith("/");
     369            boolean is_unix_path = gsdlsourcefilename_value.startsWith("/");
    335370            gsdlsourcefilename_value = gsdlsourcefilename_value.substring(1);
    336371
     
    347382                gsdlsourcefilename_value = gsdlsourcefilename_value.replaceAll("\\\\", "/");
    348383            }
    349        
    350             ///System.err.println("@@@ into map: " + gsdlsourcefilename_value);
    351        
    352             if (source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value) == null) {
    353                 source_file_name_to_description_elements_mapping.put(gsdlsourcefilename_value, new ArrayList());
    354             }
    355            
    356             ((ArrayList) source_file_name_to_description_elements_mapping.get(gsdlsourcefilename_value)).add(new Integer(description_element_start_gsdlsourcefilename_value));
    357         }
    358     }
    359     catch (FileNotFoundException exception) {
    360         DebugStream.printStackTrace(exception);
    361     }
    362     catch (IOException exception) {
    363         DebugStream.printStackTrace(exception);
    364     } catch (Exception exception) { // e.g. exception decoding gsdlsourcefilename
    365         DebugStream.printStackTrace(exception);
    366     }
    367     }
    368    
     384        }
     385        return gsdlsourcefilename_value;
     386    }
    369387   
    370388    protected String decodeSourceFilename(String relative_sourcefile_path,
Note: See TracChangeset for help on using the changeset viewer.