Changeset 33744

Show
Ignore:
Timestamp:
03.12.2019 15:04:32 (4 days ago)
Author:
ak19
Message:

Refactored code to do more inside functions rather than make callers do extra, specific work that requires knowledge of what happened (and didn't happen) inside the called functions. Shifted the new functions about until they appear after Dr Bainbridge's more important functions.

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33739 r33744  
    438438        return str; 
    439439    } 
    440      
    441      
    442     // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 
    443     public static String UNUSED_filenameToURLEncoding(String filename) { 
    444         if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { 
    445             return filename; 
    446         } 
    447  
    448          // Can't create a URI out of a filename containing spaces. Spaces must be encoded as %20 
    449         String filename_url_encoded = filename.replace(" ", "%20"); 
    450         //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex 
    451         //filename_url_encoded = filename_url_encoded.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased 
    452  
    453         try { 
    454             URI filename_uri = new URI(filename_url_encoded); 
    455             // The trick: 
    456             //  1. toASCIIString() will %xx encode values > 127 
    457             //  2. Decode the result to "ISO-8859-1" 
    458             //  3. URL encode the bytes to string 
    459              
    460             // Step 2 forces the string to be 8-bit values.  It 
    461             // doesn't matter if the starting raw filename was *not* 
    462             // in the ISO-8859-1 encoding, the effect is to ensure 
    463             // we have an 8-bit byte string that (numerically) 
    464             // captures the right value.  These numerical values are 
    465             // then used to determine how to URL encode it 
    466              
    467             String filename_ascii = filename_uri.toASCIIString(); 
    468             //filename_ascii = filename_ascii.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex 
    469             //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased 
    470             String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1"); 
    471             filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes); 
    472              
    473             // DEALING WITH & and + in filenames: NOT WORKING YET 
    474             //if(filename_url_encoded.contains("&")) { 
    475             //  filename_url_encoded = filename_url_encoded.replace("&", "%36amp;"); 
    476             //} else if(filename_url_encoded.contains("&")) { 
    477             //  filename_url_encoded = filename_url_encoded.replace("&", "%36"); 
    478             //} 
    479              
    480         } 
    481         catch (Exception e) { 
    482             e.printStackTrace(); 
    483             // Give up trying to convert 
    484             filename_url_encoded = filename;  
    485         } 
    486         return filename_url_encoded; 
    487     } 
    488      
    489      
    490     // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter     
    491     public static String filenameToURLEncoding(String filename) { 
    492         if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 
    493             return filename; 
    494         } 
    495         File file = new File (filename); 
    496         return fileToURLEncoding(file); 
    497     }        
    498440 
    499441     
     
    517459            return file.getAbsolutePath(); 
    518460        } 
    519  
    520         // we'll want to protect & by replacing with &'s hex value 
    521         // but we don't want to replace &#x....; with the same! 
    522         Pattern plain_ampersand_not_hex_prefix_Pattern = Pattern.compile("&[^#]"); 
    523          
    524          
    525         int containsAmp = 0; 
    526         if(file.getName().contains("&")) { 
    527             System.err.println("@@@ 1 to encode " + file.getName()); 
    528             containsAmp = 1; 
    529         } else if(file.getName().contains("&")) { 
    530             System.err.println("@@@ 2 to encode " + file.getName()); 
    531             containsAmp = 2; 
    532         } else { 
    533             System.err.println("@@@ 0 to encode " + file.getName()); 
    534         } 
    535          
    536461         
    537462        String filename_url_encoded = ""; 
     
    571496             
    572497            String filename_ascii = filename_uri.toASCIIString(); 
    573             // protect & and + in the filename too 
    574             filename_ascii = escapeAllCharWithHexEntity(filename_ascii, '&'); 
    575              
    576             if(containsAmp > 0) System.err.println("@@@ filename_ascii: " + filename_ascii); 
    577              
    578              
    579             //if(containsAmp > 0) System.err.println("@@@ filename_ascii with hexed &: " + filename_ascii); 
     498             
     499            // Before proceeding, protect & in the filename too. 
     500            // &'s ASCII code is 38 in decimal, and 26 in hex, so replace it with its hex entity (&#x26;) 
     501            // But dangerous to do simple replace if there are &#x...; entities in the filename already! 
     502            // That is, we'll want to protect & by replacing with &'s hex value, but we don't want to replace the & in "&#x....;" with the same! 
     503            //filename_url_encoded = filename_url_encoded.replace("&", "&x26;");// SO THIS IS BAD 
     504            //filename_url_encoded = filename_url_encoded.replace("&", hexEntityForChar("&"));// SAME, STILL BAD 
     505            filename_ascii = escapeAllCharWithHexEntity(filename_ascii, '&'); // Good: CAREFULLY replaces & that are not part of hex entities 
     506             
     507             
    580508            String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");          
    581509            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes); 
    582              
    583              
    584             //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex 
    585             //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased 
    586             //if(containsAmp > 0) System.err.println("@@@ filename_url_encoded: " + filename_url_encoded); 
    587510             
    588511        } 
     
    672595    } 
    673596    
     597 // FURTHER HELPER METHODS  
     598  
     599    /** 
     600     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter. 
     601     * If filename is relative, then the current directory (gli?) will be prefixed to what is returned 
     602     * and should be removed manually by the caller. Alternatively, for relative paths, call the variant 
     603     * relativeFilenameToURLEncoding(String), which will remove any added filepath prefix. 
     604    */ 
     605    public static String fullFilepathToURLEncoding(String filename) { 
     606        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 
     607            return filename; 
     608        } 
     609         
     610        File file = new File (filename); 
     611        //return fileToURLEncoding(file); 
     612        String filename_url_encoded = fileToURLEncoding(file); 
     613         
     614        // if the current directory (".") was passed in as filename, 
     615        // then the filename_url_encoded looks like /full/path/./ 
     616        // In that case, remove the ./ at the end 
     617        if (filename_url_encoded.endsWith(FilenameEncoding.URL_FILE_SEPARATOR+"."+FilenameEncoding.URL_FILE_SEPARATOR)) { 
     618            filename_url_encoded = filename_url_encoded.substring(0, filename_url_encoded.length()-2); // cut off /. at end 
     619        } 
     620         
     621        return filename_url_encoded; 
     622    } 
     623  
     624    /** 
     625     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter 
     626     * If filename is a relative path, call this method to get it specially URL encoded. 
     627     * This method will remove the current directory that is prefixed as an intermediary step. 
     628    */ 
     629    public static String relativeFilenameToURLEncoding(String filename) { 
     630        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 
     631            return filename; 
     632        } 
     633         
     634        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); 
     635        return filenameToURLEncodingWithPrefixRemoved(filename, curr_directory_path); 
     636         
     637        /* 
     638        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding(".");        
     639        if (curr_directory_path.endsWith("."+FilenameEncoding.URL_FILE_SEPARATOR)) { 
     640            curr_directory_path = curr_directory_path.substring(0, curr_directory_path.length()-2); // cut off /. at end             
     641        } 
     642         
     643        File file = new File (filename);         
     644        String filename_url_encoded = fileToURLEncoding(file); // returns a full filepath 
     645         
     646        // now lop off the current dir prefix that FilenameEncoding.filenameToURLEncoding(STRING) variant would have added 
     647        filename_url_encoded = filename_url_encoded.substring(curr_directory_path.length()); 
     648        // remove any remaining slash prefix 
     649        if (filename_url_encoded.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { 
     650            filename_url_encoded = filename_url_encoded.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); 
     651        } 
     652         
     653        return filename_url_encoded; 
     654        */ 
     655    } 
     656     
     657    /** 
     658     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter 
     659     * Convenience method that will return the specially URL encoded version of filename 
     660     * with the provided removeFilePathPrefix removed */ 
     661    public static String filenameToURLEncodingWithPrefixRemoved(String filename, String removeFilePathPrefix) { 
     662        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 
     663            return filename; 
     664        } 
     665         
     666        /*if (removeFilePathPrefix.endsWith("."+FilenameEncoding.URL_FILE_SEPARATOR)) { 
     667            removeFilePathPrefix = removeFilePathPrefix.substring(0, removeFilePathPrefix.length()-2); // cut off /. at end      
     668        } 
     669        */ 
     670         
     671        File file = new File (filename);         
     672        String filename_url_encoded = fileToURLEncoding(file); // returns a full filepath 
     673         
     674        System.err.println("@@@ full url encoded filename: " + filename_url_encoded); 
     675         
     676        // now lop off the given removeFilePathPrefix that FilenameEncoding.filenameToURLEncoding(STRING) variant would have added 
     677        filename_url_encoded = filename_url_encoded.substring(removeFilePathPrefix.length()); 
     678        // remove any remaining slash prefix 
     679        if (filename_url_encoded.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) { 
     680            filename_url_encoded = filename_url_encoded.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); 
     681        } 
     682         
     683        return filename_url_encoded; 
     684    } 
    674685} 
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r33739 r33744  
    668668        } 
    669669         
    670         String curr_directory_path = FilenameEncoding.filenameToURLEncoding("."); 
    671         curr_directory_path = curr_directory_path.substring(0, curr_directory_path.length()-2); // cut off /. at end 
    672         //System.err.println("@@@ curr_directory_path: " + curr_directory_path); 
     670        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); 
     671         
     672        //String curr_directory_path = FilenameEncoding.filenameToURLEncoding("."); 
     673        //curr_directory_path = curr_directory_path.substring(0, curr_directory_path.length()-2); // cut off /. at end 
     674        System.err.println("@@@ curr_directory_path: " + curr_directory_path); 
    673675         
    674676        //System.err.println("PARSED loaded_file contains:\n" +  XMLTools.elementToString(doc.getDocumentElement(), true)); 
     
    687689                    // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements 
    688690                     
    689                     //System.err.println("Filename before reencoding was: " + filename);                     
     691                    System.err.println("Filename before reencoding was: " + filename);                   
    690692                     
    691693                    // Can't convert to URI with backslash-escaped chars (backslashes used in a regex filename are illegal in the URI object 
    692694                    // created by filenameToURLEncoding). So replace backslashes in the regex with the url-encoded hex value of backslash, %5C. 
    693695                    String encoded_filename = filename.replace("\\", "%5C");  
     696                     
     697                    /* 
    694698                    encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename); 
    695  
    696699                    // now lop off the metadataxml dir prefix the FilenameEncoding.filenameToURLEncoding(STRING) variant would have added 
    697700                    encoded_filename = encoded_filename.substring(curr_directory_path.length()); 
     
    699702                        encoded_filename = encoded_filename.substring(FilenameEncoding.URL_FILE_SEPARATOR.length()); 
    700703                    } 
    701      
     704                    */ 
     705                     
     706                    // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed 
     707                    encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path); 
     708                     
    702709                    // Reintroduce the backslash characters in place of their %5C hex placeholders 
    703710                    encoded_filename = encoded_filename.replace("%5C", "\\");                
     
    705712                    // Update filename element in DOM 
    706713                    XMLTools.setElementTextValue(filename_element, encoded_filename); 
    707                     //System.err.println("Filename after reencoding was: " + encoded_filename); 
     714                    System.err.println("Filename after reencoding was: " + encoded_filename); 
    708715                } 
    709716            }