Changeset 29793

Show
Ignore:
Timestamp:
19.03.2015 13:53:24 (5 years ago)
Author:
ak19
Message:

Some more changes to make adding metadata to filenames with non-Latin-1 characters work. Previously, any char above 127 was being output as %XX. This doesn't work, as higher values can end up like %101, but when you decode you only ever look for 2 digits after the %. So for higher values we'll use entities like &#x101;. Then had to modify the XML writing to not escape the text in the filename element - otherwise we get &amp;#x101;.

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r23436 r29793  
    3232import java.util.*; 
    3333import org.greenstone.gatherer.collection.CollectionManager; 
    34  
     34import org.greenstone.gatherer.DebugStream; 
    3535 
    3636/** Static access class that contains many of the methods used to work with filename encodings.  
     
    347347             
    348348            String filename_ascii = filename_uri.toASCIIString(); 
     349            DebugStream.println("ascii = "+filename_ascii); 
    349350            String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1"); 
     351            DebugStream.println("raw = "+filename_raw_bytes); 
    350352            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes); 
     353             
    351354        } 
    352355        catch (Exception e) { 
     
    355358            filename_url_encoded = file.getAbsolutePath();  
    356359        } 
     360            DebugStream.println("returning "+filename_url_encoded); 
    357361        return filename_url_encoded; 
    358362    } 
     
    368372    { 
    369373        String urlEncoded = ""; 
    370  
     374DebugStream.println("in iso 8859 to url encoded, "+raw_bytes_filename); 
    371375        try { 
    372376            // By this point we have a UTF-8 encoded string that captures 
     
    386390            String unicode_filename = new String(raw_bytes,"UTF-8"); 
    387391             
     392                //urlEncoded = URLEncoder.encode(unicode_filename, "UTF-8"); 
    388393            for(int i = 0; i < unicode_filename.length(); i++) { 
    389394            char charVal = unicode_filename.charAt(i); 
    390             if((int)charVal > 127) { 
     395            if ((int)charVal > 255) { 
     396                urlEncoded += String.format("&#x%02X;", (int)charVal); 
     397                    //urlEncoded += String.format("\\u%04X", (int)charVal); 
     398            } 
     399            else if((int)charVal > 127) { 
    391400                urlEncoded += String.format("%%%02X", (int)charVal); 
     401                    //urlEncoded += Integer.toHexString((int)charVal); 
    392402            } else { 
    393403                urlEncoded += String.format("%c", (char)charVal); 
     
    399409            throw(e); 
    400410        } 
    401  
     411DebugStream.println("returning "+urlEncoded); 
    402412        return urlEncoded; 
    403413    } 
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r23433 r29793  
    4545    static final private String FILESET_ELEMENT = "FileSet"; 
    4646    static final private String METADATA_ELEMENT = "Metadata"; 
    47  
     47    static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT}; 
     48     
    4849    /** Special metadata field: the filename encoding is a unique sort of metadata in 
    4950     * that it is not just information stored with a collection file, but also needs to 
     
    123124    // If no appropriate FileSet element exists create a new one for this file 
    124125    if (appropriate_fileset_element == null) { 
    125         DebugStream.println("Creating new FileSet element for file since none exists..."); 
     126        DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp); 
    126127        appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT); 
    127128 
     
    605606    // If we have a file loaded into memory and it has been modified, save it now 
    606607    if (loaded_file != null && loaded_file_changed == true) { 
    607         XMLTools.writeXMLFile(loaded_file, loaded_file_document); 
     608        XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements); 
     609     
     610         
    608611        loaded_file_changed = false; 
    609612    }