Changeset 29793


Ignore:
Timestamp:
03/19/15 13:53:24 (6 years ago)
Author:
ak19
Message:

Some more changes to make adding metadata to filenames with non-Latin-1 characters work. Previously, any char above 127 was being output as %XX. This doesn't work, as higher values can end up like %101, but when you decode you only ever look for 2 digits after the %. So for higher values (above 255) we'll use entities like &#x101; instead. We then had to modify the XML writing to not escape the text in the filename element - otherwise we get &amp;#x101;

Location:
main/trunk/gli/src/org/greenstone/gatherer/metadata
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r23436 r29793  
    3232import java.util.*;
    3333import org.greenstone.gatherer.collection.CollectionManager;
    34 
     34import org.greenstone.gatherer.DebugStream;
    3535
    3636/** Static access class that contains many of the methods used to work with filename encodings.
     
    347347           
    348348            String filename_ascii = filename_uri.toASCIIString();
     349            DebugStream.println("ascii = "+filename_ascii);
    349350            String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
     351            DebugStream.println("raw = "+filename_raw_bytes);
    350352            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
     353           
    351354        }
    352355        catch (Exception e) {
     
    355358            filename_url_encoded = file.getAbsolutePath();
    356359        }
     360            DebugStream.println("returning "+filename_url_encoded);
    357361        return filename_url_encoded;
    358362    }
     
    368372    {
    369373        String urlEncoded = "";
    370 
     374DebugStream.println("in iso 8859 to url encoded, "+raw_bytes_filename);
    371375        try {
    372376            // By this point we have a UTF-8 encoded string that captures
     
    386390            String unicode_filename = new String(raw_bytes,"UTF-8");
    387391           
     392                //urlEncoded = URLEncoder.encode(unicode_filename, "UTF-8");
    388393            for(int i = 0; i < unicode_filename.length(); i++) {
    389394            char charVal = unicode_filename.charAt(i);
    390             if((int)charVal > 127) {
     395            if ((int)charVal > 255) {
     396                urlEncoded += String.format("&#x%02X;", (int)charVal);
     397                    //urlEncoded += String.format("\\u%04X", (int)charVal);
     398            }
     399            else if((int)charVal > 127) {
    391400                urlEncoded += String.format("%%%02X", (int)charVal);
     401                    //urlEncoded += Integer.toHexString((int)charVal);
    392402            } else {
    393403                urlEncoded += String.format("%c", (char)charVal);
     
    399409            throw(e);
    400410        }
    401 
     411DebugStream.println("returning "+urlEncoded);
    402412        return urlEncoded;
    403413    }
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java

    r23433 r29793  
    4545    static final private String FILESET_ELEMENT = "FileSet";
    4646    static final private String METADATA_ELEMENT = "Metadata";
    47 
     47    static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
     48   
    4849    /** Special metadata field: the filename encoding is a unique sort of metadata in
    4950     * that it is not just information stored with a collection file, but also needs to
     
    123124    // If no appropriate FileSet element exists create a new one for this file
    124125    if (appropriate_fileset_element == null) {
    125         DebugStream.println("Creating new FileSet element for file since none exists...");
     126        DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
    126127        appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
    127128
     
    605606    // If we have a file loaded into memory and it has been modified, save it now
    606607    if (loaded_file != null && loaded_file_changed == true) {
    607         XMLTools.writeXMLFile(loaded_file, loaded_file_document);
     608        XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
     609   
     610       
    608611        loaded_file_changed = false;
    609612    }
Note: See TracChangeset for help on using the changeset viewer.