Ignore:
Timestamp:
2015-03-19T13:53:24+13:00 (9 years ago)
Author:
ak19
Message:

Some more changes to make adding metadata to filenames with non-Latin-1 characters work. Previously, any char above 127 was being output as %XX. This doesn't work, as higher values can end up like %101, but when you decode you only ever look for 2 digits after the %. So for higher values (above 255) we'll use entities like &#x101;. Then had to modify the XML writing to not escape the text in the filename element - otherwise we get &amp;#x101;.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r23436 r29793  
    3232import java.util.*;
    3333import org.greenstone.gatherer.collection.CollectionManager;
    34 
     34import org.greenstone.gatherer.DebugStream;
    3535
    3636/** Static access class that contains many of the methods used to work with filename encodings.
     
    347347           
    348348            String filename_ascii = filename_uri.toASCIIString();
     349            DebugStream.println("ascii = "+filename_ascii);
    349350            String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
     351            DebugStream.println("raw = "+filename_raw_bytes);
    350352            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
     353           
    351354        }
    352355        catch (Exception e) {
     
    355358            filename_url_encoded = file.getAbsolutePath();
    356359        }
     360            DebugStream.println("returning "+filename_url_encoded);
    357361        return filename_url_encoded;
    358362    }
     
    368372    {
    369373        String urlEncoded = "";
    370 
     374DebugStream.println("in iso 8859 to url encoded, "+raw_bytes_filename);
    371375        try {
    372376            // By this point we have a UTF-8 encoded string that captures
     
    386390            String unicode_filename = new String(raw_bytes,"UTF-8");
    387391           
     392                //urlEncoded = URLEncoder.encode(unicode_filename, "UTF-8");
    388393            for(int i = 0; i < unicode_filename.length(); i++) {
    389394            char charVal = unicode_filename.charAt(i);
    390             if((int)charVal > 127) {
     395            if ((int)charVal > 255) {
     396                urlEncoded += String.format("&#x%02X;", (int)charVal);
     397                    //urlEncoded += String.format("\\u%04X", (int)charVal);
     398            }
     399            else if((int)charVal > 127) {
    391400                urlEncoded += String.format("%%%02X", (int)charVal);
     401                    //urlEncoded += Integer.toHexString((int)charVal);
    392402            } else {
    393403                urlEncoded += String.format("%c", (char)charVal);
     
    399409            throw(e);
    400410        }
    401 
     411DebugStream.println("returning "+urlEncoded);
    402412        return urlEncoded;
    403413    }
Note: See TracChangeset for help on using the changeset viewer.