Ignore:
Timestamp:
2020-09-15T20:26:19+12:00 (4 years ago)
Author:
ak19
Message:

Bugfix 1 for GLI metadata slowdown: selecting multiple Gathered files in GLI became very slow. Kathy and Dr Bainbridge had tracked this down to code I had added to support non-basic-ASCII filenames in GLI, which was making an expensive Windows operating system function call for each selected file, launching a Java Process for each. The speed of selecting multiple files is now back to being almost as fast as in 3.09. Tested on Windows and Linux. Had to treat Windows as a special case because I can't get the code modifications to work on Linux: the perl code stores a hex-encoded string for the filename, which GLI now uses when the OS is Windows and compares against the hex-encoded name of a selected file. But on Linux the hex-encoded value generated by perl is not the same as that which Java generates, and after trying repeatedly, I've not been able to get it to work. So the code behaves as before for Linux.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/util/Utility.java

    r33777 r34394  
    9494    }   
    9595    }
    96 
    97     // Copied from GS3 main java code at GSDL3SRCHOME\src\java\org\greenstone/util\Misc.java
    98     // Debugging function to print a string's non-basic chars in hex, so stringToHex on all non-basic and non-printable ASCII
    99     // Dr Bainbridge said that printing anything with charCode over 128 in hex is okay, but I'd already made extra allowances for non-printable ASCII
    100     // Based on https://stackoverflow.com/questions/923863/converting-a-string-to-hexadecimal-in-java
    101     public static String debugUnicodeString(String str) {
    102       String result = "";
    103       for(int i = 0; i < str.length(); i++) {
    104             int charCode = str.codePointAt(i); // unicode codepoint / ASCII code
    105            
    106             // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
    107             // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format)
    108             if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13) { // space, tilda, TAB, LF, CR are printable, leave them in for XML element printing
    109                 result += str.charAt(i);
    110             } else {
    111                 result += "x{" + String.format("%04x", charCode) + "}"; // looks like: x{4-char-codepoint}
    112             }
    113       }
    114      
    115       return result;
    116     }
    117    
    118     /**
     96   
     97    // Copied from GS3 main java code at GSDL3SRCHOME\src\java\org\greenstone/util\Misc.java
     98    // Debugging function to print a string's non-basic chars in hex, so stringToHex on all non-basic and non-printable ASCII
     99    // Dr Bainbridge said that printing anything with charCode over 128 in hex is okay, but I'd already made extra allowances for non-printable ASCII
     100    // Based on https://stackoverflow.com/questions/923863/converting-a-string-to-hexadecimal-in-java
     101    public static String debugUnicodeString(String str) {
     102    String result = "";
     103    for(int i = 0; i < str.length(); i++) {
     104        int charCode = str.codePointAt(i); // unicode codepoint / ASCII code
     105       
     106        // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
     107        // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format)
     108        if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13) { // space, tilda, TAB, LF, CR are printable, leave them in for XML element printing
     109        result += str.charAt(i);
     110        } else {
     111        result += "x{" + String.format("%04x", charCode) + "}"; // looks like: x{4-char-codepoint}             
     112        }
     113    }
     114   
     115    return result;
     116    }
     117   
     118    // Version of debugUnicodeString that, on Windows, mimics perl unicode::debug_unicode_string
     119    // exactly by producing hex/unicode codepoints for ALL codepoints beyond ASCII
     120    public static String stringToHex(String str) {
     121    String result = "";
     122    for(int i = 0; i < str.length(); i++) {
     123        int charCode = str.codePointAt(i); // unicode codepoint / ASCII code
     124       
     125        if(charCode <=127) { // ASCII
     126        result += str.charAt(i);
     127        } else { // non-ASCII
     128        result += "\\x{" + String.format("%04x", charCode) + "}"; // looks like: \x{4-char-codepoint}
     129        }
     130    }   
     131   
     132    return result;
     133    }
     134   
     135    /**
    119136     * returns the short filename (8.3) for a file in Windows
    120137     *
Note: See TracChangeset for help on using the changeset viewer.