Ignore:
Timestamp:
2010-01-04T17:40:03+13:00 (14 years ago)
Author:
davidb
Message:

The code worked for ASCII characters, but not for non-ASCII characters (Unicode values of 128 and above). The input is now explicitly decoded as UTF-8.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/src/jdbmedit/Txt2Jdb.java

    r21395 r21403  
    2626import java.io.BufferedInputStream;
    2727import java.io.InputStream;
     28import java.io.IOException;
     29import java.io.InputStreamReader;
     30import java.io.BufferedReader;
     31import java.util.Properties;
    2832
    2933import jdbm.RecordManager;
     
    3236import jdbm.htree.HTree;
    3337
    34 import java.io.IOException;
    35 import java.util.Properties;
    36 
    3738
    3839public class Txt2Jdb
     
    4748    throws IOException
    4849    {
     50    if (db_filename.endsWith(".jdb")) {
     51        // remove file extension as JDBM does not expect it
     52        db_filename = db_filename.substring(0,db_filename.length()-4);
     53    }
     54
    4955        // create or open a record manager
    5056        Properties props = new Properties();
     
    7581    // Assumes InputStream is a file or standard-in 
    7682    // (not some other form of IO mapped device)
    77     public boolean eof(InputStream is)
     83    //public boolean eof(InputStream is)
     84    public boolean eof(int c)
    7885    throws IOException
    7986    {
    80     return (is.available() == 0);
     87    return (c == -1);
    8188    }
    8289
     
    95102    boolean delkey = false;
    96103
    97     BufferedInputStream bis = new BufferedInputStream(System.in);
    98    
     104
     105    InputStreamReader isr = new InputStreamReader(System.in,"UTF-8");
     106    BufferedReader bis = new BufferedReader(isr);
     107
    99108    int c = bis.read();
    100     while (!eof(bis)) {
     109    while (!eof(c)) {
    101110        num_dashes = 0;
    102111        key = new StringBuffer();
     
    106115       
    107116        // scan for first occurrence of [
    108         while (!eof(bis) && c != '[') {
    109         c = bis.read();
    110         }
    111 
    112        
    113         if (!eof(bis)) {
     117        while (!eof(c) && c != '[') {
     118        c = bis.read();
     119        }
     120
     121       
     122        if (!eof(c)) {
    114123        c = bis.read(); // skip [
    115124        }
     
    117126       
    118127        // now look for closing ], building up 'key' as we go
    119         while (!eof(bis) && c != ']') {
     128        while (!eof(c) && c != ']') {
    120129        key.append ((char)c);
    121130        c = bis.read();
    122131        }
    123132       
    124         if (!eof(bis)) {
     133        if (!eof(c)) {
    125134        // most likely an eol char, but if '-', then signifies record
    126135        // is to be deleted, not added
     
    134143        }
    135144
    136         while (!eof(bis) && (c == '\n' || c == '\r')) {
     145        while (!eof(c) && (c == '\n' || c == '\r')) {
    137146        c = bis.read();
    138147        }
     
    140149        // look for 70 dashes
    141150        tmp = new StringBuffer();
    142         while (!eof(bis) && (num_dashes < 70)) {
     151        while (!eof(c) && (num_dashes < 70)) {
    143152        if (c == '\n') {
    144153            tmp.append((char)c);
Note: See TracChangeset for help on using the changeset viewer.