/********************************************************************** * * Txt2Jdb.java -- * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Copyright (C) 1999 The New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **********************************************************************/ import java.io.BufferedInputStream; import java.io.InputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.BufferedReader; import java.util.Properties; import jdbm.RecordManager; import jdbm.RecordManagerFactory; import jdbm.helper.FastIterator; import jdbm.htree.HTree; public class Txt2Jdb { static String TNAME = "greenstone"; RecordManager recman_; HTree hashtable_; public Txt2Jdb(String db_filename, boolean append) throws IOException { if (db_filename.endsWith(".jdb")) { // remove file extension as JDBM does not expect it db_filename = db_filename.substring(0,db_filename.length()-4); } // create or open a record manager Properties props = new Properties(); recman_ = RecordManagerFactory.createRecordManager(db_filename, props); // create new table or (if append) load existing table long recid = recman_.getNamedObject(TNAME); if (append) { if (recid != 0) { System.out.println("Appending to existing database table '" + TNAME +"' ..."); hashtable_ = HTree.load(recman_, recid); } else { System.out.println("No database table '" + TNAME +"' to append to. Creating new one"); hashtable_ = HTree.createInstance(recman_); recman_.setNamedObject(TNAME, hashtable_.getRecid()); } } else { System.out.println("Creating new database table '" + TNAME +"' ..."); hashtable_ = HTree.createInstance(recman_); recman_.setNamedObject(TNAME, hashtable_.getRecid()); } } // Helper function to fill in for C++ equivalent // Assumes InputStream is a file or standard-in // (no some other form of IO mapped device) //public boolean eof(InputStream is) public boolean eof(int c) throws IOException { return (c == -1); } public void txt2db() throws IOException { // Port of main txt2db code (originally written in C++) // Note: might not be most efficient way to do things in Java StringBuffer key; StringBuffer value; StringBuffer tmp; int num_dashes = 0; boolean delkey = false; InputStreamReader isr = new InputStreamReader(System.in,"UTF-8"); BufferedReader bis = new BufferedReader(isr); int c = bis.read(); while (!eof(c)) { num_dashes = 0; key = new StringBuffer(); value = new StringBuffer(); // Parse out 'key' from [key]\n // scan for first occurrence of [ while (!eof(c) && c != '[') { c = bis.read(); } if (!eof(c)) { c = bis.read(); // skip [ } // now look for closing ], building up 'key' as we go while (!eof(c) && c != ']') { key.append ((char)c); c = bis.read(); } if (!eof(c)) { // most likely an eol char, but if '-', then signifies record // is to be deleted, not added c = bis.read(); if (c == '-') { delkey = true; } else { delkey = false; } } while (!eof(c) && (c == '\n' || c == '\r')) { c = bis.read(); } // look for 70 dashes tmp = new StringBuffer(); while (!eof(c) && (num_dashes < 70)) { if (c == '\n') { tmp.append((char)c); num_dashes = 0; } else if (c == '\r') { // Here we are able to process both Windows-specific // text files (containing carriage-return, newline) and // Linux text files (containing only newline // characters) by ignoring the Windows' carriage-return // altogether so that we produce a uniform database // file from either system's type of text file. // // If we don't ignore the carriage return here, txt.gz // files produced on Windows cause a GS library running // on Linux to break. num_dashes = 0; } else if (c == '-') { tmp.append((char)c); ++num_dashes; } else { value.append(tmp); value.append((char)c); tmp = new StringBuffer(); num_dashes = 0; } c = bis.read(); } // if the key is not an empty string store this key-value pair if (key.length()>0) { String key_str = key.toString(); if (delkey) { // delete the given key hashtable_.remove(key_str); } else { // add/append String value_str = value.toString(); hashtable_.put(key_str,value_str); recman_.commit(); } } } recman_.close(); } public static void print_usage() { System.err.println("Usage: java Txt2Jdb [-append] database-name"); System.exit(-1); } public static void main(String[] args) { int argc = args.length; // sanity check if (argc != 1 && argc != 2) { print_usage(); } String dbname; boolean append = false; boolean delkey = false; if (argc == 2) { dbname = args[1]; if (args[0].equals("-append")) { append = true; } else { System.err.println(args[0] + " is not a valid option."); print_usage(); } } else { dbname = args[0]; } try { Txt2Jdb table = new Txt2Jdb(dbname,append); table.txt2db(); } catch (IOException e) { e.printStackTrace(); } } }