/**********************************************************************
 *
 * Txt2Jdb.java --
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **********************************************************************/
| 25 |
|
---|
| 26 | import java.io.BufferedInputStream;
|
---|
| 27 | import java.io.InputStream;
|
---|
[21403] | 28 | import java.io.IOException;
|
---|
| 29 | import java.io.InputStreamReader;
|
---|
| 30 | import java.io.BufferedReader;
|
---|
| 31 | import java.util.Properties;
|
---|
[21395] | 32 |
|
---|
| 33 | import jdbm.RecordManager;
|
---|
| 34 | import jdbm.RecordManagerFactory;
|
---|
| 35 | import jdbm.helper.FastIterator;
|
---|
| 36 | import jdbm.htree.HTree;
|
---|
| 37 |
|
---|
| 38 |
|
---|
| 39 | public class Txt2Jdb
|
---|
| 40 | {
|
---|
| 41 | static String TNAME = "greenstone";
|
---|
| 42 |
|
---|
| 43 | RecordManager recman_;
|
---|
| 44 | HTree hashtable_;
|
---|
| 45 |
|
---|
| 46 |
|
---|
| 47 | public Txt2Jdb(String db_filename, boolean append)
|
---|
| 48 | throws IOException
|
---|
| 49 | {
|
---|
[21403] | 50 | if (db_filename.endsWith(".jdb")) {
|
---|
| 51 | // remove file extension as JDBM does not expect it
|
---|
| 52 | db_filename = db_filename.substring(0,db_filename.length()-4);
|
---|
| 53 | }
|
---|
| 54 |
|
---|
[21395] | 55 | // create or open a record manager
|
---|
| 56 | Properties props = new Properties();
|
---|
| 57 | recman_ = RecordManagerFactory.createRecordManager(db_filename, props);
|
---|
| 58 |
|
---|
| 59 | // create new table or (if append) load existing table
|
---|
| 60 | long recid = recman_.getNamedObject(TNAME);
|
---|
| 61 | if (append) {
|
---|
| 62 | if (recid != 0) {
|
---|
| 63 | System.out.println("Appending to existing database table '" + TNAME +"' ...");
|
---|
| 64 | hashtable_ = HTree.load(recman_, recid);
|
---|
| 65 | }
|
---|
| 66 | else {
|
---|
| 67 | System.out.println("No database table '" + TNAME +"' to append to. Creating new one");
|
---|
| 68 | hashtable_ = HTree.createInstance(recman_);
|
---|
| 69 | recman_.setNamedObject(TNAME, hashtable_.getRecid());
|
---|
| 70 | }
|
---|
| 71 | }
|
---|
| 72 | else {
|
---|
| 73 | System.out.println("Creating new database table '" + TNAME +"' ...");
|
---|
| 74 | hashtable_ = HTree.createInstance(recman_);
|
---|
| 75 | recman_.setNamedObject(TNAME, hashtable_.getRecid());
|
---|
| 76 | }
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 |
|
---|
| 80 | // Helper function to fill in for C++ equivalent
|
---|
| 81 | // Assumes InputStream is a file or standard-in
|
---|
| 82 | // (no some other form of IO mapped device)
|
---|
[21403] | 83 | //public boolean eof(InputStream is)
|
---|
| 84 | public boolean eof(int c)
|
---|
[21395] | 85 | throws IOException
|
---|
| 86 | {
|
---|
[21403] | 87 | return (c == -1);
|
---|
[21395] | 88 | }
|
---|
| 89 |
|
---|
| 90 | public void txt2db()
|
---|
| 91 | throws IOException
|
---|
| 92 | {
|
---|
| 93 |
|
---|
| 94 | // Port of main txt2db code (originally written in C++)
|
---|
| 95 | // Note: might not be most efficient way to do things in Java
|
---|
| 96 |
|
---|
| 97 | StringBuffer key;
|
---|
| 98 | StringBuffer value;
|
---|
| 99 | StringBuffer tmp;
|
---|
| 100 | int num_dashes = 0;
|
---|
| 101 |
|
---|
| 102 | boolean delkey = false;
|
---|
| 103 |
|
---|
[21403] | 104 |
|
---|
| 105 | InputStreamReader isr = new InputStreamReader(System.in,"UTF-8");
|
---|
| 106 | BufferedReader bis = new BufferedReader(isr);
|
---|
| 107 |
|
---|
[21395] | 108 | int c = bis.read();
|
---|
[21403] | 109 | while (!eof(c)) {
|
---|
[21395] | 110 | num_dashes = 0;
|
---|
| 111 | key = new StringBuffer();
|
---|
| 112 | value = new StringBuffer();
|
---|
| 113 |
|
---|
| 114 | // Parse out 'key' from [key]\n
|
---|
| 115 |
|
---|
| 116 | // scan for first occurrence of [
|
---|
[21403] | 117 | while (!eof(c) && c != '[') {
|
---|
[21395] | 118 | c = bis.read();
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 |
|
---|
[21403] | 122 | if (!eof(c)) {
|
---|
[21395] | 123 | c = bis.read(); // skip [
|
---|
| 124 | }
|
---|
| 125 |
|
---|
| 126 |
|
---|
| 127 | // now look for closing ], building up 'key' as we go
|
---|
[21403] | 128 | while (!eof(c) && c != ']') {
|
---|
[21395] | 129 | key.append ((char)c);
|
---|
| 130 | c = bis.read();
|
---|
| 131 | }
|
---|
| 132 |
|
---|
[21403] | 133 | if (!eof(c)) {
|
---|
[21395] | 134 | // most likely an eol char, but if '-', then signifies record
|
---|
| 135 | // is to be deleted, not added
|
---|
| 136 | c = bis.read();
|
---|
| 137 | if (c == '-') {
|
---|
| 138 | delkey = true;
|
---|
| 139 | }
|
---|
| 140 | else {
|
---|
| 141 | delkey = false;
|
---|
| 142 | }
|
---|
| 143 | }
|
---|
| 144 |
|
---|
[21403] | 145 | while (!eof(c) && (c == '\n' || c == '\r')) {
|
---|
[21395] | 146 | c = bis.read();
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | // look for 70 dashes
|
---|
| 150 | tmp = new StringBuffer();
|
---|
[21403] | 151 | while (!eof(c) && (num_dashes < 70)) {
|
---|
[21395] | 152 | if (c == '\n') {
|
---|
| 153 | tmp.append((char)c);
|
---|
| 154 | num_dashes = 0;
|
---|
| 155 |
|
---|
| 156 | } else if (c == '\r') {
|
---|
| 157 | // Here we are able to process both Windows-specific
|
---|
| 158 | // text files (containing carriage-return, newline) and
|
---|
| 159 | // Linux text files (containing only newline
|
---|
| 160 | // characters) by ignoring the Windows' carriage-return
|
---|
| 161 | // altogether so that we produce a uniform database
|
---|
| 162 | // file from either system's type of text file.
|
---|
| 163 | //
|
---|
| 164 | // If we don't ignore the carriage return here, txt.gz
|
---|
| 165 | // files produced on Windows cause a GS library running
|
---|
| 166 | // on Linux to break.
|
---|
| 167 | num_dashes = 0;
|
---|
| 168 |
|
---|
| 169 | } else if (c == '-') {
|
---|
| 170 | tmp.append((char)c);
|
---|
| 171 | ++num_dashes;
|
---|
| 172 |
|
---|
| 173 | } else {
|
---|
| 174 | value.append(tmp);
|
---|
| 175 | value.append((char)c);
|
---|
| 176 | tmp = new StringBuffer();
|
---|
| 177 | num_dashes = 0;
|
---|
| 178 | }
|
---|
| 179 | c = bis.read();
|
---|
| 180 | }
|
---|
| 181 |
|
---|
| 182 | // if the key is not an empty string store this key-value pair
|
---|
| 183 | if (key.length()>0) {
|
---|
| 184 | String key_str = key.toString();
|
---|
| 185 |
|
---|
| 186 | if (delkey) {
|
---|
| 187 | // delete the given key
|
---|
| 188 | hashtable_.remove(key_str);
|
---|
| 189 | }
|
---|
| 190 | else {
|
---|
| 191 | // add/append
|
---|
| 192 | String value_str = value.toString();
|
---|
| 193 | hashtable_.put(key_str,value_str);
|
---|
| 194 | recman_.commit();
|
---|
| 195 | }
|
---|
| 196 | }
|
---|
| 197 | }
|
---|
| 198 |
|
---|
| 199 | recman_.close();
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 |
|
---|
| 203 |
|
---|
| 204 | public static void print_usage()
|
---|
| 205 | {
|
---|
| 206 | System.err.println("Usage: java Txt2Jdb [-append] database-name");
|
---|
| 207 | System.exit(-1);
|
---|
| 208 | }
|
---|
| 209 |
|
---|
| 210 |
|
---|
| 211 | public static void main(String[] args)
|
---|
| 212 | {
|
---|
| 213 | int argc = args.length;
|
---|
| 214 |
|
---|
| 215 | // sanity check
|
---|
| 216 | if (argc != 1 && argc != 2) {
|
---|
| 217 | print_usage();
|
---|
| 218 | }
|
---|
| 219 |
|
---|
| 220 | String dbname;
|
---|
| 221 | boolean append = false;
|
---|
| 222 | boolean delkey = false;
|
---|
| 223 |
|
---|
| 224 | if (argc == 2) {
|
---|
| 225 | dbname = args[1];
|
---|
| 226 | if (args[0].equals("-append")) {
|
---|
| 227 | append = true;
|
---|
| 228 | } else {
|
---|
| 229 | System.err.println(args[0] + " is not a valid option.");
|
---|
| 230 | print_usage();
|
---|
| 231 | }
|
---|
| 232 | } else {
|
---|
| 233 | dbname = args[0];
|
---|
| 234 | }
|
---|
| 235 |
|
---|
| 236 | try {
|
---|
| 237 |
|
---|
| 238 | Txt2Jdb table = new Txt2Jdb(dbname,append);
|
---|
| 239 | table.txt2db();
|
---|
| 240 | }
|
---|
| 241 | catch (IOException e) {
|
---|
| 242 | e.printStackTrace();
|
---|
| 243 | }
|
---|
| 244 |
|
---|
| 245 |
|
---|
| 246 |
|
---|
| 247 | }
|
---|
| 248 |
|
---|
| 249 | }
|
---|
| 250 |
|
---|
| 251 |
|
---|