source: main/trunk/greenstone2/common-src/src/jdbmedit/Txt2Jdb.java@ 21403

Last change on this file since 21403 was 21403, checked in by davidb, 14 years ago

Code was working for ASCII characters, but not for Unicode values above 127. UTF-8 is now specified explicitly in the code.

File size: 6.2 KB
Line 
1/**********************************************************************
2 *
3 * Txt2Jdb.java --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26import java.io.BufferedInputStream;
27import java.io.InputStream;
28import java.io.IOException;
29import java.io.InputStreamReader;
30import java.io.BufferedReader;
31import java.util.Properties;
32
33import jdbm.RecordManager;
34import jdbm.RecordManagerFactory;
35import jdbm.helper.FastIterator;
36import jdbm.htree.HTree;
37
38
39public class Txt2Jdb
40{
41 static String TNAME = "greenstone";
42
43 RecordManager recman_;
44 HTree hashtable_;
45
46
47 public Txt2Jdb(String db_filename, boolean append)
48 throws IOException
49 {
50 if (db_filename.endsWith(".jdb")) {
51 // remove file extension as JDBM does not expect it
52 db_filename = db_filename.substring(0,db_filename.length()-4);
53 }
54
55 // create or open a record manager
56 Properties props = new Properties();
57 recman_ = RecordManagerFactory.createRecordManager(db_filename, props);
58
59 // create new table or (if append) load existing table
60 long recid = recman_.getNamedObject(TNAME);
61 if (append) {
62 if (recid != 0) {
63 System.out.println("Appending to existing database table '" + TNAME +"' ...");
64 hashtable_ = HTree.load(recman_, recid);
65 }
66 else {
67 System.out.println("No database table '" + TNAME +"' to append to. Creating new one");
68 hashtable_ = HTree.createInstance(recman_);
69 recman_.setNamedObject(TNAME, hashtable_.getRecid());
70 }
71 }
72 else {
73 System.out.println("Creating new database table '" + TNAME +"' ...");
74 hashtable_ = HTree.createInstance(recman_);
75 recman_.setNamedObject(TNAME, hashtable_.getRecid());
76 }
77 }
78
79
80 // Helper function to fill in for C++ equivalent
81 // Assumes InputStream is a file or standard-in
82 // (no some other form of IO mapped device)
83 //public boolean eof(InputStream is)
84 public boolean eof(int c)
85 throws IOException
86 {
87 return (c == -1);
88 }
89
90 public void txt2db()
91 throws IOException
92 {
93
94 // Port of main txt2db code (originally written in C++)
95 // Note: might not be most efficient way to do things in Java
96
97 StringBuffer key;
98 StringBuffer value;
99 StringBuffer tmp;
100 int num_dashes = 0;
101
102 boolean delkey = false;
103
104
105 InputStreamReader isr = new InputStreamReader(System.in,"UTF-8");
106 BufferedReader bis = new BufferedReader(isr);
107
108 int c = bis.read();
109 while (!eof(c)) {
110 num_dashes = 0;
111 key = new StringBuffer();
112 value = new StringBuffer();
113
114 // Parse out 'key' from [key]\n
115
116 // scan for first occurrence of [
117 while (!eof(c) && c != '[') {
118 c = bis.read();
119 }
120
121
122 if (!eof(c)) {
123 c = bis.read(); // skip [
124 }
125
126
127 // now look for closing ], building up 'key' as we go
128 while (!eof(c) && c != ']') {
129 key.append ((char)c);
130 c = bis.read();
131 }
132
133 if (!eof(c)) {
134 // most likely an eol char, but if '-', then signifies record
135 // is to be deleted, not added
136 c = bis.read();
137 if (c == '-') {
138 delkey = true;
139 }
140 else {
141 delkey = false;
142 }
143 }
144
145 while (!eof(c) && (c == '\n' || c == '\r')) {
146 c = bis.read();
147 }
148
149 // look for 70 dashes
150 tmp = new StringBuffer();
151 while (!eof(c) && (num_dashes < 70)) {
152 if (c == '\n') {
153 tmp.append((char)c);
154 num_dashes = 0;
155
156 } else if (c == '\r') {
157 // Here we are able to process both Windows-specific
158 // text files (containing carriage-return, newline) and
159 // Linux text files (containing only newline
160 // characters) by ignoring the Windows' carriage-return
161 // altogether so that we produce a uniform database
162 // file from either system's type of text file.
163 //
164 // If we don't ignore the carriage return here, txt.gz
165 // files produced on Windows cause a GS library running
166 // on Linux to break.
167 num_dashes = 0;
168
169 } else if (c == '-') {
170 tmp.append((char)c);
171 ++num_dashes;
172
173 } else {
174 value.append(tmp);
175 value.append((char)c);
176 tmp = new StringBuffer();
177 num_dashes = 0;
178 }
179 c = bis.read();
180 }
181
182 // if the key is not an empty string store this key-value pair
183 if (key.length()>0) {
184 String key_str = key.toString();
185
186 if (delkey) {
187 // delete the given key
188 hashtable_.remove(key_str);
189 }
190 else {
191 // add/append
192 String value_str = value.toString();
193 hashtable_.put(key_str,value_str);
194 recman_.commit();
195 }
196 }
197 }
198
199 recman_.close();
200 }
201
202
203
204 public static void print_usage()
205 {
206 System.err.println("Usage: java Txt2Jdb [-append] database-name");
207 System.exit(-1);
208 }
209
210
211 public static void main(String[] args)
212 {
213 int argc = args.length;
214
215 // sanity check
216 if (argc != 1 && argc != 2) {
217 print_usage();
218 }
219
220 String dbname;
221 boolean append = false;
222 boolean delkey = false;
223
224 if (argc == 2) {
225 dbname = args[1];
226 if (args[0].equals("-append")) {
227 append = true;
228 } else {
229 System.err.println(args[0] + " is not a valid option.");
230 print_usage();
231 }
232 } else {
233 dbname = args[0];
234 }
235
236 try {
237
238 Txt2Jdb table = new Txt2Jdb(dbname,append);
239 table.txt2db();
240 }
241 catch (IOException e) {
242 e.printStackTrace();
243 }
244
245
246
247 }
248
249}
250
251
Note: See TracBrowser for help on using the repository browser.