source: main/trunk/greenstone2/common-src/src/jdbmedit/Txt2Jdb.java@ 21400

Last change on this file since 21400 was 21395, checked in by davidb, 14 years ago

Initial implementation based on JDBM rather than GDBM to allow (one day) for Java-only based database to complement Java-based Lucene indexing

File size: 5.9 KB
Line 
1/**********************************************************************
2 *
3 * Txt2Jdb.java --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26import java.io.BufferedInputStream;
27import java.io.InputStream;
28
29import jdbm.RecordManager;
30import jdbm.RecordManagerFactory;
31import jdbm.helper.FastIterator;
32import jdbm.htree.HTree;
33
34import java.io.IOException;
35import java.util.Properties;
36
37
38public class Txt2Jdb
39{
40 static String TNAME = "greenstone";
41
42 RecordManager recman_;
43 HTree hashtable_;
44
45
46 public Txt2Jdb(String db_filename, boolean append)
47 throws IOException
48 {
49 // create or open a record manager
50 Properties props = new Properties();
51 recman_ = RecordManagerFactory.createRecordManager(db_filename, props);
52
53 // create new table or (if append) load existing table
54 long recid = recman_.getNamedObject(TNAME);
55 if (append) {
56 if (recid != 0) {
57 System.out.println("Appending to existing database table '" + TNAME +"' ...");
58 hashtable_ = HTree.load(recman_, recid);
59 }
60 else {
61 System.out.println("No database table '" + TNAME +"' to append to. Creating new one");
62 hashtable_ = HTree.createInstance(recman_);
63 recman_.setNamedObject(TNAME, hashtable_.getRecid());
64 }
65 }
66 else {
67 System.out.println("Creating new database table '" + TNAME +"' ...");
68 hashtable_ = HTree.createInstance(recman_);
69 recman_.setNamedObject(TNAME, hashtable_.getRecid());
70 }
71 }
72
73
74 // Helper function to fill in for C++ equivalent
75 // Assumes InputStream is a file or standard-in
76 // (no some other form of IO mapped device)
77 public boolean eof(InputStream is)
78 throws IOException
79 {
80 return (is.available() == 0);
81 }
82
83 public void txt2db()
84 throws IOException
85 {
86
87 // Port of main txt2db code (originally written in C++)
88 // Note: might not be most efficient way to do things in Java
89
90 StringBuffer key;
91 StringBuffer value;
92 StringBuffer tmp;
93 int num_dashes = 0;
94
95 boolean delkey = false;
96
97 BufferedInputStream bis = new BufferedInputStream(System.in);
98
99 int c = bis.read();
100 while (!eof(bis)) {
101 num_dashes = 0;
102 key = new StringBuffer();
103 value = new StringBuffer();
104
105 // Parse out 'key' from [key]\n
106
107 // scan for first occurrence of [
108 while (!eof(bis) && c != '[') {
109 c = bis.read();
110 }
111
112
113 if (!eof(bis)) {
114 c = bis.read(); // skip [
115 }
116
117
118 // now look for closing ], building up 'key' as we go
119 while (!eof(bis) && c != ']') {
120 key.append ((char)c);
121 c = bis.read();
122 }
123
124 if (!eof(bis)) {
125 // most likely an eol char, but if '-', then signifies record
126 // is to be deleted, not added
127 c = bis.read();
128 if (c == '-') {
129 delkey = true;
130 }
131 else {
132 delkey = false;
133 }
134 }
135
136 while (!eof(bis) && (c == '\n' || c == '\r')) {
137 c = bis.read();
138 }
139
140 // look for 70 dashes
141 tmp = new StringBuffer();
142 while (!eof(bis) && (num_dashes < 70)) {
143 if (c == '\n') {
144 tmp.append((char)c);
145 num_dashes = 0;
146
147 } else if (c == '\r') {
148 // Here we are able to process both Windows-specific
149 // text files (containing carriage-return, newline) and
150 // Linux text files (containing only newline
151 // characters) by ignoring the Windows' carriage-return
152 // altogether so that we produce a uniform database
153 // file from either system's type of text file.
154 //
155 // If we don't ignore the carriage return here, txt.gz
156 // files produced on Windows cause a GS library running
157 // on Linux to break.
158 num_dashes = 0;
159
160 } else if (c == '-') {
161 tmp.append((char)c);
162 ++num_dashes;
163
164 } else {
165 value.append(tmp);
166 value.append((char)c);
167 tmp = new StringBuffer();
168 num_dashes = 0;
169 }
170 c = bis.read();
171 }
172
173 // if the key is not an empty string store this key-value pair
174 if (key.length()>0) {
175 String key_str = key.toString();
176
177 if (delkey) {
178 // delete the given key
179 hashtable_.remove(key_str);
180 }
181 else {
182 // add/append
183 String value_str = value.toString();
184 hashtable_.put(key_str,value_str);
185 recman_.commit();
186 }
187 }
188 }
189
190 recman_.close();
191 }
192
193
194
195 public static void print_usage()
196 {
197 System.err.println("Usage: java Txt2Jdb [-append] database-name");
198 System.exit(-1);
199 }
200
201
202 public static void main(String[] args)
203 {
204 int argc = args.length;
205
206 // sanity check
207 if (argc != 1 && argc != 2) {
208 print_usage();
209 }
210
211 String dbname;
212 boolean append = false;
213 boolean delkey = false;
214
215 if (argc == 2) {
216 dbname = args[1];
217 if (args[0].equals("-append")) {
218 append = true;
219 } else {
220 System.err.println(args[0] + " is not a valid option.");
221 print_usage();
222 }
223 } else {
224 dbname = args[0];
225 }
226
227 try {
228
229 Txt2Jdb table = new Txt2Jdb(dbname,append);
230 table.txt2db();
231 }
232 catch (IOException e) {
233 e.printStackTrace();
234 }
235
236
237
238 }
239
240}
241
242
Note: See TracBrowser for help on using the repository browser.