source: gs2-extensions/tdb-edit/trunk/src/src/txt2tdb-src/txt2tdb.cpp@ 23992

Last change on this file since 23992 was 23992, checked in by jmt12, 13 years ago

Initial checkin of txt2tdb wrapper code

File size: 5.9 KB
Line 
1/**********************************************************************
2 *
3 * txt2tdb.cpp -- A utility to convert a stream of text, ala buildproc
4 * encoded output, into a TDB file.
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * Copyright (C) 2011 The New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 **********************************************************************/
27
28//#include <stdlib.h>
29//#include <cstring>
30//#include <fcntl.h>
31#include <iostream>
32
33//#if defined(GSDL_USE_OBJECTSPACE)
34//# include <ospace\std\iostream>
35//#elif defined(GSDL_USE_IOS_H)
36//# include <iostream.h>
37//#else
38//# include <iostream>
39//#endif
40
41#include "tdb.h"
42#include "text_t.h"
43
44using namespace std;
45
/**
 * Write the command-line help for this tool to standard error.
 *
 * @param program_name  text shown as the command name in the usage line
 *                      (main passes argv[0])
 */
void
printUsage (char *program_name)
{
  // The fixed part of the help text. Each "\n\n" leaves one blank line
  // after the preceding line of output.
  static const char options_help[] =
    "options:\n"
    " -append append to existing database\n\n";

  cerr << "usage: " << program_name << " [options] database-name\n\n"
       << options_help;
}
/** printUsage() **/
54
55int
56main (int argc, char *argv[])
57{
58 // sanity check
59 if (argc != 2 && argc != 3)
60 {
61 printUsage (argv[0]);
62 exit (0);
63 }
64
65 char *dbname;
66 int append = 0;
67 int delkey = 0;
68 if (argc == 3)
69 {
70 if (strcmp (argv[1], "-append") == 0)
71 {
72 append = 1;
73 dbname = argv[2];
74 }
75 else
76 {
77 cerr << argv[1] << " is not a valid option." << endl << endl;
78 printUsage (argv[0]);
79 exit (0);
80 }
81 }
82 else
83 {
84 dbname = argv[1];
85 }
86
87 // open the database
88 int hash_size = 0;
89 int tdb_flags = TDB_DEFAULT; // Default = 0
90 if (append == 0)
91 {
92 tdb_flags = TDB_CLEAR_IF_FIRST;
93 }
94 int tdb_store_flags = TDB_DEFAULT; // used later when storing
95 int open_flags = O_RDWR | O_CREAT;
96 TDB_CONTEXT *tdb = tdb_open(dbname, hash_size, tdb_flags, open_flags, 0664);
97 if (!tdb)
98 {
99 cerr << "couldn't create " << dbname << endl;
100 exit (0);
101 }
102
103 char c;
104 cin.get(c);
105 while (!cin.eof())
106 {
107 int num_dashes = 0;
108 text_t key = "";
109 text_t value = "";
110
111 // Parse out 'key' from [key]\n
112 // - scan for first occurrence of [
113 while (!cin.eof() && c != '[')
114 {
115 cin.get(c);
116 }
117 // - skip [
118 if (!cin.eof())
119 {
120 cin.get(c);
121 }
122 // - now look for closing ], building up 'key' as we go
123 while (!cin.eof() && c != ']')
124 {
125 key.push_back ((unsigned char)c);
126 cin.get(c);
127 }
128 if (!cin.eof())
129 {
130 // most likely an eol char, but if '-', then signifies record
131 // is to be deleted, not added
132 cin.get(c);
133 if (c == '-') {
134 delkey = 1;
135 }
136 else {
137 delkey = 0;
138 }
139 }
140 while (!cin.eof() && (c == '\n' || c == '\r'))
141 {
142 cin.get(c);
143 }
144 // - read in the value, watching for 70 dashes (the end)
145 text_t tmp = "";
146 while (!cin.eof() && (num_dashes < 70))
147 {
148 if (c == '\n')
149 {
150 tmp.push_back ((unsigned char)c);
151 num_dashes = 0;
152 }
153 else if (c == '\r')
154 {
155 // Here we are able to process both Windows-specific text files
156 // (containing carriage-return, newline) and Linux text files
157 // (containing only newline characters) by ignoring the Windows'
158 // carriage-return altogether so that we produce a uniform database
159 // file from either system's type of text file.
160 // If we don't ignore the carriage return here, txt.gz files
161 // produced on Windows cause a GS library running on Linux to break.
162 num_dashes = 0;
163 }
164 else if (c == '-')
165 {
166 tmp.push_back ((unsigned char)c);
167 ++num_dashes;
168 }
169 else
170 {
171 value += tmp;
172 value.push_back ((unsigned char)c);
173 tmp = "";
174 num_dashes = 0;
175 }
176 cin.get(c);
177 }
178
179 // We should now have a key/value pair. If the key is not an empty string
180 // store this key-value pair
181 if (!key.empty())
182 {
183 // convert key to a datum datatype
184 TDB_DATA key_data;
185 // [why are cstrings from text_t not unsigned? from what I can see we
186 // explicitly cast to char * when we return]
187 key_data.dptr = (unsigned char*)key.getcstr();
188 if (key_data.dptr == NULL)
189 {
190 cerr << "NULL key_data.dptr" << endl;
191 exit (0);
192 }
193 key_data.dsize = key.size();
194 // - delete the given key if we've been asked to
195 if (delkey)
196 {
197 if (tdb_delete(tdb, key_data) < 0)
198 {
199 cerr << "tdb_delete returned an error" << endl;
200 }
201 }
202 // - otherwise add
203 else {
204 // - convert value to a datum datatype
205 TDB_DATA value_data;
206 value_data.dptr = (unsigned char*)value.getcstr();
207 if (value_data.dptr == NULL)
208 {
209 cerr << "NULL value_data.dptr" << endl;
210 exit (0);
211 }
212 value_data.dsize = value.size();
213 // - store the value
214 if (tdb_store(tdb, key_data, value_data, tdb_store_flags) < 0)
215 {
216 cerr << "tdb_store returned an error" << endl;
217 exit (0);
218 }
219 free(value_data.dptr);
220 }
221 free(key_data.dptr);
222 }
223 }
224
225 // Close the database connection
226 if (tdb_close(tdb) < 0)
227 {
228 cerr << "tdb_close returned an error" << endl;
229 exit (0);
230 }
231
232 return 0;
233}
Note: See TracBrowser for help on using the repository browser.