source: gs2-extensions/tdb-edit/trunk/src/src/txt2tdb-src/txt2tdb.cpp@ 24365

Last change on this file since 24365 was 24365, checked in by jmt12, 13 years ago

Explicitly include cstdlib as modern compilers don't add by default

File size: 7.7 KB
Line 
1/**********************************************************************
2 *
3 * txt2tdb.cpp -- A utility to convert a stream of text, ala buildproc
4 * encoded output, into a TDB file.
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * Copyright (C) 2011 The New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 **********************************************************************/
27
28#if defined(GSDL_USE_OBJECTSPACE)
29#include <ospace\std\iostream>
30#elif defined(GSDL_USE_IOS_H)
31#include <iostream.h>
32#else
33#include <iostream>
34#endif
35
36#include <cstdlib>
37
38#include <time.h>
39
40#include "tdb.h"
41#include "text_t.h"
42
43// use the standard namespace
44#if !defined (GSDL_NAMESPACE_BROKEN)
45#if defined(GSDL_USE_OBJECTSPACE)
46using namespace ospace::std;
47#else
48using namespace std;
49#endif
50#endif
51
/**
 * Print a short usage summary for this tool to standard error.
 *
 * @param program_name  the name the program was invoked with (normally
 *                      argv[0]); it is only read, never modified, hence
 *                      the pointer-to-const
 */
void
printUsage (const char *program_name)
{
  cerr << "usage: " << program_name << " [-append] database-name [-debug]" << endl << endl;
  cerr << "options:" << endl;
  cerr << " -append append to existing database" << endl;
  cerr << " -debug add timing information to database" << endl << endl;
}
/** printUsage() **/
63
64void
65debugLog(TDB_CONTEXT * tdb, char * msg_content)
66{
67 // Since this log will be used to track order of events, we need an indicator
68 // of time
69 time_t seconds = time(NULL);
70 // We also need some idea of what thread this is - let's try and use the PID
71 pid_t process_id = getpid();
72 // Append the message to the entry in the db (fixed key "debuglog")
73 TDB_DATA key_datum;
74 key_datum.dptr = (unsigned char *)"debuglog";
75 key_datum.dsize = 8;
76 text_t message = "[" + text_t(seconds) + "][" + text_t(process_id) + "] " + msg_content + "\n";
77 TDB_DATA msg_datum;
78 msg_datum.dptr = (unsigned char *) message.getcstr();
79 msg_datum.dsize = message.size();
80 if (tdb_append(tdb, key_datum, msg_datum) != 0)
81 {
82 cerr << "txt2tdb::debugLog() - tdb_append returned an error" << endl;
83 exit (0);
84 }
85}
86/** debugLog() **/
87
88/**
89 */
90int
91main (int argc, char *argv[])
92{
93 // sanity check
94 if (2 > argc || argc > 4)
95 {
96 printUsage (argv[0]);
97 exit (0);
98 }
99
100 char *dbname;
101 int append = 0;
102 int delkey = 0;
103 int debug = 0;
104 if (argc == 3)
105 {
106 if (strcmp (argv[1], "-append") == 0)
107 {
108 append = 1;
109 dbname = argv[2];
110 }
111 else if (strcmp(argv[2], "-debug") == 0)
112 {
113 dbname = argv[1];
114 debug = 1;
115 }
116 else
117 {
118 cerr << argv[1] << " is not a valid option." << endl << endl;
119 printUsage(argv[0]);
120 exit (0);
121 }
122 }
123 else if (argc == 4)
124 {
125 if (strcmp (argv[1], "-append") == 0 && strcmp (argv[3], "-debug") == 0)
126 {
127 append = 1;
128 dbname = argv[2];
129 debug = 1;
130 }
131 else
132 {
133 cerr << argv[1] << " is not a valid option." << endl << endl;
134 printUsage(argv[0]);
135 exit (0);
136 }
137 }
138 else
139 {
140 dbname = argv[1];
141 }
142
143 // open the database
144 int hash_size = 0;
145 int tdb_flags = TDB_DEFAULT; // Default = 0
146 if (append == 0)
147 {
148 tdb_flags = TDB_CLEAR_IF_FIRST;
149 }
150 // Disable file IO for testing purposes
151 /*tdb_flags = tdb_flags | TDB_INTERNAL;*/
152
153 int tdb_store_flags = TDB_DEFAULT; // used later when storing
154 int open_flags = O_RDWR | O_CREAT;
155 TDB_CONTEXT *tdb = tdb_open(dbname, hash_size, tdb_flags, open_flags, 0664);
156 if (!tdb)
157 {
158 cerr << "txt2tdb::main() - couldn't create " << dbname << endl;
159 exit (0);
160 }
161
162 // If we are debugging, we'll write that we just opened the connection
163 if (debug)
164 {
165 debugLog(tdb, "opened connection to database for read/write");
166 }
167
168 char c;
169 cin.get(c);
170 while (!cin.eof())
171 {
172 int num_dashes = 0;
173 text_t key = "";
174 text_t value = "";
175
176 // Parse out 'key' from [key]\n
177 // - scan for first occurrence of [
178 while (!cin.eof() && c != '[')
179 {
180 cin.get(c);
181 }
182 // - skip [
183 if (!cin.eof())
184 {
185 cin.get(c);
186 }
187 // - now look for closing ], building up 'key' as we go
188 while (!cin.eof() && c != ']')
189 {
190 key.push_back ((unsigned char)c);
191 cin.get(c);
192 }
193 if (!cin.eof())
194 {
195 // most likely an eol char, but if '-', then signifies record
196 // is to be deleted, not added
197 cin.get(c);
198 if (c == '-')
199 {
200 delkey = 1;
201 }
202 else
203 {
204 delkey = 0;
205 }
206 }
207 while (!cin.eof() && (c == '\n' || c == '\r'))
208 {
209 cin.get(c);
210 }
211 // - read in the value, watching for 70 dashes (the end)
212 text_t tmp = "";
213 while (!cin.eof() && (num_dashes < 70))
214 {
215 if (c == '\n')
216 {
217 tmp.push_back ((unsigned char)c);
218 num_dashes = 0;
219 }
220 else if (c == '\r')
221 {
222 // Here we are able to process both Windows-specific text files
223 // (containing carriage-return, newline) and Linux text files
224 // (containing only newline characters) by ignoring the Windows'
225 // carriage-return altogether so that we produce a uniform database
226 // file from either system's type of text file.
227 // If we don't ignore the carriage return here, txt.gz files
228 // produced on Windows cause a GS library running on Linux to break.
229 num_dashes = 0;
230 }
231 else if (c == '-')
232 {
233 tmp.push_back ((unsigned char)c);
234 ++num_dashes;
235 }
236 else
237 {
238 value += tmp;
239 value.push_back ((unsigned char)c);
240 tmp = "";
241 num_dashes = 0;
242 }
243 cin.get(c);
244 }
245
246 // We should now have a key/value pair. If the key is not an empty string
247 // store this key-value pair
248 if (!key.empty())
249 {
250 // convert key to a datum datatype
251 TDB_DATA key_data;
252 // [why are cstrings from text_t not unsigned? from what I can see we
253 // explicitly cast to char * when we return]
254 key_data.dptr = (unsigned char*)key.getcstr();
255 if (key_data.dptr == NULL)
256 {
257 cerr << "NULL key_data.dptr" << endl;
258 exit (0);
259 }
260 key_data.dsize = key.size();
261 // - delete the given key if we've been asked to
262 if (delkey)
263 {
264 if (tdb_delete(tdb, key_data) < 0)
265 {
266 cerr << "tdb_delete returned an error" << endl;
267 }
268 }
269 // - otherwise add
270 else
271 {
272 // - convert value to a datum datatype
273 TDB_DATA value_data;
274 value_data.dptr = (unsigned char*)value.getcstr();
275 if (value_data.dptr == NULL)
276 {
277 cerr << "NULL value_data.dptr" << endl;
278 exit (0);
279 }
280 value_data.dsize = value.size();
281 // - store the value
282 if (tdb_store(tdb, key_data, value_data, tdb_store_flags) < 0)
283 {
284 cerr << "tdb_store returned an error" << endl;
285 exit (0);
286 }
287 }
288 }
289 }
290
291 // If we are debugging, we'll write that we are about to close the connection
292 if (debug)
293 {
294 debugLog(tdb, "closing connection to database");
295 }
296
297 // Close the database connection
298 if (tdb_close(tdb) < 0)
299 {
300 cerr << "tdb_close returned an error" << endl;
301 exit (0);
302 }
303
304 return 0;
305}
Note: See TracBrowser for help on using the repository browser.