source: gs2-extensions/tdb-edit/trunk/src/src/txt2tdb-src/txt2tdb.cpp@ 24045

Last change on this file since 24045 was 24045, checked in by jmt12, 13 years ago

I'd misunderstood where I needed to free the memory allocated to storing char*s in the TDB_DATA. Apparently I only have to free them when they are returned by tdb_fetch() or similar. Removed segfault-causing calls to free().

File size: 5.9 KB
Line 
1/**********************************************************************
2 *
3 * txt2tdb.cpp -- A utility to convert a stream of text, ala buildproc
4 * encoded output, into a TDB file.
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * Copyright (C) 2011 The New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 **********************************************************************/
27
28#if defined(GSDL_USE_OBJECTSPACE)
29#include <ospace\std\iostream>
30#elif defined(GSDL_USE_IOS_H)
31#include <iostream.h>
32#else
33#include <iostream>
34#endif
35
36#include "tdb.h"
37#include "text_t.h"
38
39// use the standard namespace
40#if !defined (GSDL_NAMESPACE_BROKEN)
41#if defined(GSDL_USE_OBJECTSPACE)
42using namespace ospace::std;
43#else
44using namespace std;
45#endif
46#endif
47
/**
 * Print a short usage summary for this utility to standard error.
 *
 * @param program_name  the name the program was invoked as (normally
 *                      argv[0]). May be a null pointer, in which case a
 *                      fixed fallback name is printed instead.
 */
void
printUsage (char *program_name)
{
  // argv[0] is permitted to be a null pointer (e.g. when argc == 0), and
  // inserting a null char* into a stream is undefined behaviour - in
  // practice it sets badbit and swallows the rest of the message. Fall
  // back to a fixed name so the usage text is always printed in full.
  const char *shown_name = program_name ? program_name : "txt2tdb";

  cerr << "usage: " << shown_name << " [options] database-name" << endl << endl;
  cerr << "options:" << endl;
  cerr << " -append append to existing database" << endl << endl;
}
/** printUsage() **/
56
57int
58main (int argc, char *argv[])
59{
60 // sanity check
61 if (argc != 2 && argc != 3)
62 {
63 printUsage (argv[0]);
64 exit (0);
65 }
66
67 char *dbname;
68 int append = 0;
69 int delkey = 0;
70 if (argc == 3)
71 {
72 if (strcmp (argv[1], "-append") == 0)
73 {
74 append = 1;
75 dbname = argv[2];
76 }
77 else
78 {
79 cerr << argv[1] << " is not a valid option." << endl << endl;
80 printUsage(argv[0]);
81 exit (0);
82 }
83 }
84 else
85 {
86 dbname = argv[1];
87 }
88
89 // open the database
90 int hash_size = 0;
91 int tdb_flags = TDB_DEFAULT; // Default = 0
92 if (append == 0)
93 {
94 tdb_flags = TDB_CLEAR_IF_FIRST;
95 }
96 int tdb_store_flags = TDB_DEFAULT; // used later when storing
97 int open_flags = O_RDWR | O_CREAT;
98 TDB_CONTEXT *tdb = tdb_open(dbname, hash_size, tdb_flags, open_flags, 0664);
99 if (!tdb)
100 {
101 cerr << "couldn't create " << dbname << endl;
102 exit (0);
103 }
104
105 char c;
106 cin.get(c);
107 while (!cin.eof())
108 {
109 int num_dashes = 0;
110 text_t key = "";
111 text_t value = "";
112
113 // Parse out 'key' from [key]\n
114 // - scan for first occurrence of [
115 while (!cin.eof() && c != '[')
116 {
117 cin.get(c);
118 }
119 // - skip [
120 if (!cin.eof())
121 {
122 cin.get(c);
123 }
124 // - now look for closing ], building up 'key' as we go
125 while (!cin.eof() && c != ']')
126 {
127 key.push_back ((unsigned char)c);
128 cin.get(c);
129 }
130 if (!cin.eof())
131 {
132 // most likely an eol char, but if '-', then signifies record
133 // is to be deleted, not added
134 cin.get(c);
135 if (c == '-')
136 {
137 delkey = 1;
138 }
139 else
140 {
141 delkey = 0;
142 }
143 }
144 while (!cin.eof() && (c == '\n' || c == '\r'))
145 {
146 cin.get(c);
147 }
148 // - read in the value, watching for 70 dashes (the end)
149 text_t tmp = "";
150 while (!cin.eof() && (num_dashes < 70))
151 {
152 if (c == '\n')
153 {
154 tmp.push_back ((unsigned char)c);
155 num_dashes = 0;
156 }
157 else if (c == '\r')
158 {
159 // Here we are able to process both Windows-specific text files
160 // (containing carriage-return, newline) and Linux text files
161 // (containing only newline characters) by ignoring the Windows'
162 // carriage-return altogether so that we produce a uniform database
163 // file from either system's type of text file.
164 // If we don't ignore the carriage return here, txt.gz files
165 // produced on Windows cause a GS library running on Linux to break.
166 num_dashes = 0;
167 }
168 else if (c == '-')
169 {
170 tmp.push_back ((unsigned char)c);
171 ++num_dashes;
172 }
173 else
174 {
175 value += tmp;
176 value.push_back ((unsigned char)c);
177 tmp = "";
178 num_dashes = 0;
179 }
180 cin.get(c);
181 }
182
183 // We should now have a key/value pair. If the key is not an empty string
184 // store this key-value pair
185 if (!key.empty())
186 {
187 // convert key to a datum datatype
188 TDB_DATA key_data;
189 // [why are cstrings from text_t not unsigned? from what I can see we
190 // explicitly cast to char * when we return]
191 key_data.dptr = (unsigned char*)key.getcstr();
192 if (key_data.dptr == NULL)
193 {
194 cerr << "NULL key_data.dptr" << endl;
195 exit (0);
196 }
197 key_data.dsize = key.size();
198 // - delete the given key if we've been asked to
199 if (delkey)
200 {
201 if (tdb_delete(tdb, key_data) < 0)
202 {
203 cerr << "tdb_delete returned an error" << endl;
204 }
205 }
206 // - otherwise add
207 else
208 {
209 // - convert value to a datum datatype
210 TDB_DATA value_data;
211 value_data.dptr = (unsigned char*)value.getcstr();
212 if (value_data.dptr == NULL)
213 {
214 cerr << "NULL value_data.dptr" << endl;
215 exit (0);
216 }
217 value_data.dsize = value.size();
218 // - store the value
219 if (tdb_store(tdb, key_data, value_data, tdb_store_flags) < 0)
220 {
221 cerr << "tdb_store returned an error" << endl;
222 exit (0);
223 }
224 }
225 }
226 }
227
228 // Close the database connection
229 if (tdb_close(tdb) < 0)
230 {
231 cerr << "tdb_close returned an error" << endl;
232 exit (0);
233 }
234
235 return 0;
236}
Note: See TracBrowser for help on using the repository browser.