source: gs2-extensions/tdb-edit/trunk/src/src/txt2tdb-src/txt2tdb.cpp@ 23996

Last change on this file since 23996 was 23996, checked in by jmt12, 13 years ago

Altering some of the header includes to be more bulletproof and adding the executable to the list of ignored files

File size: 6.0 KB
Line 
/**********************************************************************
 *
 * txt2tdb.cpp -- A utility to convert a stream of text, ala buildproc
 * encoded output, into a TDB file.
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Copyright (C) 2011 The New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **********************************************************************/
27
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>

#if defined(GSDL_USE_OBJECTSPACE)
#include <ospace\std\iostream>
#elif defined(GSDL_USE_IOS_H)
#include <iostream.h>
#else
#include <iostream>
#endif

#include "tdb.h"
#include "text_t.h"

// use the standard namespace
#if !defined (GSDL_NAMESPACE_BROKEN)
#if defined(GSDL_USE_OBJECTSPACE)
using namespace ospace::std;
#else
using namespace std;
#endif
#endif
47
/**
 * Write the command-line usage summary to standard error.
 *
 * The text is only read, so the parameter is a pointer to const; both
 * argv[0] and string literals can be passed.
 *
 * @param program_name name shown as the command in the usage line
 */
void
printUsage (const char *program_name)
{
  cerr << "usage: " << program_name << " [options] database-name" << endl << endl;
  cerr << "options:" << endl;
  cerr << " -append append to existing database" << endl << endl;
}
/** printUsage() **/
56
57int
58main (int argc, char *argv[])
59{
60 // sanity check
61 if (argc != 2 && argc != 3)
62 {
63 printUsage (argv[0]);
64 exit (0);
65 }
66
67 char *dbname;
68 int append = 0;
69 int delkey = 0;
70 if (argc == 3)
71 {
72 if (strcmp (argv[1], "-append") == 0)
73 {
74 append = 1;
75 dbname = argv[2];
76 }
77 else
78 {
79 cerr << argv[1] << " is not a valid option." << endl << endl;
80 printUsage(argv[0]);
81 exit (0);
82 }
83 }
84 else
85 {
86 dbname = argv[1];
87 }
88
89 // open the database
90 int hash_size = 0;
91 int tdb_flags = TDB_DEFAULT; // Default = 0
92 if (append == 0)
93 {
94 tdb_flags = TDB_CLEAR_IF_FIRST;
95 }
96 int tdb_store_flags = TDB_DEFAULT; // used later when storing
97 int open_flags = O_RDWR | O_CREAT;
98 TDB_CONTEXT *tdb = tdb_open(dbname, hash_size, tdb_flags, open_flags, 0664);
99 if (!tdb)
100 {
101 cerr << "couldn't create " << dbname << endl;
102 exit (0);
103 }
104
105 char c;
106 cin.get(c);
107 while (!cin.eof())
108 {
109 int num_dashes = 0;
110 text_t key = "";
111 text_t value = "";
112
113 // Parse out 'key' from [key]\n
114 // - scan for first occurrence of [
115 while (!cin.eof() && c != '[')
116 {
117 cin.get(c);
118 }
119 // - skip [
120 if (!cin.eof())
121 {
122 cin.get(c);
123 }
124 // - now look for closing ], building up 'key' as we go
125 while (!cin.eof() && c != ']')
126 {
127 key.push_back ((unsigned char)c);
128 cin.get(c);
129 }
130 if (!cin.eof())
131 {
132 // most likely an eol char, but if '-', then signifies record
133 // is to be deleted, not added
134 cin.get(c);
135 if (c == '-') {
136 delkey = 1;
137 }
138 else {
139 delkey = 0;
140 }
141 }
142 while (!cin.eof() && (c == '\n' || c == '\r'))
143 {
144 cin.get(c);
145 }
146 // - read in the value, watching for 70 dashes (the end)
147 text_t tmp = "";
148 while (!cin.eof() && (num_dashes < 70))
149 {
150 if (c == '\n')
151 {
152 tmp.push_back ((unsigned char)c);
153 num_dashes = 0;
154 }
155 else if (c == '\r')
156 {
157 // Here we are able to process both Windows-specific text files
158 // (containing carriage-return, newline) and Linux text files
159 // (containing only newline characters) by ignoring the Windows'
160 // carriage-return altogether so that we produce a uniform database
161 // file from either system's type of text file.
162 // If we don't ignore the carriage return here, txt.gz files
163 // produced on Windows cause a GS library running on Linux to break.
164 num_dashes = 0;
165 }
166 else if (c == '-')
167 {
168 tmp.push_back ((unsigned char)c);
169 ++num_dashes;
170 }
171 else
172 {
173 value += tmp;
174 value.push_back ((unsigned char)c);
175 tmp = "";
176 num_dashes = 0;
177 }
178 cin.get(c);
179 }
180
181 // We should now have a key/value pair. If the key is not an empty string
182 // store this key-value pair
183 if (!key.empty())
184 {
185 // convert key to a datum datatype
186 TDB_DATA key_data;
187 // [why are cstrings from text_t not unsigned? from what I can see we
188 // explicitly cast to char * when we return]
189 key_data.dptr = (unsigned char*)key.getcstr();
190 if (key_data.dptr == NULL)
191 {
192 cerr << "NULL key_data.dptr" << endl;
193 exit (0);
194 }
195 key_data.dsize = key.size();
196 // - delete the given key if we've been asked to
197 if (delkey)
198 {
199 if (tdb_delete(tdb, key_data) < 0)
200 {
201 cerr << "tdb_delete returned an error" << endl;
202 }
203 }
204 // - otherwise add
205 else {
206 // - convert value to a datum datatype
207 TDB_DATA value_data;
208 value_data.dptr = (unsigned char*)value.getcstr();
209 if (value_data.dptr == NULL)
210 {
211 cerr << "NULL value_data.dptr" << endl;
212 exit (0);
213 }
214 value_data.dsize = value.size();
215 // - store the value
216 if (tdb_store(tdb, key_data, value_data, tdb_store_flags) < 0)
217 {
218 cerr << "tdb_store returned an error" << endl;
219 exit (0);
220 }
221 free(value_data.dptr);
222 }
223 free(key_data.dptr);
224 }
225 }
226
227 // Close the database connection
228 if (tdb_close(tdb) < 0)
229 {
230 cerr << "tdb_close returned an error" << endl;
231 exit (0);
232 }
233
234 return 0;
235}
Note: See TracBrowser for help on using the repository browser.