source: gsdl/trunk/common-src/src/lib/gdbmclass.cpp@ 19061

Last change on this file since 19061 was 19061, checked in by kjdon, 15 years ago

changed a couple of comments and a bit of indentation

File size: 9.8 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31#include <cstring>
32
33
34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
49
50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 openfile = filename;
59
60 // Map the DB mode values into GDBM mode values
61 int gdbm_mode = GDBM_READER;
62 if (mode == DB_WRITER)
63 {
64 gdbm_mode = GDBM_WRITER;
65 }
66 else if (mode == DB_WRITER_CREATE)
67 {
68 gdbm_mode = GDBM_WRCREAT;
69 }
70
71 text_t gdbm_filename = filename;
72 if (gdbm_mode == GDBM_READER)
73 {
74 // make sure we have the right file extension. Should be all gdb now.
75 // But need to handle old style ldb/bdb collection dbs
76
77 // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
78 if (!file_exists(gdbm_filename))
79 {
80 // should be all gdb now
81 if (ends_with(gdbm_filename, ".gdb")) {
82 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
83
84 if (!file_exists(gdbm_filename)) {
85 // try bdb as well
86 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
87 }
88 }
89 }
90
91 // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
92 if (!file_exists(gdbm_filename))
93 {
94 // We're desperate, so try generating the desired GDBM file from a txtgz file
95 gdbm_filename = filename;
96 generate_from_txtgz(gdbm_filename);
97 }
98 }
99
100 char *namebuffer = gdbm_filename.getcstr();
101 do {
102#ifdef __WIN32__
103 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
104#else
105 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
106#endif
107 --num_retrys;
108 } while (num_retrys>0 && gdbmfile==NULL &&
109 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
110 delete []namebuffer;
111
112 if (gdbmfile == NULL && logout != NULL) {
113 outconvertclass text_t2ascii;
114 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
115 }
116
117 return (gdbmfile != NULL);
118}
119
120
121void gdbmclass::closedatabase ()
122{
123 if (gdbmfile == NULL) return;
124
125 gdbm_close (gdbmfile);
126 gdbmfile = NULL;
127 openfile.clear();
128}
129
130
131void gdbmclass::deletekey (const text_t &key)
132{
133 if (gdbmfile == NULL) return;
134
135 // get a utf-8 encoded c string of the unicode key
136 datum key_data;
137 key_data.dptr = (to_utf8(key)).getcstr();
138 if (key_data.dptr == NULL) return;
139 key_data.dsize = strlen (key_data.dptr);
140
141 // delete the key
142 gdbm_delete (gdbmfile, key_data);
143
144 // free up the key memory
145 delete []key_data.dptr;
146}
147
148
149// returns file extension string
150text_t gdbmclass::getfileextension ()
151{
152 // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
153 return ".gdb";
154 //if (littleEndian()) return ".ldb";
155 //return ".bdb";
156}
157
158
159// returns true on success
160bool gdbmclass::getkeydata (const text_t& key, text_t &data)
161{
162 datum key_data;
163 datum return_data;
164
165 if (gdbmfile == NULL) return false;
166
167 // get a utf-8 encoded c string of the unicode key
168 key_data.dptr = (to_utf8(key)).getcstr();
169 if (key_data.dptr == NULL) {
170 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
171 return false;
172 }
173 key_data.dsize = strlen (key_data.dptr);
174
175 // fetch the result
176 return_data = gdbm_fetch (gdbmfile, key_data);
177 delete []key_data.dptr;
178
179 if (return_data.dptr == NULL) return false;
180
181 data.setcarr (return_data.dptr, return_data.dsize);
182 free (return_data.dptr);
183 data = to_uni(data); // convert to unicode
184
185 return true;
186}
187
188
189// returns array of keys
190text_tarray gdbmclass::getkeys ()
191{
192 text_tarray keys;
193
194 text_t key = getfirstkey();
195 while (!key.empty())
196 {
197 keys.push_back(key);
198 key = getnextkey(key);
199 }
200
201 return keys;
202}
203
204
205// returns true on success
206bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
207{
208 if (gdbmfile == NULL) return false;
209
210 // store the value
211 datum key_data;
212 datum data_data;
213
214 // get a utf-8 encoded c string of the unicode key
215 key_data.dptr = (to_utf8(key)).getcstr();
216 if (key_data.dptr == NULL) {
217 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
218 return false;
219 }
220 key_data.dsize = strlen (key_data.dptr);
221
222 data_data.dptr = (to_utf8(data)).getcstr();
223 if (data_data.dptr == NULL) {
224 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
225 delete []key_data.dptr;
226 return false;
227 }
228 data_data.dsize = strlen (data_data.dptr);
229
230 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
231 delete []key_data.dptr;
232 delete []data_data.dptr;
233
234 return (ret == 0);
235}
236
237
238// ----------------------------------------------------------------------------------------
239// GDBM-ONLY FUNCTIONS
240// ----------------------------------------------------------------------------------------
241
242// getfirstkey and getnextkey are used for traversing the database
243// no insertions or deletions should be carried out while traversing
244// the database. when there are no keys left to visit in the database
245// an empty string is returned.
246text_t gdbmclass::getfirstkey ()
247{
248 if (gdbmfile == NULL) return g_EmptyText;
249
250 // get the first key
251 datum firstkey_data = gdbm_firstkey (gdbmfile);
252 if (firstkey_data.dptr == NULL) return g_EmptyText;
253
254 // convert it to text_t
255 text_t firstkey;
256 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
257 free (firstkey_data.dptr);
258 return to_uni(firstkey); // convert to unicode
259}
260
261
262text_t gdbmclass::getnextkey (const text_t &key)
263{
264 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
265
266 // get a utf-8 encoded c string of the unicode key
267 datum key_data;
268 key_data.dptr = (to_utf8(key)).getcstr();
269 if (key_data.dptr == NULL) return g_EmptyText;
270 key_data.dsize = strlen (key_data.dptr);
271
272 // get the next key
273 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
274 if (nextkey_data.dptr == NULL) {
275 delete []key_data.dptr;
276 return g_EmptyText;
277 }
278
279 // convert it to text_t
280 text_t nextkey;
281 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
282 free (nextkey_data.dptr);
283 delete []key_data.dptr;
284 return to_uni(nextkey); // convert to unicode
285}
286
287
288// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
289void gdbmclass::generate_from_txtgz (text_t filename)
290{
291 // Looking to read in the database
292 // => check to see if .ldb/.bdb file already there
293 // if not (first time) then generate using txt2db
294
295 // need to generate architecture native GDBM file using txt2db
296
297 // replace sought after gdbm filename ext with ".txt.gz"
298
299 text_t::const_iterator begin = filename.begin();
300 text_t::const_iterator end= filename.end();
301
302 if (begin != end) {
303 end = end - 1;
304 }
305
306 text_t::const_iterator here = end;
307
308 bool found_ext = false;
309
310 while (here != begin) {
311 if (*here == '.') {
312 found_ext = true;
313 break;
314 }
315 here--;
316 }
317
318 text_t filename_root;
319
320 if (found_ext) {
321 filename_root = substr(begin,here);
322 }
323 else {
324 filename_root = filename;
325 }
326
327 text_t txtgz_filename = filename_root + ".txt.gz";
328 if (file_exists(txtgz_filename))
329 {
330 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
331 //cmd += " | txt2db \"" + filename + "\"";
332
333 // Test to make sure Perl is on the path
334 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
335 text_t cmd_test = "perl -v 1>&2";
336 int rv_test = gsdl_system(cmd_test, true, cerr);
337 if (rv_test != 0) {
338 cerr << "Tried to find Perl. Return exit value of running "
339 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
340 cerr << "Check that Perl is set in your environment variable PATH." << endl;
341 cerr << "At present, PATH=" << getenv("PATH") << endl;
342 }
343
344 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
345 int rv = gsdl_system(cmd, true, cerr);
346 // For some reason, launching this command with gsdl_system() still returns 1
347 // even when it returns 0 when run from the command-line. We can check whether
348 // we succeeded by looking at whether the output database file was created.
349 if (rv != 0) {
350 cerr << "Warning, non-zero return value on running command \""
351 << cmd << "\": " << rv << endl;
352 if (!file_exists(filename)) {
353 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
354 }
355 }
356 }
357}
Note: See TracBrowser for help on using the repository browser.