source: main/trunk/greenstone2/common-src/src/lib/gdbmclass.cpp@ 21796

Last change on this file since 21796 was 21796, checked in by mdewsnip, 14 years ago

Fixed setting of openfile value, so the database isn't reopened unnecessarily.

File size: 10.6 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31#include <cstring>
32
33
34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
49
50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 // Map the DB mode values into GDBM mode values
59 int gdbm_mode = GDBM_READER;
60 if (mode == DB_WRITER)
61 {
62 gdbm_mode = GDBM_WRITER;
63 }
64 else if (mode == DB_WRITER_CREATE)
65 {
66 gdbm_mode = GDBM_WRCREAT;
67 }
68
69 text_t gdbm_filename = filename;
70 if (gdbm_mode == GDBM_READER)
71 {
72 // make sure we have the right file extension. Should be all gdb now.
73 // But need to handle old style ldb/bdb collection dbs
74
75 // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
76 if (!file_exists(gdbm_filename))
77 {
78 // should be all gdb now
79 if (ends_with(gdbm_filename, ".gdb")) {
80 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
81
82 if (!file_exists(gdbm_filename)) {
83 // try bdb as well
84 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
85 }
86 }
87 }
88
89 // The following fall-back is less likely to be needed now that GDBM
90 // library reads both little-endian and big-endian files
91 if (!file_exists(gdbm_filename))
92 {
93 // We're desperate, so try generating the desired GDBM file from a txtgz file
94 gdbm_filename = filename;
95 generate_from_txtgz(gdbm_filename);
96 }
97 }
98
99 char *namebuffer = gdbm_filename.getcstr();
100 do {
101#ifdef __WIN32__
102 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
103#else
104 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
105#endif
106 --num_retrys;
107 } while (num_retrys>0 && gdbmfile==NULL &&
108 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
109 delete []namebuffer;
110
111 if (gdbmfile == NULL && logout != NULL) {
112 outconvertclass text_t2ascii;
113 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
114 }
115
116 openfile = filename;
117
118 return (gdbmfile != NULL);
119}
120
121
122void gdbmclass::closedatabase ()
123{
124 if (gdbmfile == NULL) return;
125
126 gdbm_close (gdbmfile);
127 gdbmfile = NULL;
128 openfile.clear();
129}
130
131
132void gdbmclass::deletekey (const text_t &key)
133{
134 if (gdbmfile == NULL) return;
135
136 // get a utf-8 encoded c string of the unicode key
137 datum key_data;
138 key_data.dptr = (to_utf8(key)).getcstr();
139 if (key_data.dptr == NULL) return;
140 key_data.dsize = strlen (key_data.dptr);
141
142 // delete the key
143 gdbm_delete (gdbmfile, key_data);
144
145 // free up the key memory
146 delete []key_data.dptr;
147}
148
149
150// returns file extension string
151text_t gdbmclass::getfileextension ()
152{
153 // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
154 return ".gdb";
155 //if (littleEndian()) return ".ldb";
156 //return ".bdb";
157}
158
159
160// returns true on success
161bool gdbmclass::getkeydata (const text_t& key, text_t &data)
162{
163 datum key_data;
164 datum return_data;
165
166 if (gdbmfile == NULL) return false;
167
168 // get a utf-8 encoded c string of the unicode key
169 key_data.dptr = (to_utf8(key)).getcstr();
170 if (key_data.dptr == NULL) {
171 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
172 return false;
173 }
174 key_data.dsize = strlen (key_data.dptr);
175
176 // fetch the result
177 return_data = gdbm_fetch (gdbmfile, key_data);
178 delete []key_data.dptr;
179
180 if (return_data.dptr == NULL) return false;
181
182 data.setcarr (return_data.dptr, return_data.dsize);
183 free (return_data.dptr);
184 data = to_uni(data); // convert to unicode
185
186 return true;
187}
188
189
190// returns array of keys
191text_tarray gdbmclass::getkeys ()
192{
193 text_tarray keys;
194
195 text_t key = getfirstkey();
196 while (!key.empty())
197 {
198 keys.push_back(key);
199 key = getnextkey(key);
200 }
201
202 return keys;
203}
204
205
206// returns true on success
207bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
208{
209 if (gdbmfile == NULL) return false;
210
211 // store the value
212 datum key_data;
213 datum data_data;
214
215 // get a utf-8 encoded c string of the unicode key
216 key_data.dptr = (to_utf8(key)).getcstr();
217 if (key_data.dptr == NULL) {
218 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
219 return false;
220 }
221 key_data.dsize = strlen (key_data.dptr);
222
223 data_data.dptr = (to_utf8(data)).getcstr();
224 if (data_data.dptr == NULL) {
225 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
226 delete []key_data.dptr;
227 return false;
228 }
229 data_data.dsize = strlen (data_data.dptr);
230
231 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
232 delete []key_data.dptr;
233 delete []data_data.dptr;
234
235 return (ret == 0);
236}
237
238
239// ----------------------------------------------------------------------------------------
240// GDBM-ONLY FUNCTIONS
241// ----------------------------------------------------------------------------------------
242
243// getfirstkey and getnextkey are used for traversing the database
244// no insertions or deletions should be carried out while traversing
245// the database. when there are no keys left to visit in the database
246// an empty string is returned.
247text_t gdbmclass::getfirstkey ()
248{
249 if (gdbmfile == NULL) return g_EmptyText;
250
251 // get the first key
252 datum firstkey_data = gdbm_firstkey (gdbmfile);
253 if (firstkey_data.dptr == NULL) return g_EmptyText;
254
255 // convert it to text_t
256 text_t firstkey;
257 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
258 free (firstkey_data.dptr);
259 return to_uni(firstkey); // convert to unicode
260}
261
262
263text_t gdbmclass::getnextkey (const text_t &key)
264{
265 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
266
267 // get a utf-8 encoded c string of the unicode key
268 datum key_data;
269 key_data.dptr = (to_utf8(key)).getcstr();
270 if (key_data.dptr == NULL) return g_EmptyText;
271 key_data.dsize = strlen (key_data.dptr);
272
273 // get the next key
274 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
275 if (nextkey_data.dptr == NULL) {
276 delete []key_data.dptr;
277 return g_EmptyText;
278 }
279
280 // convert it to text_t
281 text_t nextkey;
282 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
283 free (nextkey_data.dptr);
284 delete []key_data.dptr;
285 return to_uni(nextkey); // convert to unicode
286}
287
288
// The following routine was written before the GDBM library was upgraded
// to read both little-endian and big-endian files, and so the reason for
// it being developed is not so important now. It's useful to keep around,
// however, because the file format for GDBM is not guaranteed to be the
// same from one computer architecture to the next, even if they both have
// the same endian-ness. We encountered this issue on the Mac at one stage:
// the size of the header data-structure for GDBM was simply a different
// size to that produced on Linux. The Mac case was resolved to be binary
// compatible with Linux, but it shows that we can't rely on this always
// being the case. Using this method provides a contingency plan. The
// databases can be shipped as .txt.gz (i.e. portable) files, which are then
// converted on the host's machine to a native GDBM database that is
// meaningful to that computer.
302
303void gdbmclass::generate_from_txtgz (text_t filename)
304{
305 // Looking to read in the database
306 // => check to see if .ldb/.bdb file already there
307 // if not (first time) then generate using txt2db
308
309 // need to generate architecture native GDBM file using txt2db
310
311 // replace sought after gdbm filename ext with ".txt.gz"
312
313 text_t::const_iterator begin = filename.begin();
314 text_t::const_iterator end= filename.end();
315
316 if (begin != end) {
317 end = end - 1;
318 }
319
320 text_t::const_iterator here = end;
321
322 bool found_ext = false;
323
324 while (here != begin) {
325 if (*here == '.') {
326 found_ext = true;
327 break;
328 }
329 here--;
330 }
331
332 text_t filename_root;
333
334 if (found_ext) {
335 filename_root = substr(begin,here);
336 }
337 else {
338 filename_root = filename;
339 }
340
341 text_t txtgz_filename = filename_root + ".txt.gz";
342 if (file_exists(txtgz_filename))
343 {
344 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
345 //cmd += " | txt2db \"" + filename + "\"";
346
347 // Test to make sure Perl is on the path
348 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
349 text_t cmd_test = "perl -v 1>&2";
350 int rv_test = gsdl_system(cmd_test, true, cerr);
351 if (rv_test != 0) {
352 cerr << "Tried to find Perl. Return exit value of running "
353 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
354 cerr << "Check that Perl is set in your environment variable PATH." << endl;
355 cerr << "At present, PATH=" << getenv("PATH") << endl;
356 }
357
358 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
359 int rv = gsdl_system(cmd, true, cerr);
360 // For some reason, launching this command with gsdl_system() still returns 1
361 // even when it returns 0 when run from the command-line. We can check whether
362 // we succeeded by looking at whether the output database file was created.
363 if (rv != 0) {
364 cerr << "Warning, non-zero return value on running command \""
365 << cmd << "\": " << rv << endl;
366 if (!file_exists(filename)) {
367 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
368 }
369 }
370 }
371}
Note: See TracBrowser for help on using the repository browser.