source: main/trunk/greenstone2/common-src/src/lib/gdbmclass.cpp@ 26677

Last change on this file since 26677 was 26677, checked in by davidb, 11 years ago

Mixed up over precise name to use for Microsoft Visualstudio Compile. Correct value is _MSC_VER. Files now fixed.

File size: 10.7 KB
RevLine 
[15429]1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
[15679]27#include "gsdltools.h"
[15429]28#include "gsdlunicode.h"
[16895]29#include "fileutil.h"
[17701]30#include "stdlib.h"
[18880]31#include <cstring>
[15429]32
33
[22067]34gdbmclass::gdbmclass(const text_t& gsdlhome)
35 : dbclass(gsdlhome)
36{
37 gdbmfile = NULL;
38}
39
[15429]40gdbmclass::~gdbmclass()
41{
42 closedatabase();
43}
44
[15644]45
[15429]46// returns true if opened
47bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
[26677]48#ifdef _MSC_VER
[15429]49 bool need_filelock
50#else
51 bool
52#endif
53 )
54{
[16895]55
[15429]56 text_t data_location;
57 int block_size = 512;
58
59 if (gdbmfile != NULL) {
60 if (openfile == filename) return true;
61 else closedatabase ();
62 }
63
[15557]64 // Map the DB mode values into GDBM mode values
65 int gdbm_mode = GDBM_READER;
66 if (mode == DB_WRITER)
67 {
68 gdbm_mode = GDBM_WRITER;
69 }
70 else if (mode == DB_WRITER_CREATE)
71 {
72 gdbm_mode = GDBM_WRCREAT;
73 }
74
[18051]75 text_t gdbm_filename = filename;
[18050]76 if (gdbm_mode == GDBM_READER)
77 {
[19061]78 // make sure we have the right file extension. Should be all gdb now.
79 // But need to handle old style ldb/bdb collection dbs
[18656]80
81 // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
[18051]82 if (!file_exists(gdbm_filename))
[18050]83 {
[19061]84 // should be all gdb now
[18656]85 if (ends_with(gdbm_filename, ".gdb")) {
86 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
[19061]87
[18656]88 if (!file_exists(gdbm_filename)) {
89 // try bdb as well
90 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
91 }
[18051]92 }
[16895]93 }
[18051]94
[21405]95 // The following fall-back is less likely to be needed now that GDBM
96 // library reads both little-endian and big-endian files
[18051]97 if (!file_exists(gdbm_filename))
98 {
99 // We're desperate, so try generating the desired GDBM file from a txtgz file
100 gdbm_filename = filename;
101 generate_from_txtgz(gdbm_filename);
102 }
[16895]103 }
104
[18051]105 char *namebuffer = gdbm_filename.getcstr();
[15429]106 do {
[26677]107#ifdef _MSC_VER
[15557]108 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
[15429]109#else
[15557]110 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
[15429]111#endif
112 --num_retrys;
113 } while (num_retrys>0 && gdbmfile==NULL &&
114 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
115 delete []namebuffer;
116
117 if (gdbmfile == NULL && logout != NULL) {
118 outconvertclass text_t2ascii;
[18051]119 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
[15429]120 }
121
[21796]122 openfile = filename;
123
[15429]124 return (gdbmfile != NULL);
125}
126
127
128void gdbmclass::closedatabase ()
129{
130 if (gdbmfile == NULL) return;
131
132 gdbm_close (gdbmfile);
133 gdbmfile = NULL;
134 openfile.clear();
135}
136
137
[15644]138void gdbmclass::deletekey (const text_t &key)
139{
140 if (gdbmfile == NULL) return;
141
142 // get a utf-8 encoded c string of the unicode key
143 datum key_data;
144 key_data.dptr = (to_utf8(key)).getcstr();
145 if (key_data.dptr == NULL) return;
146 key_data.dsize = strlen (key_data.dptr);
147
148 // delete the key
149 gdbm_delete (gdbmfile, key_data);
150
151 // free up the key memory
152 delete []key_data.dptr;
153}
154
155
[15679]156// returns file extension string
157text_t gdbmclass::getfileextension ()
158{
[18656]159 // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
160 return ".gdb";
161 //if (littleEndian()) return ".ldb";
162 //return ".bdb";
[15679]163}
164
165
[15429]166// returns true on success
[15644]167bool gdbmclass::getkeydata (const text_t& key, text_t &data)
168{
169 datum key_data;
170 datum return_data;
171
172 if (gdbmfile == NULL) return false;
173
174 // get a utf-8 encoded c string of the unicode key
175 key_data.dptr = (to_utf8(key)).getcstr();
176 if (key_data.dptr == NULL) {
[17719]177 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15644]178 return false;
179 }
180 key_data.dsize = strlen (key_data.dptr);
181
182 // fetch the result
183 return_data = gdbm_fetch (gdbmfile, key_data);
184 delete []key_data.dptr;
185
186 if (return_data.dptr == NULL) return false;
187
188 data.setcarr (return_data.dptr, return_data.dsize);
189 free (return_data.dptr);
190 data = to_uni(data); // convert to unicode
191
192 return true;
193}
194
195
196// returns array of keys
197text_tarray gdbmclass::getkeys ()
198{
199 text_tarray keys;
200
201 text_t key = getfirstkey();
202 while (!key.empty())
203 {
204 keys.push_back(key);
205 key = getnextkey(key);
206 }
207
208 return keys;
209}
210
211
212// returns true on success
[15649]213bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
[15429]214{
215 if (gdbmfile == NULL) return false;
216
217 // store the value
218 datum key_data;
219 datum data_data;
220
221 // get a utf-8 encoded c string of the unicode key
222 key_data.dptr = (to_utf8(key)).getcstr();
223 if (key_data.dptr == NULL) {
[17719]224 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15429]225 return false;
226 }
227 key_data.dsize = strlen (key_data.dptr);
228
229 data_data.dptr = (to_utf8(data)).getcstr();
230 if (data_data.dptr == NULL) {
[17719]231 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15429]232 delete []key_data.dptr;
233 return false;
234 }
235 data_data.dsize = strlen (data_data.dptr);
236
237 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
238 delete []key_data.dptr;
239 delete []data_data.dptr;
240
241 return (ret == 0);
242}
243
244
[15644]245// ----------------------------------------------------------------------------------------
246// GDBM-ONLY FUNCTIONS
247// ----------------------------------------------------------------------------------------
[15429]248
249// getfirstkey and getnextkey are used for traversing the database
250// no insertions or deletions should be carried out while traversing
251// the database. when there are no keys left to visit in the database
252// an empty string is returned.
253text_t gdbmclass::getfirstkey ()
254{
255 if (gdbmfile == NULL) return g_EmptyText;
256
257 // get the first key
258 datum firstkey_data = gdbm_firstkey (gdbmfile);
259 if (firstkey_data.dptr == NULL) return g_EmptyText;
260
261 // convert it to text_t
262 text_t firstkey;
263 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
264 free (firstkey_data.dptr);
265 return to_uni(firstkey); // convert to unicode
266}
267
268
269text_t gdbmclass::getnextkey (const text_t &key)
270{
271 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
272
273 // get a utf-8 encoded c string of the unicode key
274 datum key_data;
275 key_data.dptr = (to_utf8(key)).getcstr();
276 if (key_data.dptr == NULL) return g_EmptyText;
277 key_data.dsize = strlen (key_data.dptr);
278
279 // get the next key
280 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
281 if (nextkey_data.dptr == NULL) {
282 delete []key_data.dptr;
283 return g_EmptyText;
284 }
285
286 // convert it to text_t
287 text_t nextkey;
288 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
289 free (nextkey_data.dptr);
290 delete []key_data.dptr;
291 return to_uni(nextkey); // convert to unicode
292}
[18050]293
294
// The following routine was written before the GDBM library was upgraded
// to read both little-endian and big-endian files, so the original reason
// for developing it is no longer as important. It is still useful to keep
// around, however, because the GDBM file format is not guaranteed to be the
// same from one computer architecture to the next, even when both have the
// same endianness. We encountered this issue on the Mac at one stage: the
// size of the header data structure for GDBM was simply different from the
// size produced on Linux. The Mac case was resolved to be binary compatible
// with Linux, but it shows that we cannot rely on this always being the
// case. Using this method provides a contingency plan: the databases can be
// shipped as .txt.gz files (i.e. portable), which are then converted on the
// host machine into a native GDBM database that is meaningful to that
// computer.
308
[18050]309void gdbmclass::generate_from_txtgz (text_t filename)
310{
311 // Looking to read in the database
312 // => check to see if .ldb/.bdb file already there
313 // if not (first time) then generate using txt2db
314
315 // need to generate architecture native GDBM file using txt2db
316
317 // replace sought after gdbm filename ext with ".txt.gz"
318
319 text_t::const_iterator begin = filename.begin();
320 text_t::const_iterator end= filename.end();
[19061]321
322 if (begin != end) {
323 end = end - 1;
324 }
[18050]325
326 text_t::const_iterator here = end;
327
328 bool found_ext = false;
329
330 while (here != begin) {
331 if (*here == '.') {
332 found_ext = true;
333 break;
334 }
335 here--;
336 }
337
338 text_t filename_root;
339
340 if (found_ext) {
341 filename_root = substr(begin,here);
342 }
343 else {
344 filename_root = filename;
345 }
346
347 text_t txtgz_filename = filename_root + ".txt.gz";
348 if (file_exists(txtgz_filename))
349 {
350 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
351 //cmd += " | txt2db \"" + filename + "\"";
352
353 // Test to make sure Perl is on the path
354 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
355 text_t cmd_test = "perl -v 1>&2";
356 int rv_test = gsdl_system(cmd_test, true, cerr);
357 if (rv_test != 0) {
358 cerr << "Tried to find Perl. Return exit value of running "
359 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
360 cerr << "Check that Perl is set in your environment variable PATH." << endl;
361 cerr << "At present, PATH=" << getenv("PATH") << endl;
362 }
363
364 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
365 int rv = gsdl_system(cmd, true, cerr);
366 // For some reason, launching this command with gsdl_system() still returns 1
367 // even when it returns 0 when run from the command-line. We can check whether
368 // we succeeded by looking at whether the output database file was created.
369 if (rv != 0) {
370 cerr << "Warning, non-zero return value on running command \""
371 << cmd << "\": " << rv << endl;
372 if (!file_exists(filename)) {
373 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
374 }
375 }
376 }
377}
Note: See TracBrowser for help on using the repository browser.