source: main/trunk/greenstone2/common-src/src/lib/gdbmclass.cpp@ 26653

Last change on this file since 26653 was 26653, checked in by davidb, 11 years ago

Code changed from using WIN32 to using _MSC_VER (the difference being that the former is set for any Windows-based compiler, whilst the latter is specifically set by the Microsoft Visual Studio compiler). Up to this point the difference was not important; however, to allow for cross-compilation (using mingw under Linux to produce native Windows binaries) the difference is important, and needs to be used more carefully

File size: 10.7 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31#include <cstring>
32
33
34gdbmclass::gdbmclass(const text_t& gsdlhome)
35 : dbclass(gsdlhome)
36{
37 gdbmfile = NULL;
38}
39
40gdbmclass::~gdbmclass()
41{
42 closedatabase();
43}
44
45
46// returns true if opened
47bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
48#ifdef __MSC_VER__
49 bool need_filelock
50#else
51 bool
52#endif
53 )
54{
55
56 text_t data_location;
57 int block_size = 512;
58
59 if (gdbmfile != NULL) {
60 if (openfile == filename) return true;
61 else closedatabase ();
62 }
63
64 // Map the DB mode values into GDBM mode values
65 int gdbm_mode = GDBM_READER;
66 if (mode == DB_WRITER)
67 {
68 gdbm_mode = GDBM_WRITER;
69 }
70 else if (mode == DB_WRITER_CREATE)
71 {
72 gdbm_mode = GDBM_WRCREAT;
73 }
74
75 text_t gdbm_filename = filename;
76 if (gdbm_mode == GDBM_READER)
77 {
78 // make sure we have the right file extension. Should be all gdb now.
79 // But need to handle old style ldb/bdb collection dbs
80
81 // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
82 if (!file_exists(gdbm_filename))
83 {
84 // should be all gdb now
85 if (ends_with(gdbm_filename, ".gdb")) {
86 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
87
88 if (!file_exists(gdbm_filename)) {
89 // try bdb as well
90 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
91 }
92 }
93 }
94
95 // The following fall-back is less likely to be needed now that GDBM
96 // library reads both little-endian and big-endian files
97 if (!file_exists(gdbm_filename))
98 {
99 // We're desperate, so try generating the desired GDBM file from a txtgz file
100 gdbm_filename = filename;
101 generate_from_txtgz(gdbm_filename);
102 }
103 }
104
105 char *namebuffer = gdbm_filename.getcstr();
106 do {
107#ifdef __MSC_VER__
108 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
109#else
110 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
111#endif
112 --num_retrys;
113 } while (num_retrys>0 && gdbmfile==NULL &&
114 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
115 delete []namebuffer;
116
117 if (gdbmfile == NULL && logout != NULL) {
118 outconvertclass text_t2ascii;
119 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
120 }
121
122 openfile = filename;
123
124 return (gdbmfile != NULL);
125}
126
127
128void gdbmclass::closedatabase ()
129{
130 if (gdbmfile == NULL) return;
131
132 gdbm_close (gdbmfile);
133 gdbmfile = NULL;
134 openfile.clear();
135}
136
137
138void gdbmclass::deletekey (const text_t &key)
139{
140 if (gdbmfile == NULL) return;
141
142 // get a utf-8 encoded c string of the unicode key
143 datum key_data;
144 key_data.dptr = (to_utf8(key)).getcstr();
145 if (key_data.dptr == NULL) return;
146 key_data.dsize = strlen (key_data.dptr);
147
148 // delete the key
149 gdbm_delete (gdbmfile, key_data);
150
151 // free up the key memory
152 delete []key_data.dptr;
153}
154
155
156// returns file extension string
157text_t gdbmclass::getfileextension ()
158{
159 // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
160 return ".gdb";
161 //if (littleEndian()) return ".ldb";
162 //return ".bdb";
163}
164
165
166// returns true on success
167bool gdbmclass::getkeydata (const text_t& key, text_t &data)
168{
169 datum key_data;
170 datum return_data;
171
172 if (gdbmfile == NULL) return false;
173
174 // get a utf-8 encoded c string of the unicode key
175 key_data.dptr = (to_utf8(key)).getcstr();
176 if (key_data.dptr == NULL) {
177 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
178 return false;
179 }
180 key_data.dsize = strlen (key_data.dptr);
181
182 // fetch the result
183 return_data = gdbm_fetch (gdbmfile, key_data);
184 delete []key_data.dptr;
185
186 if (return_data.dptr == NULL) return false;
187
188 data.setcarr (return_data.dptr, return_data.dsize);
189 free (return_data.dptr);
190 data = to_uni(data); // convert to unicode
191
192 return true;
193}
194
195
196// returns array of keys
197text_tarray gdbmclass::getkeys ()
198{
199 text_tarray keys;
200
201 text_t key = getfirstkey();
202 while (!key.empty())
203 {
204 keys.push_back(key);
205 key = getnextkey(key);
206 }
207
208 return keys;
209}
210
211
212// returns true on success
213bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
214{
215 if (gdbmfile == NULL) return false;
216
217 // store the value
218 datum key_data;
219 datum data_data;
220
221 // get a utf-8 encoded c string of the unicode key
222 key_data.dptr = (to_utf8(key)).getcstr();
223 if (key_data.dptr == NULL) {
224 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
225 return false;
226 }
227 key_data.dsize = strlen (key_data.dptr);
228
229 data_data.dptr = (to_utf8(data)).getcstr();
230 if (data_data.dptr == NULL) {
231 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
232 delete []key_data.dptr;
233 return false;
234 }
235 data_data.dsize = strlen (data_data.dptr);
236
237 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
238 delete []key_data.dptr;
239 delete []data_data.dptr;
240
241 return (ret == 0);
242}
243
244
245// ----------------------------------------------------------------------------------------
246// GDBM-ONLY FUNCTIONS
247// ----------------------------------------------------------------------------------------
248
249// getfirstkey and getnextkey are used for traversing the database
250// no insertions or deletions should be carried out while traversing
251// the database. when there are no keys left to visit in the database
252// an empty string is returned.
253text_t gdbmclass::getfirstkey ()
254{
255 if (gdbmfile == NULL) return g_EmptyText;
256
257 // get the first key
258 datum firstkey_data = gdbm_firstkey (gdbmfile);
259 if (firstkey_data.dptr == NULL) return g_EmptyText;
260
261 // convert it to text_t
262 text_t firstkey;
263 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
264 free (firstkey_data.dptr);
265 return to_uni(firstkey); // convert to unicode
266}
267
268
269text_t gdbmclass::getnextkey (const text_t &key)
270{
271 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
272
273 // get a utf-8 encoded c string of the unicode key
274 datum key_data;
275 key_data.dptr = (to_utf8(key)).getcstr();
276 if (key_data.dptr == NULL) return g_EmptyText;
277 key_data.dsize = strlen (key_data.dptr);
278
279 // get the next key
280 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
281 if (nextkey_data.dptr == NULL) {
282 delete []key_data.dptr;
283 return g_EmptyText;
284 }
285
286 // convert it to text_t
287 text_t nextkey;
288 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
289 free (nextkey_data.dptr);
290 delete []key_data.dptr;
291 return to_uni(nextkey); // convert to unicode
292}
293
294
// The following routine was written before the GDBM library was upgraded
// to read both little-endian and big-endian files, and so the reason for
// it being developed is not so important now. It's useful to keep around,
// however, because the file format for GDBM is not guaranteed to be the
// same from one computer architecture to the next, even if they both
// have the same endianness. We encountered this issue on the Mac at one
// stage: the size of the header data-structure for GDBM was simply a
// different size to that produced on Linux. The Mac case resolved to be
// binary compatible with Linux, but shows that we can't rely on this always
// being the case. Using this method provides a contingency plan. The
// databases can be shipped as .txt.gz (i.e. portable), which are then
// converted on the host's machine to a native GDBM database that is
// meaningful to that computer.
308
309void gdbmclass::generate_from_txtgz (text_t filename)
310{
311 // Looking to read in the database
312 // => check to see if .ldb/.bdb file already there
313 // if not (first time) then generate using txt2db
314
315 // need to generate architecture native GDBM file using txt2db
316
317 // replace sought after gdbm filename ext with ".txt.gz"
318
319 text_t::const_iterator begin = filename.begin();
320 text_t::const_iterator end= filename.end();
321
322 if (begin != end) {
323 end = end - 1;
324 }
325
326 text_t::const_iterator here = end;
327
328 bool found_ext = false;
329
330 while (here != begin) {
331 if (*here == '.') {
332 found_ext = true;
333 break;
334 }
335 here--;
336 }
337
338 text_t filename_root;
339
340 if (found_ext) {
341 filename_root = substr(begin,here);
342 }
343 else {
344 filename_root = filename;
345 }
346
347 text_t txtgz_filename = filename_root + ".txt.gz";
348 if (file_exists(txtgz_filename))
349 {
350 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
351 //cmd += " | txt2db \"" + filename + "\"";
352
353 // Test to make sure Perl is on the path
354 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
355 text_t cmd_test = "perl -v 1>&2";
356 int rv_test = gsdl_system(cmd_test, true, cerr);
357 if (rv_test != 0) {
358 cerr << "Tried to find Perl. Return exit value of running "
359 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
360 cerr << "Check that Perl is set in your environment variable PATH." << endl;
361 cerr << "At present, PATH=" << getenv("PATH") << endl;
362 }
363
364 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
365 int rv = gsdl_system(cmd, true, cerr);
366 // For some reason, launching this command with gsdl_system() still returns 1
367 // even when it returns 0 when run from the command-line. We can check whether
368 // we succeeded by looking at whether the output database file was created.
369 if (rv != 0) {
370 cerr << "Warning, non-zero return value on running command \""
371 << cmd << "\": " << rv << endl;
372 if (!file_exists(filename)) {
373 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
374 }
375 }
376 }
377}
Note: See TracBrowser for help on using the repository browser.