source: gsdl/trunk/common-src/src/lib/gdbmclass.cpp@ 18880

Last change on this file since 18880 was 18880, checked in by oranfry, 15 years ago

being more explicit about included libraries for the sake of newer compilers

File size: 9.8 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31#include <cstring>
32
33
34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
49
50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 openfile = filename;
59
60 // Map the DB mode values into GDBM mode values
61 int gdbm_mode = GDBM_READER;
62 if (mode == DB_WRITER)
63 {
64 gdbm_mode = GDBM_WRITER;
65 }
66 else if (mode == DB_WRITER_CREATE)
67 {
68 gdbm_mode = GDBM_WRCREAT;
69 }
70
71 text_t gdbm_filename = filename;
72 if (gdbm_mode == GDBM_READER)
73 {
74 // make sure we have the right file extension. Should be db (for systems dbs, and gdb for collection dbs. But need to handle old style ldb/bdb
75
76 // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
77 if (!file_exists(gdbm_filename))
78 {
79 if (ends_with(gdbm_filename, ".gdb")) {
80 // only try ldb and bdb for collection dbs, not system dbs which are .db
81 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
82
83 if (!file_exists(gdbm_filename)) {
84 // try bdb as well
85 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
86 }
87 }
88 }
89
90 // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
91 if (!file_exists(gdbm_filename))
92 {
93 // We're desperate, so try generating the desired GDBM file from a txtgz file
94 gdbm_filename = filename;
95 generate_from_txtgz(gdbm_filename);
96 }
97 }
98
99 char *namebuffer = gdbm_filename.getcstr();
100 do {
101#ifdef __WIN32__
102 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
103#else
104 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
105#endif
106 --num_retrys;
107 } while (num_retrys>0 && gdbmfile==NULL &&
108 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
109 delete []namebuffer;
110
111 if (gdbmfile == NULL && logout != NULL) {
112 outconvertclass text_t2ascii;
113 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
114 }
115
116 return (gdbmfile != NULL);
117}
118
119
120void gdbmclass::closedatabase ()
121{
122 if (gdbmfile == NULL) return;
123
124 gdbm_close (gdbmfile);
125 gdbmfile = NULL;
126 openfile.clear();
127}
128
129
130void gdbmclass::deletekey (const text_t &key)
131{
132 if (gdbmfile == NULL) return;
133
134 // get a utf-8 encoded c string of the unicode key
135 datum key_data;
136 key_data.dptr = (to_utf8(key)).getcstr();
137 if (key_data.dptr == NULL) return;
138 key_data.dsize = strlen (key_data.dptr);
139
140 // delete the key
141 gdbm_delete (gdbmfile, key_data);
142
143 // free up the key memory
144 delete []key_data.dptr;
145}
146
147
148// returns file extension string
149text_t gdbmclass::getfileextension ()
150{
151 // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
152 return ".gdb";
153 //if (littleEndian()) return ".ldb";
154 //return ".bdb";
155}
156
157
158// returns true on success
159bool gdbmclass::getkeydata (const text_t& key, text_t &data)
160{
161 datum key_data;
162 datum return_data;
163
164 if (gdbmfile == NULL) return false;
165
166 // get a utf-8 encoded c string of the unicode key
167 key_data.dptr = (to_utf8(key)).getcstr();
168 if (key_data.dptr == NULL) {
169 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
170 return false;
171 }
172 key_data.dsize = strlen (key_data.dptr);
173
174 // fetch the result
175 return_data = gdbm_fetch (gdbmfile, key_data);
176 delete []key_data.dptr;
177
178 if (return_data.dptr == NULL) return false;
179
180 data.setcarr (return_data.dptr, return_data.dsize);
181 free (return_data.dptr);
182 data = to_uni(data); // convert to unicode
183
184 return true;
185}
186
187
188// returns array of keys
189text_tarray gdbmclass::getkeys ()
190{
191 text_tarray keys;
192
193 text_t key = getfirstkey();
194 while (!key.empty())
195 {
196 keys.push_back(key);
197 key = getnextkey(key);
198 }
199
200 return keys;
201}
202
203
204// returns true on success
205bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
206{
207 if (gdbmfile == NULL) return false;
208
209 // store the value
210 datum key_data;
211 datum data_data;
212
213 // get a utf-8 encoded c string of the unicode key
214 key_data.dptr = (to_utf8(key)).getcstr();
215 if (key_data.dptr == NULL) {
216 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
217 return false;
218 }
219 key_data.dsize = strlen (key_data.dptr);
220
221 data_data.dptr = (to_utf8(data)).getcstr();
222 if (data_data.dptr == NULL) {
223 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
224 delete []key_data.dptr;
225 return false;
226 }
227 data_data.dsize = strlen (data_data.dptr);
228
229 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
230 delete []key_data.dptr;
231 delete []data_data.dptr;
232
233 return (ret == 0);
234}
235
236
237// ----------------------------------------------------------------------------------------
238// GDBM-ONLY FUNCTIONS
239// ----------------------------------------------------------------------------------------
240
241// getfirstkey and getnextkey are used for traversing the database
242// no insertions or deletions should be carried out while traversing
243// the database. when there are no keys left to visit in the database
244// an empty string is returned.
245text_t gdbmclass::getfirstkey ()
246{
247 if (gdbmfile == NULL) return g_EmptyText;
248
249 // get the first key
250 datum firstkey_data = gdbm_firstkey (gdbmfile);
251 if (firstkey_data.dptr == NULL) return g_EmptyText;
252
253 // convert it to text_t
254 text_t firstkey;
255 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
256 free (firstkey_data.dptr);
257 return to_uni(firstkey); // convert to unicode
258}
259
260
261text_t gdbmclass::getnextkey (const text_t &key)
262{
263 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
264
265 // get a utf-8 encoded c string of the unicode key
266 datum key_data;
267 key_data.dptr = (to_utf8(key)).getcstr();
268 if (key_data.dptr == NULL) return g_EmptyText;
269 key_data.dsize = strlen (key_data.dptr);
270
271 // get the next key
272 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
273 if (nextkey_data.dptr == NULL) {
274 delete []key_data.dptr;
275 return g_EmptyText;
276 }
277
278 // convert it to text_t
279 text_t nextkey;
280 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
281 free (nextkey_data.dptr);
282 delete []key_data.dptr;
283 return to_uni(nextkey); // convert to unicode
284}
285
286
287// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
288void gdbmclass::generate_from_txtgz (text_t filename)
289{
290 // Looking to read in the database
291 // => check to see if .ldb/.bdb file already there
292 // if not (first time) then generate using txt2db
293
294 // need to generate architecture native GDBM file using txt2db
295
296 // replace sought after gdbm filename ext with ".txt.gz"
297
298 text_t::const_iterator begin = filename.begin();
299 text_t::const_iterator end= filename.end();
300
301 if (begin != end)
302 {
303 end = end - 1;
304 }
305
306 text_t::const_iterator here = end;
307
308 bool found_ext = false;
309
310 while (here != begin) {
311 if (*here == '.') {
312 found_ext = true;
313 break;
314 }
315 here--;
316 }
317
318 text_t filename_root;
319
320 if (found_ext) {
321 filename_root = substr(begin,here);
322 }
323 else {
324 filename_root = filename;
325 }
326
327 text_t txtgz_filename = filename_root + ".txt.gz";
328 if (file_exists(txtgz_filename))
329 {
330 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
331 //cmd += " | txt2db \"" + filename + "\"";
332
333 // Test to make sure Perl is on the path
334 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
335 text_t cmd_test = "perl -v 1>&2";
336 int rv_test = gsdl_system(cmd_test, true, cerr);
337 if (rv_test != 0) {
338 cerr << "Tried to find Perl. Return exit value of running "
339 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
340 cerr << "Check that Perl is set in your environment variable PATH." << endl;
341 cerr << "At present, PATH=" << getenv("PATH") << endl;
342 }
343
344 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
345 int rv = gsdl_system(cmd, true, cerr);
346 // For some reason, launching this command with gsdl_system() still returns 1
347 // even when it returns 0 when run from the command-line. We can check whether
348 // we succeeded by looking at whether the output database file was created.
349 if (rv != 0) {
350 cerr << "Warning, non-zero return value on running command \""
351 << cmd << "\": " << rv << endl;
352 if (!file_exists(filename)) {
353 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
354 }
355 }
356 }
357}
Note: See TracBrowser for help on using the repository browser.