source: gsdl/trunk/common-src/src/lib/gdbmclass.cpp@ 18309

Last change on this file since 18309 was 18051, checked in by mdewsnip, 16 years ago

Now looks for the opposite endianness file if the desired one doesn't exist, since we can now read both. By Michael Dewsnip from DL Consulting Ltd (http://www.dlconsulting.com).

File size: 9.4 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31
32
33
34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
49
50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 openfile = filename;
59
60 // Map the DB mode values into GDBM mode values
61 int gdbm_mode = GDBM_READER;
62 if (mode == DB_WRITER)
63 {
64 gdbm_mode = GDBM_WRITER;
65 }
66 else if (mode == DB_WRITER_CREATE)
67 {
68 gdbm_mode = GDBM_WRCREAT;
69 }
70
71 text_t gdbm_filename = filename;
72 if (gdbm_mode == GDBM_READER)
73 {
74 // If the specified GDBM file doesn't exist, try the other extension (we can now read both)
75 if (!file_exists(gdbm_filename))
76 {
77 if (ends_with(gdbm_filename, ".ldb"))
78 {
79 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
80 }
81 else if (ends_with(gdbm_filename, ".bdb"))
82 {
83 gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".bdb").size()) + ".ldb";
84 }
85 }
86
87 // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
88 if (!file_exists(gdbm_filename))
89 {
90 // We're desperate, so try generating the desired GDBM file from a txtgz file
91 gdbm_filename = filename;
92 generate_from_txtgz(gdbm_filename);
93 }
94 }
95
96 char *namebuffer = gdbm_filename.getcstr();
97 do {
98#ifdef __WIN32__
99 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
100#else
101 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
102#endif
103 --num_retrys;
104 } while (num_retrys>0 && gdbmfile==NULL &&
105 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
106 delete []namebuffer;
107
108 if (gdbmfile == NULL && logout != NULL) {
109 outconvertclass text_t2ascii;
110 (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
111 }
112
113 return (gdbmfile != NULL);
114}
115
116
117void gdbmclass::closedatabase ()
118{
119 if (gdbmfile == NULL) return;
120
121 gdbm_close (gdbmfile);
122 gdbmfile = NULL;
123 openfile.clear();
124}
125
126
127void gdbmclass::deletekey (const text_t &key)
128{
129 if (gdbmfile == NULL) return;
130
131 // get a utf-8 encoded c string of the unicode key
132 datum key_data;
133 key_data.dptr = (to_utf8(key)).getcstr();
134 if (key_data.dptr == NULL) return;
135 key_data.dsize = strlen (key_data.dptr);
136
137 // delete the key
138 gdbm_delete (gdbmfile, key_data);
139
140 // free up the key memory
141 delete []key_data.dptr;
142}
143
144
145// returns file extension string
146text_t gdbmclass::getfileextension ()
147{
148 if (littleEndian()) return ".ldb";
149 return ".bdb";
150}
151
152
153// returns true on success
154bool gdbmclass::getkeydata (const text_t& key, text_t &data)
155{
156 datum key_data;
157 datum return_data;
158
159 if (gdbmfile == NULL) return false;
160
161 // get a utf-8 encoded c string of the unicode key
162 key_data.dptr = (to_utf8(key)).getcstr();
163 if (key_data.dptr == NULL) {
164 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
165 return false;
166 }
167 key_data.dsize = strlen (key_data.dptr);
168
169 // fetch the result
170 return_data = gdbm_fetch (gdbmfile, key_data);
171 delete []key_data.dptr;
172
173 if (return_data.dptr == NULL) return false;
174
175 data.setcarr (return_data.dptr, return_data.dsize);
176 free (return_data.dptr);
177 data = to_uni(data); // convert to unicode
178
179 return true;
180}
181
182
183// returns array of keys
184text_tarray gdbmclass::getkeys ()
185{
186 text_tarray keys;
187
188 text_t key = getfirstkey();
189 while (!key.empty())
190 {
191 keys.push_back(key);
192 key = getnextkey(key);
193 }
194
195 return keys;
196}
197
198
199// returns true on success
200bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
201{
202 if (gdbmfile == NULL) return false;
203
204 // store the value
205 datum key_data;
206 datum data_data;
207
208 // get a utf-8 encoded c string of the unicode key
209 key_data.dptr = (to_utf8(key)).getcstr();
210 if (key_data.dptr == NULL) {
211 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
212 return false;
213 }
214 key_data.dsize = strlen (key_data.dptr);
215
216 data_data.dptr = (to_utf8(data)).getcstr();
217 if (data_data.dptr == NULL) {
218 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
219 delete []key_data.dptr;
220 return false;
221 }
222 data_data.dsize = strlen (data_data.dptr);
223
224 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
225 delete []key_data.dptr;
226 delete []data_data.dptr;
227
228 return (ret == 0);
229}
230
231
232// ----------------------------------------------------------------------------------------
233// GDBM-ONLY FUNCTIONS
234// ----------------------------------------------------------------------------------------
235
236// getfirstkey and getnextkey are used for traversing the database
237// no insertions or deletions should be carried out while traversing
238// the database. when there are no keys left to visit in the database
239// an empty string is returned.
240text_t gdbmclass::getfirstkey ()
241{
242 if (gdbmfile == NULL) return g_EmptyText;
243
244 // get the first key
245 datum firstkey_data = gdbm_firstkey (gdbmfile);
246 if (firstkey_data.dptr == NULL) return g_EmptyText;
247
248 // convert it to text_t
249 text_t firstkey;
250 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
251 free (firstkey_data.dptr);
252 return to_uni(firstkey); // convert to unicode
253}
254
255
256text_t gdbmclass::getnextkey (const text_t &key)
257{
258 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
259
260 // get a utf-8 encoded c string of the unicode key
261 datum key_data;
262 key_data.dptr = (to_utf8(key)).getcstr();
263 if (key_data.dptr == NULL) return g_EmptyText;
264 key_data.dsize = strlen (key_data.dptr);
265
266 // get the next key
267 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
268 if (nextkey_data.dptr == NULL) {
269 delete []key_data.dptr;
270 return g_EmptyText;
271 }
272
273 // convert it to text_t
274 text_t nextkey;
275 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
276 free (nextkey_data.dptr);
277 delete []key_data.dptr;
278 return to_uni(nextkey); // convert to unicode
279}
280
281
282// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
283void gdbmclass::generate_from_txtgz (text_t filename)
284{
285 // Looking to read in the database
286 // => check to see if .ldb/.bdb file already there
287 // if not (first time) then generate using txt2db
288
289 // need to generate architecture native GDBM file using txt2db
290
291 // replace sought after gdbm filename ext with ".txt.gz"
292
293 text_t::const_iterator begin = filename.begin();
294 text_t::const_iterator end= filename.end();
295
296 if (begin != end)
297 {
298 end = end - 1;
299 }
300
301 text_t::const_iterator here = end;
302
303 bool found_ext = false;
304
305 while (here != begin) {
306 if (*here == '.') {
307 found_ext = true;
308 break;
309 }
310 here--;
311 }
312
313 text_t filename_root;
314
315 if (found_ext) {
316 filename_root = substr(begin,here);
317 }
318 else {
319 filename_root = filename;
320 }
321
322 text_t txtgz_filename = filename_root + ".txt.gz";
323 if (file_exists(txtgz_filename))
324 {
325 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
326 //cmd += " | txt2db \"" + filename + "\"";
327
328 // Test to make sure Perl is on the path
329 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
330 text_t cmd_test = "perl -v 1>&2";
331 int rv_test = gsdl_system(cmd_test, true, cerr);
332 if (rv_test != 0) {
333 cerr << "Tried to find Perl. Return exit value of running "
334 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
335 cerr << "Check that Perl is set in your environment variable PATH." << endl;
336 cerr << "At present, PATH=" << getenv("PATH") << endl;
337 }
338
339 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
340 int rv = gsdl_system(cmd, true, cerr);
341 // For some reason, launching this command with gsdl_system() still returns 1
342 // even when it returns 0 when run from the command-line. We can check whether
343 // we succeeded by looking at whether the output database file was created.
344 if (rv != 0) {
345 cerr << "Warning, non-zero return value on running command \""
346 << cmd << "\": " << rv << endl;
347 if (!file_exists(filename)) {
348 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
349 }
350 }
351 }
352}
Note: See TracBrowser for help on using the repository browser.