source: gsdl/trunk/common-src/src/lib/gdbmclass.cpp@ 18050

Last change on this file since 18050 was 18050, checked in by mdewsnip, 15 years ago

Moved the txtgz code into a new function to tidy up the opendatabase() function, and marked it as deprecated since it will no longer be necessary now that the GDBM library reads both little and big endian databases.

File size: 8.7 KB
RevLine 
[15429]1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
[15679]27#include "gsdltools.h"
[15429]28#include "gsdlunicode.h"
[16895]29#include "fileutil.h"
[17701]30#include "stdlib.h"
[15429]31
32
[16895]33
[15429]34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
[15644]39
[15429]40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
[16895]49
[15429]50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 openfile = filename;
[15557]59
60 // Map the DB mode values into GDBM mode values
61 int gdbm_mode = GDBM_READER;
62 if (mode == DB_WRITER)
63 {
64 gdbm_mode = GDBM_WRITER;
65 }
66 else if (mode == DB_WRITER_CREATE)
67 {
68 gdbm_mode = GDBM_WRCREAT;
69 }
70
[18050]71 if (gdbm_mode == GDBM_READER)
72 {
73 if (!file_exists(filename))
74 {
75 generate_from_txtgz(filename);
[16895]76 }
77 }
78
[15429]79 char *namebuffer = filename.getcstr();
80 do {
81#ifdef __WIN32__
[15557]82 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
[15429]83#else
[15557]84 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
[15429]85#endif
86 --num_retrys;
87 } while (num_retrys>0 && gdbmfile==NULL &&
88 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
89 delete []namebuffer;
90
91 if (gdbmfile == NULL && logout != NULL) {
92 outconvertclass text_t2ascii;
93 (*logout) << text_t2ascii << "database open failed on: " << filename << "\n";
94 }
95
96 return (gdbmfile != NULL);
97}
98
99
100void gdbmclass::closedatabase ()
101{
102 if (gdbmfile == NULL) return;
103
104 gdbm_close (gdbmfile);
105 gdbmfile = NULL;
106 openfile.clear();
107}
108
109
[15644]110void gdbmclass::deletekey (const text_t &key)
111{
112 if (gdbmfile == NULL) return;
113
114 // get a utf-8 encoded c string of the unicode key
115 datum key_data;
116 key_data.dptr = (to_utf8(key)).getcstr();
117 if (key_data.dptr == NULL) return;
118 key_data.dsize = strlen (key_data.dptr);
119
120 // delete the key
121 gdbm_delete (gdbmfile, key_data);
122
123 // free up the key memory
124 delete []key_data.dptr;
125}
126
127
[15679]128// returns file extension string
129text_t gdbmclass::getfileextension ()
130{
131 if (littleEndian()) return ".ldb";
132 return ".bdb";
133}
134
135
[15429]136// returns true on success
[15644]137bool gdbmclass::getkeydata (const text_t& key, text_t &data)
138{
139 datum key_data;
140 datum return_data;
141
142 if (gdbmfile == NULL) return false;
143
144 // get a utf-8 encoded c string of the unicode key
145 key_data.dptr = (to_utf8(key)).getcstr();
146 if (key_data.dptr == NULL) {
[17719]147 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15644]148 return false;
149 }
150 key_data.dsize = strlen (key_data.dptr);
151
152 // fetch the result
153 return_data = gdbm_fetch (gdbmfile, key_data);
154 delete []key_data.dptr;
155
156 if (return_data.dptr == NULL) return false;
157
158 data.setcarr (return_data.dptr, return_data.dsize);
159 free (return_data.dptr);
160 data = to_uni(data); // convert to unicode
161
162 return true;
163}
164
165
166// returns array of keys
167text_tarray gdbmclass::getkeys ()
168{
169 text_tarray keys;
170
171 text_t key = getfirstkey();
172 while (!key.empty())
173 {
174 keys.push_back(key);
175 key = getnextkey(key);
176 }
177
178 return keys;
179}
180
181
182// returns true on success
[15649]183bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
[15429]184{
185 if (gdbmfile == NULL) return false;
186
187 // store the value
188 datum key_data;
189 datum data_data;
190
191 // get a utf-8 encoded c string of the unicode key
192 key_data.dptr = (to_utf8(key)).getcstr();
193 if (key_data.dptr == NULL) {
[17719]194 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15429]195 return false;
196 }
197 key_data.dsize = strlen (key_data.dptr);
198
199 data_data.dptr = (to_utf8(data)).getcstr();
200 if (data_data.dptr == NULL) {
[17719]201 if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
[15429]202 delete []key_data.dptr;
203 return false;
204 }
205 data_data.dsize = strlen (data_data.dptr);
206
207 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
208 delete []key_data.dptr;
209 delete []data_data.dptr;
210
211 return (ret == 0);
212}
213
214
[15644]215// ----------------------------------------------------------------------------------------
216// GDBM-ONLY FUNCTIONS
217// ----------------------------------------------------------------------------------------
[15429]218
219// getfirstkey and getnextkey are used for traversing the database
220// no insertions or deletions should be carried out while traversing
221// the database. when there are no keys left to visit in the database
222// an empty string is returned.
223text_t gdbmclass::getfirstkey ()
224{
225 if (gdbmfile == NULL) return g_EmptyText;
226
227 // get the first key
228 datum firstkey_data = gdbm_firstkey (gdbmfile);
229 if (firstkey_data.dptr == NULL) return g_EmptyText;
230
231 // convert it to text_t
232 text_t firstkey;
233 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
234 free (firstkey_data.dptr);
235 return to_uni(firstkey); // convert to unicode
236}
237
238
239text_t gdbmclass::getnextkey (const text_t &key)
240{
241 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
242
243 // get a utf-8 encoded c string of the unicode key
244 datum key_data;
245 key_data.dptr = (to_utf8(key)).getcstr();
246 if (key_data.dptr == NULL) return g_EmptyText;
247 key_data.dsize = strlen (key_data.dptr);
248
249 // get the next key
250 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
251 if (nextkey_data.dptr == NULL) {
252 delete []key_data.dptr;
253 return g_EmptyText;
254 }
255
256 // convert it to text_t
257 text_t nextkey;
258 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
259 free (nextkey_data.dptr);
260 delete []key_data.dptr;
261 return to_uni(nextkey); // convert to unicode
262}
[18050]263
264
265// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian
266void gdbmclass::generate_from_txtgz (text_t filename)
267{
268 // Looking to read in the database
269 // => check to see if .ldb/.bdb file already there
270 // if not (first time) then generate using txt2db
271
272 // need to generate architecture native GDBM file using txt2db
273
274 // replace sought after gdbm filename ext with ".txt.gz"
275
276 text_t::const_iterator begin = filename.begin();
277 text_t::const_iterator end= filename.end();
278
279 if (begin != end)
280 {
281 end = end - 1;
282 }
283
284 text_t::const_iterator here = end;
285
286 bool found_ext = false;
287
288 while (here != begin) {
289 if (*here == '.') {
290 found_ext = true;
291 break;
292 }
293 here--;
294 }
295
296 text_t filename_root;
297
298 if (found_ext) {
299 filename_root = substr(begin,here);
300 }
301 else {
302 filename_root = filename;
303 }
304
305 text_t txtgz_filename = filename_root + ".txt.gz";
306 if (file_exists(txtgz_filename))
307 {
308 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
309 //cmd += " | txt2db \"" + filename + "\"";
310
311 // Test to make sure Perl is on the path
312 // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
313 text_t cmd_test = "perl -v 1>&2";
314 int rv_test = gsdl_system(cmd_test, true, cerr);
315 if (rv_test != 0) {
316 cerr << "Tried to find Perl. Return exit value of running "
317 << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
318 cerr << "Check that Perl is set in your environment variable PATH." << endl;
319 cerr << "At present, PATH=" << getenv("PATH") << endl;
320 }
321
322 text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
323 int rv = gsdl_system(cmd, true, cerr);
324 // For some reason, launching this command with gsdl_system() still returns 1
325 // even when it returns 0 when run from the command-line. We can check whether
326 // we succeeded by looking at whether the output database file was created.
327 if (rv != 0) {
328 cerr << "Warning, non-zero return value on running command \""
329 << cmd << "\": " << rv << endl;
330 if (!file_exists(filename)) {
331 cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
332 }
333 }
334 }
335}
Note: See TracBrowser for help on using the repository browser.