source: gsdl/trunk/common-src/src/lib/gdbmclass.cpp@ 17680

Last change on this file since 17680 was 17680, checked in by ak19, 15 years ago

Dr Bainbridge fixed this so that the database file txt.gz is now decompressed and converted into a .ldb database file. This works on Windows but requires the help of a new perl script, txtgz-to-gdbm.pl.

File size: 8.0 KB
Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30
31
32
33
34gdbmclass::~gdbmclass()
35{
36 closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43 bool need_filelock
44#else
45 bool
46#endif
47 )
48{
49
50 text_t data_location;
51 int block_size = 512;
52
53 if (gdbmfile != NULL) {
54 if (openfile == filename) return true;
55 else closedatabase ();
56 }
57
58 openfile = filename;
59
60 // Map the DB mode values into GDBM mode values
61 int gdbm_mode = GDBM_READER;
62 if (mode == DB_WRITER)
63 {
64 gdbm_mode = GDBM_WRITER;
65 }
66 else if (mode == DB_WRITER_CREATE)
67 {
68 gdbm_mode = GDBM_WRCREAT;
69 }
70
71 if (gdbm_mode == GDBM_READER) {
72 // Looking to read in the database
73 // => check to see if .ldb/.bdb file already there
74 // if not (first time) then generate using txt2db
75 if (!file_exists(filename)) {
76
77 // need to generate architecture native GDBM file using txt2db
78
79 // replace sought after gdbm filename ext with ".txt.gz"
80
81 text_t::const_iterator begin = filename.begin();
82 text_t::const_iterator end= filename.end();
83 text_t::const_iterator here = end;
84
85 bool found_ext = false;
86
87 while (here != begin) {
88 if (*here == '.') {
89 found_ext = true;
90 break;
91 }
92 here--;
93 }
94
95 text_t filename_root;
96
97 if (found_ext) {
98 filename_root = substr(begin,here);
99 }
100 else {
101 filename_root = filename;
102 }
103
104 text_t txtgz_filename = filename_root + ".txt.gz";
105 if (file_exists(txtgz_filename))
106 {
107 //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
108 //cmd += " | txt2db \"" + filename + "\"";
109
110 text_t cmd;
111#ifdef __WIN32__
112 cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
113#else
114 cmd = "perl txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
115#endif
116
117 int rv = gsdl_system(cmd, true, cerr);
118 // For some reason, launching this command with gsdl_system() still returns 1
119 // even when it returns 0 when run from the command-line. We can check whether
120 // we succeeded by looking at whether the output database file was created.
121 if (rv != 0) {
122 cerr << "\nWarning, non-zero return value on running command \""
123 << cmd << "\": " << rv << endl;
124 if (!file_exists(filename)) {
125 cerr << "Tried to run command \""<<cmd<<"\", but it failed\n";
126 }
127 }
128 }
129 }
130 }
131
132 char *namebuffer = filename.getcstr();
133 do {
134#ifdef __WIN32__
135 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
136#else
137 gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
138#endif
139 --num_retrys;
140 } while (num_retrys>0 && gdbmfile==NULL &&
141 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
142 delete []namebuffer;
143
144 if (gdbmfile == NULL && logout != NULL) {
145 outconvertclass text_t2ascii;
146 (*logout) << text_t2ascii << "database open failed on: " << filename << "\n";
147 }
148
149 return (gdbmfile != NULL);
150}
151
152
153void gdbmclass::closedatabase ()
154{
155 if (gdbmfile == NULL) return;
156
157 gdbm_close (gdbmfile);
158 gdbmfile = NULL;
159 openfile.clear();
160}
161
162
163void gdbmclass::deletekey (const text_t &key)
164{
165 if (gdbmfile == NULL) return;
166
167 // get a utf-8 encoded c string of the unicode key
168 datum key_data;
169 key_data.dptr = (to_utf8(key)).getcstr();
170 if (key_data.dptr == NULL) return;
171 key_data.dsize = strlen (key_data.dptr);
172
173 // delete the key
174 gdbm_delete (gdbmfile, key_data);
175
176 // free up the key memory
177 delete []key_data.dptr;
178}
179
180
181// returns file extension string
182text_t gdbmclass::getfileextension ()
183{
184 if (littleEndian()) return ".ldb";
185 return ".bdb";
186}
187
188
189// returns true on success
190bool gdbmclass::getkeydata (const text_t& key, text_t &data)
191{
192 datum key_data;
193 datum return_data;
194
195 if (gdbmfile == NULL) return false;
196
197 // get a utf-8 encoded c string of the unicode key
198 key_data.dptr = (to_utf8(key)).getcstr();
199 if (key_data.dptr == NULL) {
200 if (logout != NULL) (*logout) << "gdbmclass: out of memory\n";
201 return false;
202 }
203 key_data.dsize = strlen (key_data.dptr);
204
205 // fetch the result
206 return_data = gdbm_fetch (gdbmfile, key_data);
207 delete []key_data.dptr;
208
209 if (return_data.dptr == NULL) return false;
210
211 data.setcarr (return_data.dptr, return_data.dsize);
212 free (return_data.dptr);
213 data = to_uni(data); // convert to unicode
214
215 return true;
216}
217
218
219// returns array of keys
220text_tarray gdbmclass::getkeys ()
221{
222 text_tarray keys;
223
224 text_t key = getfirstkey();
225 while (!key.empty())
226 {
227 keys.push_back(key);
228 key = getnextkey(key);
229 }
230
231 return keys;
232}
233
234
235// returns true on success
236bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
237{
238 if (gdbmfile == NULL) return false;
239
240 // store the value
241 datum key_data;
242 datum data_data;
243
244 // get a utf-8 encoded c string of the unicode key
245 key_data.dptr = (to_utf8(key)).getcstr();
246 if (key_data.dptr == NULL) {
247 if (logout != NULL) (*logout) << "gdbmclass: out of memory\n";
248 return false;
249 }
250 key_data.dsize = strlen (key_data.dptr);
251
252 data_data.dptr = (to_utf8(data)).getcstr();
253 if (data_data.dptr == NULL) {
254 if (logout != NULL) (*logout) << "gdbmclass: out of memory\n";
255 delete []key_data.dptr;
256 return false;
257 }
258 data_data.dsize = strlen (data_data.dptr);
259
260 int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
261 delete []key_data.dptr;
262 delete []data_data.dptr;
263
264 return (ret == 0);
265}
266
267
268// ----------------------------------------------------------------------------------------
269// GDBM-ONLY FUNCTIONS
270// ----------------------------------------------------------------------------------------
271
272// getfirstkey and getnextkey are used for traversing the database
273// no insertions or deletions should be carried out while traversing
274// the database. when there are no keys left to visit in the database
275// an empty string is returned.
276text_t gdbmclass::getfirstkey ()
277{
278 if (gdbmfile == NULL) return g_EmptyText;
279
280 // get the first key
281 datum firstkey_data = gdbm_firstkey (gdbmfile);
282 if (firstkey_data.dptr == NULL) return g_EmptyText;
283
284 // convert it to text_t
285 text_t firstkey;
286 firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
287 free (firstkey_data.dptr);
288 return to_uni(firstkey); // convert to unicode
289}
290
291
292text_t gdbmclass::getnextkey (const text_t &key)
293{
294 if (gdbmfile == NULL || key.empty()) return g_EmptyText;
295
296 // get a utf-8 encoded c string of the unicode key
297 datum key_data;
298 key_data.dptr = (to_utf8(key)).getcstr();
299 if (key_data.dptr == NULL) return g_EmptyText;
300 key_data.dsize = strlen (key_data.dptr);
301
302 // get the next key
303 datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
304 if (nextkey_data.dptr == NULL) {
305 delete []key_data.dptr;
306 return g_EmptyText;
307 }
308
309 // convert it to text_t
310 text_t nextkey;
311 nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
312 free (nextkey_data.dptr);
313 delete []key_data.dptr;
314 return to_uni(nextkey); // convert to unicode
315}
Note: See TracBrowser for help on using the repository browser.