root/gsdl/trunk/common-src/src/lib/gdbmclass.cpp @ 18050

Revision 18050, 8.7 KB (checked in by mdewsnip, 11 years ago)

Moved the txtgz code into a new function to tidy up the opendatabase() function, and marked it as deprecated since it will no longer be necessary now that the GDBM library reads both little and big endian databases.

Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31
32
33
34gdbmclass::~gdbmclass()
35{
36  closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43                  bool need_filelock
44#else
45                              bool
46#endif
47                  )
48{
49
50  text_t data_location;
51  int block_size = 512;
52 
53  if (gdbmfile != NULL) {
54    if (openfile == filename) return true;
55    else closedatabase ();
56  }
57
58  openfile = filename;
59
60  // Map the DB mode values into GDBM mode values
61  int gdbm_mode = GDBM_READER;
62  if (mode == DB_WRITER)
63  {
64    gdbm_mode = GDBM_WRITER;
65  }
66  else if (mode == DB_WRITER_CREATE)
67  {
68    gdbm_mode = GDBM_WRCREAT;
69  }
70
71  if (gdbm_mode == GDBM_READER)
72  {
73    if (!file_exists(filename))
74    {
75      generate_from_txtgz(filename);
76    }
77  }
78
79  char *namebuffer = filename.getcstr();
80  do {
81#ifdef __WIN32__
82    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
83#else
84    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
85#endif
86    --num_retrys;
87  } while (num_retrys>0 && gdbmfile==NULL &&
88       (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
89  delete []namebuffer;
90 
91  if (gdbmfile == NULL && logout != NULL) {
92    outconvertclass text_t2ascii;
93    (*logout) << text_t2ascii << "database open failed on: " << filename << "\n";
94  }
95
96  return (gdbmfile != NULL);
97}
98
99
100void gdbmclass::closedatabase ()
101{
102  if (gdbmfile == NULL) return;
103 
104  gdbm_close (gdbmfile);
105  gdbmfile = NULL;
106  openfile.clear();
107}
108
109
110void gdbmclass::deletekey (const text_t &key)
111{
112  if (gdbmfile == NULL) return;
113
114  // get a utf-8 encoded c string of the unicode key
115  datum key_data;
116  key_data.dptr = (to_utf8(key)).getcstr();
117  if (key_data.dptr == NULL) return;
118  key_data.dsize = strlen (key_data.dptr);
119
120  // delete the key
121  gdbm_delete (gdbmfile, key_data);
122
123  // free up the key memory
124  delete []key_data.dptr;
125}
126
127
128// returns file extension string
129text_t gdbmclass::getfileextension ()
130{
131  if (littleEndian()) return ".ldb";
132  return ".bdb";
133}
134
135
136// returns true on success
137bool gdbmclass::getkeydata (const text_t& key, text_t &data)
138{
139  datum key_data;
140  datum return_data;
141
142  if (gdbmfile == NULL) return false;
143 
144  // get a utf-8 encoded c string of the unicode key
145  key_data.dptr = (to_utf8(key)).getcstr();
146  if (key_data.dptr == NULL) {
147    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
148    return false;
149  }
150  key_data.dsize = strlen (key_data.dptr);
151 
152  // fetch the result
153  return_data = gdbm_fetch (gdbmfile, key_data);
154  delete []key_data.dptr;
155 
156  if (return_data.dptr == NULL) return false;
157
158  data.setcarr (return_data.dptr, return_data.dsize);
159  free (return_data.dptr);
160  data = to_uni(data);  // convert to unicode
161
162  return true;
163}
164
165
166// returns array of keys
167text_tarray gdbmclass::getkeys ()
168{
169  text_tarray keys;
170
171  text_t key = getfirstkey();
172  while (!key.empty())
173  {
174    keys.push_back(key);
175    key = getnextkey(key);
176  }
177
178  return keys;
179}
180
181
182// returns true on success
183bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
184{
185  if (gdbmfile == NULL) return false;
186 
187  // store the value
188  datum key_data;
189  datum data_data;
190
191  // get a utf-8 encoded c string of the unicode key
192  key_data.dptr = (to_utf8(key)).getcstr();
193  if (key_data.dptr == NULL) {
194    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
195    return false;
196  }
197  key_data.dsize = strlen (key_data.dptr);
198
199  data_data.dptr = (to_utf8(data)).getcstr();
200  if (data_data.dptr == NULL) {
201    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
202    delete []key_data.dptr;
203    return false;
204  }
205  data_data.dsize = strlen (data_data.dptr);
206
207  int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
208  delete []key_data.dptr;
209  delete []data_data.dptr;
210
211  return (ret == 0);
212}
213
214
215// ----------------------------------------------------------------------------------------
216//   GDBM-ONLY FUNCTIONS
217// ----------------------------------------------------------------------------------------
218
219// getfirstkey and getnextkey are used for traversing the database
220// no insertions or deletions should be carried out while traversing
221// the database. when there are no keys left to visit in the database
222// an empty string is returned.
223text_t gdbmclass::getfirstkey ()
224{
225  if (gdbmfile == NULL) return g_EmptyText;
226
227  // get the first key
228  datum firstkey_data = gdbm_firstkey (gdbmfile);
229  if (firstkey_data.dptr == NULL) return g_EmptyText;
230
231  // convert it to text_t
232  text_t firstkey;
233  firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
234  free (firstkey_data.dptr);
235  return to_uni(firstkey);  // convert to unicode
236}
237
238
239text_t gdbmclass::getnextkey (const text_t &key)
240{
241  if (gdbmfile == NULL || key.empty()) return g_EmptyText;
242
243  // get a utf-8 encoded c string of the unicode key
244  datum key_data;
245  key_data.dptr = (to_utf8(key)).getcstr();
246  if (key_data.dptr == NULL) return g_EmptyText;
247  key_data.dsize = strlen (key_data.dptr);
248 
249  // get the next key
250  datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
251  if (nextkey_data.dptr == NULL) {
252    delete []key_data.dptr;
253    return g_EmptyText;
254  }
255
256  // convert it to text_t
257  text_t nextkey;
258  nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
259  free (nextkey_data.dptr);
260  delete []key_data.dptr;
261  return to_uni(nextkey);  // convert to unicode
262}
263
264
265// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian
266void gdbmclass::generate_from_txtgz (text_t filename)
267{
268    // Looking to read in the database
269    // => check to see if .ldb/.bdb file already there
270    // if not (first time) then generate using txt2db
271
272      // need to generate architecture native GDBM file using txt2db
273
274      // replace sought after gdbm filename ext with ".txt.gz"
275
276      text_t::const_iterator begin = filename.begin();
277      text_t::const_iterator end= filename.end();
278     
279      if (begin != end)
280        {
281        end = end - 1;
282        }
283       
284      text_t::const_iterator here = end;
285
286      bool found_ext = false;
287
288      while (here != begin) {
289    if (*here == '.') {
290      found_ext = true;
291      break;
292    }
293    here--;
294      }
295     
296      text_t filename_root;
297
298      if (found_ext) {
299    filename_root = substr(begin,here);
300      }
301      else {
302    filename_root = filename;
303      }
304
305      text_t txtgz_filename = filename_root + ".txt.gz";
306      if (file_exists(txtgz_filename))
307      {
308    //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
309    //cmd += " | txt2db \"" + filename + "\"";
310
311    // Test to make sure Perl is on the path
312    // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
313    text_t cmd_test = "perl -v 1>&2";
314    int rv_test = gsdl_system(cmd_test, true, cerr);
315    if (rv_test != 0) {
316      cerr << "Tried to find Perl. Return exit value of running "
317           << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
318      cerr << "Check that Perl is set in your environment variable PATH." << endl;
319      cerr << "At present, PATH=" << getenv("PATH") << endl;
320    }
321
322    text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
323    int rv = gsdl_system(cmd, true, cerr);
324    // For some reason, launching this command with gsdl_system() still returns 1
325    // even when it returns 0 when run from the command-line. We can check whether
326    // we succeeded by looking at whether the output database file was created.
327    if (rv != 0) {
328      cerr << "Warning, non-zero return value on running command \""
329           << cmd << "\": " << rv << endl;
330      if (!file_exists(filename)) {
331        cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
332      }
333    }   
334      }
335}
Note: See TracBrowser for help on using the browser.