root/gsdl/trunk/common-src/src/lib/gdbmclass.cpp @ 18051

Revision 18051, 9.4 KB (checked in by mdewsnip, 12 years ago)

Now looks for the opposite endianness file if the desired one doesn't exist, since we can now read both. By Michael Dewsnip from DL Consulting Ltd ( http://www.dlconsulting.com).

Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31
32
33
34gdbmclass::~gdbmclass()
35{
36  closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43                  bool need_filelock
44#else
45                              bool
46#endif
47                  )
48{
49
50  text_t data_location;
51  int block_size = 512;
52 
53  if (gdbmfile != NULL) {
54    if (openfile == filename) return true;
55    else closedatabase ();
56  }
57
58  openfile = filename;
59
60  // Map the DB mode values into GDBM mode values
61  int gdbm_mode = GDBM_READER;
62  if (mode == DB_WRITER)
63  {
64    gdbm_mode = GDBM_WRITER;
65  }
66  else if (mode == DB_WRITER_CREATE)
67  {
68    gdbm_mode = GDBM_WRCREAT;
69  }
70
71  text_t gdbm_filename = filename;
72  if (gdbm_mode == GDBM_READER)
73  {
74    // If the specified GDBM file doesn't exist, try the other extension (we can now read both)
75    if (!file_exists(gdbm_filename))
76    {
77      if (ends_with(gdbm_filename, ".ldb"))
78      {
79    gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
80      }
81      else if (ends_with(gdbm_filename, ".bdb"))
82      {
83    gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".bdb").size()) + ".ldb";
84      }
85    }
86
87    // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
88    if (!file_exists(gdbm_filename))
89    {
90      // We're desperate, so try generating the desired GDBM file from a txtgz file
91      gdbm_filename = filename;
92      generate_from_txtgz(gdbm_filename);
93    }
94  }
95
96  char *namebuffer = gdbm_filename.getcstr();
97  do {
98#ifdef __WIN32__
99    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
100#else
101    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
102#endif
103    --num_retrys;
104  } while (num_retrys>0 && gdbmfile==NULL &&
105       (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
106  delete []namebuffer;
107 
108  if (gdbmfile == NULL && logout != NULL) {
109    outconvertclass text_t2ascii;
110    (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
111  }
112
113  return (gdbmfile != NULL);
114}
115
116
117void gdbmclass::closedatabase ()
118{
119  if (gdbmfile == NULL) return;
120 
121  gdbm_close (gdbmfile);
122  gdbmfile = NULL;
123  openfile.clear();
124}
125
126
127void gdbmclass::deletekey (const text_t &key)
128{
129  if (gdbmfile == NULL) return;
130
131  // get a utf-8 encoded c string of the unicode key
132  datum key_data;
133  key_data.dptr = (to_utf8(key)).getcstr();
134  if (key_data.dptr == NULL) return;
135  key_data.dsize = strlen (key_data.dptr);
136
137  // delete the key
138  gdbm_delete (gdbmfile, key_data);
139
140  // free up the key memory
141  delete []key_data.dptr;
142}
143
144
145// returns file extension string
146text_t gdbmclass::getfileextension ()
147{
148  if (littleEndian()) return ".ldb";
149  return ".bdb";
150}
151
152
153// returns true on success
154bool gdbmclass::getkeydata (const text_t& key, text_t &data)
155{
156  datum key_data;
157  datum return_data;
158
159  if (gdbmfile == NULL) return false;
160 
161  // get a utf-8 encoded c string of the unicode key
162  key_data.dptr = (to_utf8(key)).getcstr();
163  if (key_data.dptr == NULL) {
164    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
165    return false;
166  }
167  key_data.dsize = strlen (key_data.dptr);
168 
169  // fetch the result
170  return_data = gdbm_fetch (gdbmfile, key_data);
171  delete []key_data.dptr;
172 
173  if (return_data.dptr == NULL) return false;
174
175  data.setcarr (return_data.dptr, return_data.dsize);
176  free (return_data.dptr);
177  data = to_uni(data);  // convert to unicode
178
179  return true;
180}
181
182
183// returns array of keys
184text_tarray gdbmclass::getkeys ()
185{
186  text_tarray keys;
187
188  text_t key = getfirstkey();
189  while (!key.empty())
190  {
191    keys.push_back(key);
192    key = getnextkey(key);
193  }
194
195  return keys;
196}
197
198
199// returns true on success
200bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
201{
202  if (gdbmfile == NULL) return false;
203 
204  // store the value
205  datum key_data;
206  datum data_data;
207
208  // get a utf-8 encoded c string of the unicode key
209  key_data.dptr = (to_utf8(key)).getcstr();
210  if (key_data.dptr == NULL) {
211    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
212    return false;
213  }
214  key_data.dsize = strlen (key_data.dptr);
215
216  data_data.dptr = (to_utf8(data)).getcstr();
217  if (data_data.dptr == NULL) {
218    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
219    delete []key_data.dptr;
220    return false;
221  }
222  data_data.dsize = strlen (data_data.dptr);
223
224  int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
225  delete []key_data.dptr;
226  delete []data_data.dptr;
227
228  return (ret == 0);
229}
230
231
232// ----------------------------------------------------------------------------------------
233//   GDBM-ONLY FUNCTIONS
234// ----------------------------------------------------------------------------------------
235
236// getfirstkey and getnextkey are used for traversing the database
237// no insertions or deletions should be carried out while traversing
238// the database. when there are no keys left to visit in the database
239// an empty string is returned.
240text_t gdbmclass::getfirstkey ()
241{
242  if (gdbmfile == NULL) return g_EmptyText;
243
244  // get the first key
245  datum firstkey_data = gdbm_firstkey (gdbmfile);
246  if (firstkey_data.dptr == NULL) return g_EmptyText;
247
248  // convert it to text_t
249  text_t firstkey;
250  firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
251  free (firstkey_data.dptr);
252  return to_uni(firstkey);  // convert to unicode
253}
254
255
256text_t gdbmclass::getnextkey (const text_t &key)
257{
258  if (gdbmfile == NULL || key.empty()) return g_EmptyText;
259
260  // get a utf-8 encoded c string of the unicode key
261  datum key_data;
262  key_data.dptr = (to_utf8(key)).getcstr();
263  if (key_data.dptr == NULL) return g_EmptyText;
264  key_data.dsize = strlen (key_data.dptr);
265 
266  // get the next key
267  datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
268  if (nextkey_data.dptr == NULL) {
269    delete []key_data.dptr;
270    return g_EmptyText;
271  }
272
273  // convert it to text_t
274  text_t nextkey;
275  nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
276  free (nextkey_data.dptr);
277  delete []key_data.dptr;
278  return to_uni(nextkey);  // convert to unicode
279}
280
281
282// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
283void gdbmclass::generate_from_txtgz (text_t filename)
284{
285    // Looking to read in the database
286    // => check to see if .ldb/.bdb file already there
287    // if not (first time) then generate using txt2db
288
289      // need to generate architecture native GDBM file using txt2db
290
291      // replace sought after gdbm filename ext with ".txt.gz"
292
293      text_t::const_iterator begin = filename.begin();
294      text_t::const_iterator end= filename.end();
295     
296      if (begin != end)
297        {
298        end = end - 1;
299        }
300       
301      text_t::const_iterator here = end;
302
303      bool found_ext = false;
304
305      while (here != begin) {
306    if (*here == '.') {
307      found_ext = true;
308      break;
309    }
310    here--;
311      }
312     
313      text_t filename_root;
314
315      if (found_ext) {
316    filename_root = substr(begin,here);
317      }
318      else {
319    filename_root = filename;
320      }
321
322      text_t txtgz_filename = filename_root + ".txt.gz";
323      if (file_exists(txtgz_filename))
324      {
325    //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
326    //cmd += " | txt2db \"" + filename + "\"";
327
328    // Test to make sure Perl is on the path
329    // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
330    text_t cmd_test = "perl -v 1>&2";
331    int rv_test = gsdl_system(cmd_test, true, cerr);
332    if (rv_test != 0) {
333      cerr << "Tried to find Perl. Return exit value of running "
334           << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
335      cerr << "Check that Perl is set in your environment variable PATH." << endl;
336      cerr << "At present, PATH=" << getenv("PATH") << endl;
337    }
338
339    text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
340    int rv = gsdl_system(cmd, true, cerr);
341    // For some reason, launching this command with gsdl_system() still returns 1
342    // even when it returns 0 when run from the command-line. We can check whether
343    // we succeeded by looking at whether the output database file was created.
344    if (rv != 0) {
345      cerr << "Warning, non-zero return value on running command \""
346           << cmd << "\": " << rv << endl;
347      if (!file_exists(filename)) {
348        cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
349      }
350    }   
351      }
352}
Note: See TracBrowser for help on using the browser.