root/gsdl/trunk/common-src/src/lib/gdbmclass.cpp @ 18656

Revision 18656, 9.8 KB (checked in by kjdon, 12 years ago)

now look for .gdb files for collection databases, not ldb/bdb

Line 
1/**********************************************************************
2 *
3 * gdbmclass.cpp --
4 * Copyright (C) 1999-2008  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmclass.h"
27#include "gsdltools.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "stdlib.h"
31
32
33
34gdbmclass::~gdbmclass()
35{
36  closedatabase();
37}
38
39
40// returns true if opened
41bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys,
42#ifdef __WIN32__
43                  bool need_filelock
44#else
45                              bool
46#endif
47                  )
48{
49
50  text_t data_location;
51  int block_size = 512;
52 
53  if (gdbmfile != NULL) {
54    if (openfile == filename) return true;
55    else closedatabase ();
56  }
57
58  openfile = filename;
59
60  // Map the DB mode values into GDBM mode values
61  int gdbm_mode = GDBM_READER;
62  if (mode == DB_WRITER)
63  {
64    gdbm_mode = GDBM_WRITER;
65  }
66  else if (mode == DB_WRITER_CREATE)
67  {
68    gdbm_mode = GDBM_WRCREAT;
69  }
70
71  text_t gdbm_filename = filename;
72  if (gdbm_mode == GDBM_READER)
73  {
74    // make sure we have the right file extension. Should be db (for systems dbs, and gdb for collection dbs. But need to handle old style ldb/bdb
75
76    // If the specified GDBM file doesn't exist, (should be gdb) try the other extensions (we can now read both)
77    if (!file_exists(gdbm_filename))
78    {
79      if (ends_with(gdbm_filename, ".gdb")) {
80    // only try ldb and bdb for collection dbs, not system dbs which are .db
81    gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".gdb").size()) + ".ldb";
82     
83    if (!file_exists(gdbm_filename)) {
84      // try bdb as well
85      gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
86    }
87      }
88    }
89
90    // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
91    if (!file_exists(gdbm_filename))
92    {
93      // We're desperate, so try generating the desired GDBM file from a txtgz file
94      gdbm_filename = filename;
95      generate_from_txtgz(gdbm_filename);
96    }
97  }
98
99  char *namebuffer = gdbm_filename.getcstr();
100  do {
101#ifdef __WIN32__
102    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
103#else
104    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
105#endif
106    --num_retrys;
107  } while (num_retrys>0 && gdbmfile==NULL &&
108       (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
109  delete []namebuffer;
110 
111  if (gdbmfile == NULL && logout != NULL) {
112    outconvertclass text_t2ascii;
113    (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
114  }
115
116  return (gdbmfile != NULL);
117}
118
119
120void gdbmclass::closedatabase ()
121{
122  if (gdbmfile == NULL) return;
123 
124  gdbm_close (gdbmfile);
125  gdbmfile = NULL;
126  openfile.clear();
127}
128
129
130void gdbmclass::deletekey (const text_t &key)
131{
132  if (gdbmfile == NULL) return;
133
134  // get a utf-8 encoded c string of the unicode key
135  datum key_data;
136  key_data.dptr = (to_utf8(key)).getcstr();
137  if (key_data.dptr == NULL) return;
138  key_data.dsize = strlen (key_data.dptr);
139
140  // delete the key
141  gdbm_delete (gdbmfile, key_data);
142
143  // free up the key memory
144  delete []key_data.dptr;
145}
146
147
148// returns file extension string
149text_t gdbmclass::getfileextension ()
150{
151  // now we always use gdb for gdbm files. later on, if we can't find it, we'll try ldb and bdb for backwards compatibility
152  return ".gdb";
153  //if (littleEndian()) return ".ldb";
154  //return ".bdb";
155}
156
157
158// returns true on success
159bool gdbmclass::getkeydata (const text_t& key, text_t &data)
160{
161  datum key_data;
162  datum return_data;
163
164  if (gdbmfile == NULL) return false;
165 
166  // get a utf-8 encoded c string of the unicode key
167  key_data.dptr = (to_utf8(key)).getcstr();
168  if (key_data.dptr == NULL) {
169    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
170    return false;
171  }
172  key_data.dsize = strlen (key_data.dptr);
173 
174  // fetch the result
175  return_data = gdbm_fetch (gdbmfile, key_data);
176  delete []key_data.dptr;
177 
178  if (return_data.dptr == NULL) return false;
179
180  data.setcarr (return_data.dptr, return_data.dsize);
181  free (return_data.dptr);
182  data = to_uni(data);  // convert to unicode
183
184  return true;
185}
186
187
188// returns array of keys
189text_tarray gdbmclass::getkeys ()
190{
191  text_tarray keys;
192
193  text_t key = getfirstkey();
194  while (!key.empty())
195  {
196    keys.push_back(key);
197    key = getnextkey(key);
198  }
199
200  return keys;
201}
202
203
204// returns true on success
205bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
206{
207  if (gdbmfile == NULL) return false;
208 
209  // store the value
210  datum key_data;
211  datum data_data;
212
213  // get a utf-8 encoded c string of the unicode key
214  key_data.dptr = (to_utf8(key)).getcstr();
215  if (key_data.dptr == NULL) {
216    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
217    return false;
218  }
219  key_data.dsize = strlen (key_data.dptr);
220
221  data_data.dptr = (to_utf8(data)).getcstr();
222  if (data_data.dptr == NULL) {
223    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
224    delete []key_data.dptr;
225    return false;
226  }
227  data_data.dsize = strlen (data_data.dptr);
228
229  int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
230  delete []key_data.dptr;
231  delete []data_data.dptr;
232
233  return (ret == 0);
234}
235
236
237// ----------------------------------------------------------------------------------------
238//   GDBM-ONLY FUNCTIONS
239// ----------------------------------------------------------------------------------------
240
241// getfirstkey and getnextkey are used for traversing the database
242// no insertions or deletions should be carried out while traversing
243// the database. when there are no keys left to visit in the database
244// an empty string is returned.
245text_t gdbmclass::getfirstkey ()
246{
247  if (gdbmfile == NULL) return g_EmptyText;
248
249  // get the first key
250  datum firstkey_data = gdbm_firstkey (gdbmfile);
251  if (firstkey_data.dptr == NULL) return g_EmptyText;
252
253  // convert it to text_t
254  text_t firstkey;
255  firstkey.setcarr (firstkey_data.dptr, firstkey_data.dsize);
256  free (firstkey_data.dptr);
257  return to_uni(firstkey);  // convert to unicode
258}
259
260
261text_t gdbmclass::getnextkey (const text_t &key)
262{
263  if (gdbmfile == NULL || key.empty()) return g_EmptyText;
264
265  // get a utf-8 encoded c string of the unicode key
266  datum key_data;
267  key_data.dptr = (to_utf8(key)).getcstr();
268  if (key_data.dptr == NULL) return g_EmptyText;
269  key_data.dsize = strlen (key_data.dptr);
270 
271  // get the next key
272  datum nextkey_data = gdbm_nextkey (gdbmfile, key_data);
273  if (nextkey_data.dptr == NULL) {
274    delete []key_data.dptr;
275    return g_EmptyText;
276  }
277
278  // convert it to text_t
279  text_t nextkey;
280  nextkey.setcarr (nextkey_data.dptr, nextkey_data.dsize);
281  free (nextkey_data.dptr);
282  delete []key_data.dptr;
283  return to_uni(nextkey);  // convert to unicode
284}
285
286
287// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
288void gdbmclass::generate_from_txtgz (text_t filename)
289{
290    // Looking to read in the database
291    // => check to see if .ldb/.bdb file already there
292    // if not (first time) then generate using txt2db
293
294      // need to generate architecture native GDBM file using txt2db
295
296      // replace sought after gdbm filename ext with ".txt.gz"
297
298      text_t::const_iterator begin = filename.begin();
299      text_t::const_iterator end= filename.end();
300     
301      if (begin != end)
302        {
303        end = end - 1;
304        }
305       
306      text_t::const_iterator here = end;
307
308      bool found_ext = false;
309
310      while (here != begin) {
311    if (*here == '.') {
312      found_ext = true;
313      break;
314    }
315    here--;
316      }
317     
318      text_t filename_root;
319
320      if (found_ext) {
321    filename_root = substr(begin,here);
322      }
323      else {
324    filename_root = filename;
325      }
326
327      text_t txtgz_filename = filename_root + ".txt.gz";
328      if (file_exists(txtgz_filename))
329      {
330    //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\"";
331    //cmd += " | txt2db \"" + filename + "\"";
332
333    // Test to make sure Perl is on the path
334    // On Linux, the output of the test goes to STDOUT so redirect it to STDERR
335    text_t cmd_test = "perl -v 1>&2";
336    int rv_test = gsdl_system(cmd_test, true, cerr);
337    if (rv_test != 0) {
338      cerr << "Tried to find Perl. Return exit value of running "
339           << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl;
340      cerr << "Check that Perl is set in your environment variable PATH." << endl;
341      cerr << "At present, PATH=" << getenv("PATH") << endl;
342    }
343
344    text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\"";
345    int rv = gsdl_system(cmd, true, cerr);
346    // For some reason, launching this command with gsdl_system() still returns 1
347    // even when it returns 0 when run from the command-line. We can check whether
348    // we succeeded by looking at whether the output database file was created.
349    if (rv != 0) {
350      cerr << "Warning, non-zero return value on running command \""
351           << cmd << "\": " << rv << endl;
352      if (!file_exists(filename)) {
353        cerr << "Tried to run command \""<<cmd<<"\", but it failed" << endl;
354      }
355    }   
356      }
357}
Note: See TracBrowser for help on using the browser.