/**********************************************************************
 *
 * gdbmclass.cpp -- 
 * Copyright (C) 1999-2008  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "gdbmclass.h"
#include "gsdltools.h"
#include "gsdlunicode.h"
#include "fileutil.h"
#include "stdlib.h"


// Closes the database (a no-op if none is open) when the object goes away.
gdbmclass::~gdbmclass()
{
  closedatabase();
}


// Opens the GDBM database stored in `filename`.
//
//   filename      - path of the database file
//   mode          - DB_WRITER maps to GDBM_WRITER, DB_WRITER_CREATE maps to
//                   GDBM_WRCREAT, any other value opens with GDBM_READER
//   num_retrys    - the open is always attempted once; it is repeated while
//                   attempts remain and gdbm_errno is GDBM_CANT_BE_READER or
//                   GDBM_CANT_BE_WRITER
//   need_filelock - only named (and forwarded to gdbm_open) under __WIN32__
//
// If this object already has `filename` open the call returns true straight
// away; if it has a different file open, that file is closed first.
// NOTE(review): the early return does not compare the requested mode with the
// mode the file was originally opened in -- confirm callers never rely on that.
//
// returns true if opened
bool gdbmclass::opendatabase (const text_t &filename, int mode, int num_retrys, 
#ifdef __WIN32__
			      bool need_filelock
#else
			      bool
#endif
			      )
{
  int block_size = 512;

  if (gdbmfile != NULL) {
    if (openfile == filename) return true;
    else closedatabase ();
  }

  openfile = filename;

  // Map the DB mode values into GDBM mode values
  int gdbm_mode = GDBM_READER;
  if (mode == DB_WRITER) {
    gdbm_mode = GDBM_WRITER;
  }
  else if (mode == DB_WRITER_CREATE) {
    gdbm_mode = GDBM_WRCREAT;
  }

  text_t gdbm_filename = filename;
  if (gdbm_mode == GDBM_READER) {
    // If the specified GDBM file doesn't exist, try the other extension (we can now read both)
    if (!file_exists(gdbm_filename)) {
      if (ends_with(gdbm_filename, ".ldb")) {
	gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".ldb").size()) + ".bdb";
      }
      else if (ends_with(gdbm_filename, ".bdb")) {
	gdbm_filename = substr(gdbm_filename.begin(), gdbm_filename.end() - ((text_t) ".bdb").size()) + ".ldb";
      }
    }

    // DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files
    if (!file_exists(gdbm_filename)) {
      // We're desperate, so try generating the desired GDBM file from a txtgz file
      gdbm_filename = filename;
      generate_from_txtgz(gdbm_filename);
    }
  }

  // gdbm_open wants a plain C string; getcstr() hands back a new[]'d copy.
  // Guard against a NULL buffer the same way the key/data accessors do, rather
  // than passing NULL on to gdbm_open.
  char *namebuffer = gdbm_filename.getcstr();
  if (namebuffer == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    return false;
  }

  do {
#ifdef __WIN32__
    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL, (need_filelock) ? 1 : 0);
#else
    gdbmfile = gdbm_open (namebuffer, block_size, gdbm_mode, 00664, NULL);
#endif
    --num_retrys;
  } while (num_retrys>0 && gdbmfile==NULL &&
	   (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
  delete []namebuffer;

  if (gdbmfile == NULL && logout != NULL) {
    outconvertclass text_t2ascii;
    (*logout) << text_t2ascii << "database open failed on: " << gdbm_filename << "\n";
  }

  return (gdbmfile != NULL);
}


// Closes the currently open database and forgets its filename.
// Does nothing when no database is open.
void gdbmclass::closedatabase ()
{
  if (gdbmfile == NULL) return;

  gdbm_close (gdbmfile);
  gdbmfile = NULL;
  openfile.clear();
}


// Removes `key` from the open database.
// Returns silently when no database is open or the key could not be encoded;
// the result of gdbm_delete is not reported to the caller.
void gdbmclass::deletekey (const text_t &key)
{
  if (gdbmfile == NULL) return;

  // get a utf-8 encoded c string of the unicode key
  datum key_data;
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) return;
  key_data.dsize = strlen (key_data.dptr);

  // delete the key
  gdbm_delete (gdbmfile, key_data);

  // free up the key memory
  delete []key_data.dptr;
}


// returns file extension string
// (".ldb" when littleEndian() is true, ".bdb" otherwise)
text_t gdbmclass::getfileextension ()
{
  if (littleEndian()) return ".ldb";
  return ".bdb";
}


// Looks up `key` in the open database and, when found, stores the associated
// value (converted with to_uni) in `data`.
// `data` is left untouched when the lookup fails.
//
// returns true on success
bool gdbmclass::getkeydata (const text_t& key, text_t &data)
{
  datum key_data;
  datum return_data;

  if (gdbmfile == NULL) return false;

  // get a utf-8 encoded c string of the unicode key
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    return false;
  }
  key_data.dsize = strlen (key_data.dptr);

  // fetch the result
  return_data = gdbm_fetch (gdbmfile, key_data);
  delete []key_data.dptr;

  if (return_data.dptr == NULL) return false;

  // copy the fetched bytes into `data`, then release gdbm's buffer with free()
  data.setcarr (return_data.dptr, return_data.dsize);
  free (return_data.dptr);
  data = to_uni(data);  // convert to unicode

  return true;
}


// returns array of keys
// Collected by starting at getfirstkey() and following getnextkey() until an
// empty key comes back.
text_tarray gdbmclass::getkeys ()
{
  text_tarray keys;

  text_t key = getfirstkey();
  while (!key.empty()) {
    keys.push_back(key);
    key = getnextkey(key);
  }

  return keys;
}


// Stores `data` under `key` in the open database using GDBM_REPLACE.
// Both strings are passed through to_utf8 before being handed to gdbm_store.
//
// returns true on success (gdbm_store returned 0)
bool gdbmclass::setkeydata (const text_t &key, const text_t &data)
{
  if (gdbmfile == NULL) return false;

  // store the value
  datum key_data;
  datum data_data;

  // get a utf-8 encoded c string of the unicode key
  key_data.dptr = (to_utf8(key)).getcstr();
  if (key_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    return false;
  }
  key_data.dsize = strlen (key_data.dptr);

  data_data.dptr = (to_utf8(data)).getcstr();
  if (data_data.dptr == NULL) {
    if (logout != NULL) (*logout) << "gdbmclass: out of memory" << endl;
    // the key buffer was already allocated, so release it before bailing out
    delete []key_data.dptr;
    return false;
  }
  data_data.dsize = strlen (data_data.dptr);

  int ret = gdbm_store (gdbmfile, key_data, data_data, GDBM_REPLACE);
  delete []key_data.dptr;
  delete []data_data.dptr;

  return (ret == 0);
}


// ----------------------------------------------------------------------------------------
//   GDBM-ONLY FUNCTIONS
// ----------------------------------------------------------------------------------------

// getfirstkey and getnextkey are used for traversing the database
// no insertions or deletions should be carried out while traversing
// the database. when there are no keys left to visit in the database
// an empty string is returned.
// Returns the first key of the open database, converted with to_uni.
// g_EmptyText is returned when no database is open or gdbm has no first key.
text_t gdbmclass::getfirstkey ()
{
  if (gdbmfile != NULL) {
    datum first = gdbm_firstkey (gdbmfile);
    if (first.dptr != NULL) {
      // copy gdbm's bytes into a text_t, then release gdbm's buffer with free()
      text_t raw_key;
      raw_key.setcarr (first.dptr, first.dsize);
      free (first.dptr);
      return to_uni(raw_key);  // convert to unicode
    }
  }

  return g_EmptyText;
}


// Returns the key that follows `key` in gdbm's traversal order, converted
// with to_uni. g_EmptyText is returned when no database is open, `key` is
// empty, the key could not be encoded, or gdbm reports no further key.
text_t gdbmclass::getnextkey (const text_t &key)
{
  if (gdbmfile == NULL) return g_EmptyText;
  if (key.empty()) return g_EmptyText;

  // utf-8 encoded c string of the unicode key
  datum current;
  current.dptr = (to_utf8(key)).getcstr();
  if (current.dptr == NULL) return g_EmptyText;
  current.dsize = strlen (current.dptr);

  // ask gdbm for the successor; the encoded key is not needed after this call
  datum successor = gdbm_nextkey (gdbmfile, current);
  delete []current.dptr;

  if (successor.dptr == NULL) return g_EmptyText;

  // copy gdbm's bytes into a text_t, then release gdbm's buffer with free()
  text_t raw_key;
  raw_key.setcarr (successor.dptr, successor.dsize);
  free (successor.dptr);
  return to_uni(raw_key);  // convert to unicode
}


// DEPRECATED -- No longer necessary now that the GDBM library reads both little-endian and big-endian files void gdbmclass::generate_from_txtgz (text_t filename) { // Looking to read in the database // => check to see if .ldb/.bdb file already there // if not (first time) then generate using txt2db // need to generate architecture native GDBM file using txt2db // replace sought after gdbm filename ext with ".txt.gz" text_t::const_iterator begin = filename.begin(); text_t::const_iterator end= filename.end(); if (begin != end) { end = end - 1; } text_t::const_iterator here = end; bool found_ext = false; while (here != begin) { if (*here == '.') { found_ext = true; break; } here--; } text_t filename_root; if (found_ext) { filename_root = substr(begin,here); } else { filename_root = filename; } text_t txtgz_filename = filename_root + ".txt.gz"; if (file_exists(txtgz_filename)) { //text_t cmd = "gzip --decompress --to-stdout \"" + txtgz_filename + "\""; //cmd 
+= " | txt2db \"" + filename + "\""; // Test to make sure Perl is on the path // On Linux, the output of the test goes to STDOUT so redirect it to STDERR text_t cmd_test = "perl -v 1>&2"; int rv_test = gsdl_system(cmd_test, true, cerr); if (rv_test != 0) { cerr << "Tried to find Perl. Return exit value of running " << cmd_test <<": "<< rv_test << ", (expected this to be 0)" << endl; cerr << "Check that Perl is set in your environment variable PATH." << endl; cerr << "At present, PATH=" << getenv("PATH") << endl; } text_t cmd = "perl -S txtgz-to-gdbm.pl \"" + txtgz_filename + "\" \"" + filename + "\""; int rv = gsdl_system(cmd, true, cerr); // For some reason, launching this command with gsdl_system() still returns 1 // even when it returns 0 when run from the command-line. We can check whether // we succeeded by looking at whether the output database file was created. if (rv != 0) { cerr << "Warning, non-zero return value on running command \"" << cmd << "\": " << rv << endl; if (!file_exists(filename)) { cerr << "Tried to run command \""<