/********************************************************************** * * txt2db.cpp -- * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Copyright (C) 1999 The New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **********************************************************************/ #ifdef __WIN32__ #include "autoconf.h" #include "systems.h" #include "gdbmconst.h" #include "gdbm.h" #else #include #endif //#include "gsdlconf.h" #include "text_t.h" #include #include #include #include #include #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_IOS_H) # include #else # include #endif using namespace std; void print_usage (char *program_name) { cerr << "usage: " << program_name << " database-name" << endl; cerr << "usage: " << program_name << " -update database-name" << endl; cerr << "usage: " << program_name << " -append database-name" << endl << endl; cerr << "options:" << endl; cerr << " -update update existing database" << endl; cerr << " -append legacy alias for -update" << endl << endl; } // lock a file on linux // [hs, 2 july 2010] // - modified to create a locl file local to the collection [jmt12] int lock () { string file_path (""); char *collect_dir = getenv ("GSDLCOLLECTDIR"); if (collect_dir != NULL) { file_path += collect_dir; } file_path += "/tmp"; if ( access( file_path.c_str(), 00 ) != 0 ) { mkdir(file_path.c_str(), 00777); } file_path += "/gdb.lock"; ///out << "txt2dbl::lock(" << file_path << ") => "; int fd2 = open (file_path.c_str(), O_CREAT|O_RDWR, 00644); close (fd2); int fd = open (file_path.c_str(), O_RDWR); flock lock = {F_WRLCK, SEEK_SET, 0, 0, 0}; fcntl (fd, F_SETLKW, &lock); ///out << "locked!" << endl; return fd; } // unlock a file on linux // [hs, 2 july 2010] int unlock ( int fd ) { ///out << "txt2dbl::unlock() => "; flock lock1 = {F_UNLCK, SEEK_SET, 0, 0, 0}; fcntl (fd, F_SETLKW, &lock1); ///out << "unlocked!" << endl; return 0; } int main (int argc, char *argv[]) { ///out << "===== TXT2DB+Locking=====" << endl; int block_size = 0; GDBM_FILE dbf; char c; text_t key; text_t value; text_t tmp; int num_dashes = 0; // sanity check if (argc != 2 && argc != 3) { print_usage (argv[0]); exit (0); } char *dbname; int update = 0; int delkey = 0; int switched_flags = 0; if (argc == 3) { // legacy support if (strcmp (argv[1], "-append") == 0) { update = 1; dbname = argv[2]; } else if (strcmp (argv[1], "-update") == 0) { update = 1; dbname = argv[2]; } else { cerr << argv[1] << " is not a valid option." << endl << endl; print_usage (argv[0]); exit (0); } } else dbname = argv[1]; // open the database // note that GDBM_FAST is obsolete on newer versions of gdbm int read_write = GDBM_NEWDB | GDBM_FAST; if (update) read_write = GDBM_WRCREAT | GDBM_FAST; /* #ifdef __WIN32__ dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1); #else dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL); #endif if (dbf == NULL) { cerr << "couldn't create " << dbname << endl; exit (0); } */ cin.get(c); while (!cin.eof()) { num_dashes = 0; key = ""; value = ""; // Parse out 'key' from [key]\n // scan for first occurrence of [ while (!cin.eof() && c != '[') cin.get(c); if (!cin.eof()) cin.get(c); // skip [ // now look for closing ], building up 'key' as we go while (!cin.eof() && c != ']') { key.push_back ((unsigned char)c); cin.get(c); } if (!cin.eof()) { // most likely an eol char, but if '-', then signifies record // is to be deleted, not added cin.get(c); if (c == '-') { delkey = 1; } else { delkey = 0; } } while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c); // look for 70 dashes tmp = ""; while (!cin.eof() && (num_dashes < 70)) { if (c == '\n') { tmp.push_back ((unsigned char)c); num_dashes = 0; } else if (c == '\r') { // Here we are able to process both Windows-specific text files // (containing carriage-return, newline) and Linux text files // (containing only newline characters) by ignoring the Windows' // carriage-return altogether so that we produce a uniform database // file from either system's type of text file. // If we don't ignore the carriage return here, txt.gz files // produced on Windows cause a GS library running on Linux to break. num_dashes = 0; } else if (c == '-') { tmp.push_back ((unsigned char)c); ++num_dashes; } else { value += tmp; value.push_back ((unsigned char)c); tmp = ""; num_dashes = 0; } cin.get(c); } // if the key is not an empty string store this key-value pair if (!key.empty()) { // convert key to a datum datatype datum key_data; key_data.dptr = key.getcstr(); if (key_data.dptr == NULL) { cerr << "NULL key_data.dptr" << endl; exit (0); } key_data.dsize = strlen(key_data.dptr); // moved for better localisation at the expense of some speed // and lock before the operation // [hs, 2 july 2010] int thelock = lock (); #ifdef __WIN32__ dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1); #else dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL); #endif if (dbf == NULL) { cerr << "couldn't create " << dbname << endl; exit (0); } // If opening was successful the first time, and regardless of what flags // we may have been given, we must make future opens '-update' (rather // than erase the file over and over!) if (!update && !switched_flags) { read_write = GDBM_WRCREAT | GDBM_FAST; switched_flags = 1; } if (delkey) { // delete the given key if (gdbm_delete(dbf, key_data) < 0) { cerr << "gdbm_delete returned an error" << endl; } } else { // add/update // convert value to a datum datatype datum value_data; value_data.dptr = value.getcstr(); if (value_data.dptr == NULL) { cerr << "NULL value_data.dptr" << endl; exit (0); } value_data.dsize = strlen(value_data.dptr); // store the value if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) { cerr << "gdbm_store returned an error" << endl; exit (0); } free(value_data.dptr); } // moved for better localisation at the expense of some speed // and unlock after the operation // [hs, 2 july 2010] gdbm_close (dbf); unlock (thelock); free(key_data.dptr); } } /* gdbm_close (dbf); */ return 0; }