/********************************************************************** * * txt2db.cpp -- * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * Copyright (C) 1999 The New Zealand Digital Library Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **********************************************************************/ #ifdef _MSC_VER #include "autoconf.h" #include "systems.h" #include "gdbmconst.h" extern "C" { #include "gdbm.h" } #else #include #endif #include "gsdlconf.h" #include "text_t.h" #include #include #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_IOS_H) # include #else # include #endif void print_usage (char *program_name) { cerr << "usage: " << program_name << " [options] database-name" << endl << endl; cerr << "options:" << endl; cerr << " -append append to existing database" << endl << endl; } int main (int argc, char *argv[]) { int block_size = 0; GDBM_FILE dbf; char c; text_t key; text_t value; text_t tmp; int num_dashes = 0; // sanity check if (argc != 2 && argc != 3) { print_usage (argv[0]); exit (0); } char *dbname; int append = 0; int delkey = 0; if (argc == 3) { if (strcmp (argv[1], "-append") == 0) { append = 1; dbname = argv[2]; } else { cerr << argv[1] << " is not a valid option." << endl << endl; print_usage (argv[0]); exit (0); } } else dbname = argv[1]; // open the database // note that GDBM_FAST is obsolete on newer versions of gdbm int read_write = GDBM_NEWDB | GDBM_FAST; if (append) read_write = GDBM_WRCREAT | GDBM_FAST; #ifdef _MSC_VER dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1); #else dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL); #endif if (dbf == NULL) { cerr << "couldn't create " << dbname << endl; exit (0); } cin.get(c); while (!cin.eof()) { num_dashes = 0; key = ""; value = ""; // Parse out 'key' from [key]\n // scan for first occurrence of [ while (!cin.eof() && c != '[') cin.get(c); if (!cin.eof()) cin.get(c); // skip [ // now look for closing ], building up 'key' as we go while (!cin.eof() && c != ']') { key.push_back ((unsigned char)c); cin.get(c); } if (!cin.eof()) { // most likely an eol char, but if '-', then signifies record // is to be deleted, not added cin.get(c); if (c == '-') { delkey = 1; } else { delkey = 0; } } while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c); // look for 70 dashes tmp = ""; while (!cin.eof() && (num_dashes < 70)) { if (c == '\n') { tmp.push_back ((unsigned char)c); num_dashes = 0; } else if (c == '\r') { // Here we are able to process both Windows-specific text files // (containing carriage-return, newline) and Linux text files // (containing only newline characters) by ignoring the Windows' // carriage-return altogether so that we produce a uniform database // file from either system's type of text file. // If we don't ignore the carriage return here, txt.gz files // produced on Windows cause a GS library running on Linux to break. num_dashes = 0; } else if (c == '-') { tmp.push_back ((unsigned char)c); ++num_dashes; } else { value += tmp; value.push_back ((unsigned char)c); tmp = ""; num_dashes = 0; } cin.get(c); } // if the key is not an empty string store this key-value pair if (!key.empty()) { // convert key to a datum datatype datum key_data; key_data.dptr = key.getcstr(); if (key_data.dptr == NULL) { cerr << "NULL key_data.dptr" << endl; exit (0); } key_data.dsize = strlen(key_data.dptr); if (delkey) { // delete the given key if (gdbm_delete(dbf, key_data) < 0) { cerr << "gdbm_delete returned an error trying to delete key " << key << ": " << gdbm_strerror (gdbm_errno) <