[536] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * txt2db.cpp --
|
---|
| 4 | * A component of the Greenstone digital library software
|
---|
| 5 | * from the New Zealand Digital Library Project at the
|
---|
| 6 | * University of Waikato, New Zealand.
|
---|
| 7 | *
|
---|
| 8 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
| 9 | *
|
---|
| 10 | * This program is free software; you can redistribute it and/or modify
|
---|
| 11 | * it under the terms of the GNU General Public License as published by
|
---|
| 12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | * (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This program is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | * GNU General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU General Public License
|
---|
| 21 | * along with this program; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | *
|
---|
| 24 | **********************************************************************/
|
---|
| 25 |
|
---|
| 26 |
|
---|
[26650] | 27 | #ifdef _MSC_VER
|
---|
[115] | 28 | #include "autoconf.h"
|
---|
| 29 | #include "systems.h"
|
---|
| 30 | #include "gdbmconst.h"
|
---|
[23136] | 31 |
|
---|
| 32 | extern "C" {
|
---|
[115] | 33 | #include "gdbm.h"
|
---|
[23136] | 34 | }
|
---|
[115] | 35 |
|
---|
| 36 | #else
|
---|
| 37 | #include <gdbm.h>
|
---|
| 38 | #endif
|
---|
| 39 |
|
---|
| 40 | #include "gsdlconf.h"
|
---|
| 41 | #include "text_t.h"
|
---|
| 42 | #include <stdlib.h>
|
---|
[18880] | 43 | #include <cstring>
|
---|
[115] | 44 |
|
---|
| 45 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
| 46 | # include <ospace\std\iostream>
|
---|
| 47 | #elif defined(GSDL_USE_IOS_H)
|
---|
| 48 | # include <iostream.h>
|
---|
| 49 | #else
|
---|
[519] | 50 | # include <iostream>
|
---|
[115] | 51 | #endif
|
---|
| 52 |
|
---|
| 53 |
|
---|
| 54 | void print_usage (char *program_name) {
|
---|
[17783] | 55 | cerr << "usage: " << program_name << " [options] database-name" << endl << endl;
|
---|
| 56 | cerr << "options:" << endl;
|
---|
| 57 | cerr << " -append append to existing database" << endl << endl;
|
---|
[115] | 58 | }
|
---|
| 59 |
|
---|
| 60 |
|
---|
| 61 | int main (int argc, char *argv[]) {
|
---|
[623] | 62 | int block_size = 0;
|
---|
| 63 | GDBM_FILE dbf;
|
---|
| 64 | char c;
|
---|
| 65 | text_t key;
|
---|
| 66 | text_t value;
|
---|
| 67 | text_t tmp;
|
---|
| 68 | int num_dashes = 0;
|
---|
[625] | 69 |
|
---|
[623] | 70 | // sanity check
|
---|
[625] | 71 | if (argc != 2 && argc != 3) {
|
---|
[623] | 72 | print_usage (argv[0]);
|
---|
| 73 | exit (0);
|
---|
| 74 | }
|
---|
| 75 |
|
---|
[625] | 76 | char *dbname;
|
---|
| 77 | int append = 0;
|
---|
[18469] | 78 | int delkey = 0;
|
---|
| 79 |
|
---|
[625] | 80 | if (argc == 3) {
|
---|
| 81 | if (strcmp (argv[1], "-append") == 0) {
|
---|
| 82 | append = 1;
|
---|
| 83 | dbname = argv[2];
|
---|
| 84 | } else {
|
---|
[17783] | 85 | cerr << argv[1] << " is not a valid option." << endl << endl;
|
---|
[625] | 86 | print_usage (argv[0]);
|
---|
| 87 | exit (0);
|
---|
| 88 | }
|
---|
| 89 | } else dbname = argv[1];
|
---|
| 90 |
|
---|
| 91 |
|
---|
[623] | 92 | // open the database
|
---|
[625] | 93 | // note that GDBM_FAST is obsolete on newer versions of gdbm
|
---|
| 94 | int read_write = GDBM_NEWDB | GDBM_FAST;
|
---|
| 95 | if (append) read_write = GDBM_WRCREAT | GDBM_FAST;
|
---|
| 96 |
|
---|
[26650] | 97 | #ifdef _MSC_VER
|
---|
[982] | 98 | dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
|
---|
| 99 | #else
|
---|
[625] | 100 | dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
|
---|
[982] | 101 | #endif
|
---|
[623] | 102 | if (dbf == NULL) {
|
---|
[17783] | 103 | cerr << "couldn't create " << dbname << endl;
|
---|
[623] | 104 | exit (0);
|
---|
| 105 | }
|
---|
| 106 |
|
---|
| 107 | cin.get(c);
|
---|
| 108 | while (!cin.eof()) {
|
---|
| 109 | num_dashes = 0;
|
---|
| 110 | key = "";
|
---|
| 111 | value = "";
|
---|
| 112 |
|
---|
[18469] | 113 | // Parse out 'key' from [key]\n
|
---|
| 114 |
|
---|
| 115 | // scan for first occurrence of [
|
---|
[623] | 116 | while (!cin.eof() && c != '[') cin.get(c);
|
---|
[18469] | 117 |
|
---|
[623] | 118 | if (!cin.eof()) cin.get(c); // skip [
|
---|
[18469] | 119 |
|
---|
| 120 | // now look for closing ], building up 'key' as we go
|
---|
[623] | 121 | while (!cin.eof() && c != ']') {
|
---|
| 122 | key.push_back ((unsigned char)c);
|
---|
| 123 | cin.get(c);
|
---|
| 124 | }
|
---|
[18469] | 125 |
|
---|
| 126 | if (!cin.eof()) {
|
---|
| 127 | // most likely an eol char, but if '-', then signifies record
|
---|
| 128 | // is to be deleted, not added
|
---|
| 129 | cin.get(c);
|
---|
| 130 | if (c == '-') {
|
---|
| 131 | delkey = 1;
|
---|
| 132 | }
|
---|
| 133 | else {
|
---|
| 134 | delkey = 0;
|
---|
| 135 | }
|
---|
| 136 | }
|
---|
[17783] | 137 | while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
|
---|
[623] | 138 |
|
---|
| 139 | // look for 70 dashes
|
---|
| 140 | tmp = "";
|
---|
| 141 | while (!cin.eof() && (num_dashes < 70)) {
|
---|
[17783] | 142 | if (c == '\n') {
|
---|
[623] | 143 | tmp.push_back ((unsigned char)c);
|
---|
[17783] | 144 | num_dashes = 0;
|
---|
| 145 |
|
---|
| 146 | } else if (c == '\r') {
|
---|
| 147 | // Here we are able to process both Windows-specific text files
|
---|
| 148 | // (containing carriage-return, newline) and Linux text files
|
---|
| 149 | // (containing only newline characters) by ignoring the Windows'
|
---|
| 150 | // carriage-return altogether so that we produce a uniform database
|
---|
| 151 | // file from either system's type of text file.
|
---|
| 152 | // If we don't ignore the carriage return here, txt.gz files
|
---|
| 153 | // produced on Windows cause a GS library running on Linux to break.
|
---|
[623] | 154 | num_dashes = 0;
|
---|
[17783] | 155 |
|
---|
[623] | 156 | } else if (c == '-') {
|
---|
| 157 | tmp.push_back ((unsigned char)c);
|
---|
[9596] | 158 | ++num_dashes;
|
---|
[623] | 159 |
|
---|
| 160 | } else {
|
---|
| 161 | value += tmp;
|
---|
| 162 | value.push_back ((unsigned char)c);
|
---|
| 163 | tmp = "";
|
---|
| 164 | num_dashes = 0;
|
---|
| 165 | }
|
---|
| 166 | cin.get(c);
|
---|
| 167 | }
|
---|
| 168 |
|
---|
| 169 | // if the key is not an empty string store this key-value pair
|
---|
| 170 | if (!key.empty()) {
|
---|
| 171 | // convert key to a datum datatype
|
---|
| 172 | datum key_data;
|
---|
| 173 | key_data.dptr = key.getcstr();
|
---|
| 174 | if (key_data.dptr == NULL) {
|
---|
[17783] | 175 | cerr << "NULL key_data.dptr" << endl;
|
---|
[623] | 176 | exit (0);
|
---|
| 177 | }
|
---|
| 178 | key_data.dsize = strlen(key_data.dptr);
|
---|
[18469] | 179 |
|
---|
| 180 | if (delkey) {
|
---|
| 181 | // delete the given key
|
---|
| 182 | if (gdbm_delete(dbf, key_data) < 0) {
|
---|
[23128] | 183 | cerr << "gdbm_delete returned an error trying to delete key " << key << ": " << gdbm_strerror (gdbm_errno) <<endl;
|
---|
[18469] | 184 | }
|
---|
[623] | 185 | }
|
---|
[18469] | 186 | else {
|
---|
| 187 |
|
---|
| 188 | // add/append
|
---|
| 189 |
|
---|
| 190 | // convert value to a datum datatype
|
---|
| 191 | datum value_data;
|
---|
| 192 | value_data.dptr = value.getcstr();
|
---|
| 193 | if (value_data.dptr == NULL) {
|
---|
| 194 | cerr << "NULL value_data.dptr" << endl;
|
---|
| 195 | exit (0);
|
---|
| 196 | }
|
---|
| 197 | value_data.dsize = strlen(value_data.dptr);
|
---|
[623] | 198 |
|
---|
[18469] | 199 | // store the value
|
---|
| 200 | if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
|
---|
| 201 | cerr << "gdbm_store returned an error" << endl;
|
---|
| 202 | exit (0);
|
---|
| 203 | }
|
---|
| 204 |
|
---|
| 205 |
|
---|
| 206 | free(value_data.dptr);
|
---|
[623] | 207 | }
|
---|
[18469] | 208 |
|
---|
[15219] | 209 | free(key_data.dptr);
|
---|
[623] | 210 | }
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 | gdbm_close (dbf);
|
---|
| 214 |
|
---|
| 215 | return 0;
|
---|
[115] | 216 | }
|
---|