source: gsdl/trunk/common-src/src/gdbmedit/txt2db/txt2db.cpp@ 18469

Last change on this file since 18469 was 18469, checked in by davidb, 15 years ago

Support for reindexing a document added

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.2 KB
Line 
1/**********************************************************************
2 *
3 * txt2db.cpp --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26
27#ifdef __WIN32__
28#include "autoconf.h"
29#include "systems.h"
30#include "gdbmconst.h"
31#include "gdbm.h"
32
33#else
34#include <gdbm.h>
35#endif
36
37#include "gsdlconf.h"
38#include "text_t.h"
39#include <stdlib.h>
40
41#if defined(GSDL_USE_OBJECTSPACE)
42# include <ospace\std\iostream>
43#elif defined(GSDL_USE_IOS_H)
44# include <iostream.h>
45#else
46# include <iostream>
47#endif
48
49
// Write a short command-line usage summary to standard error.
// program_name is the executable name to show in the summary
// (main passes argv[0]).
void print_usage (char *program_name) {
  cerr << "usage: " << program_name << " [options] database-name" << endl
       << endl
       << "options:" << endl
       << " -append append to existing database" << endl
       << endl;
}
55
56
57int main (int argc, char *argv[]) {
58 int block_size = 0;
59 GDBM_FILE dbf;
60 char c;
61 text_t key;
62 text_t value;
63 text_t tmp;
64 int num_dashes = 0;
65
66 // sanity check
67 if (argc != 2 && argc != 3) {
68 print_usage (argv[0]);
69 exit (0);
70 }
71
72 char *dbname;
73 int append = 0;
74 int delkey = 0;
75
76 if (argc == 3) {
77 if (strcmp (argv[1], "-append") == 0) {
78 append = 1;
79 dbname = argv[2];
80 } else {
81 cerr << argv[1] << " is not a valid option." << endl << endl;
82 print_usage (argv[0]);
83 exit (0);
84 }
85 } else dbname = argv[1];
86
87
88 // open the database
89 // note that GDBM_FAST is obsolete on newer versions of gdbm
90 int read_write = GDBM_NEWDB | GDBM_FAST;
91 if (append) read_write = GDBM_WRCREAT | GDBM_FAST;
92
93#ifdef __WIN32__
94 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
95#else
96 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
97#endif
98 if (dbf == NULL) {
99 cerr << "couldn't create " << dbname << endl;
100 exit (0);
101 }
102
103 cin.get(c);
104 while (!cin.eof()) {
105 num_dashes = 0;
106 key = "";
107 value = "";
108
109 // Parse out 'key' from [key]\n
110
111 // scan for first occurrence of [
112 while (!cin.eof() && c != '[') cin.get(c);
113
114 if (!cin.eof()) cin.get(c); // skip [
115
116 // now look for closing ], building up 'key' as we go
117 while (!cin.eof() && c != ']') {
118 key.push_back ((unsigned char)c);
119 cin.get(c);
120 }
121
122 if (!cin.eof()) {
123 // most likely an eol char, but if '-', then signifies record
124 // is to be deleted, not added
125 cin.get(c);
126 if (c == '-') {
127 delkey = 1;
128 }
129 else {
130 delkey = 0;
131 }
132 }
133 while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
134
135 // look for 70 dashes
136 tmp = "";
137 while (!cin.eof() && (num_dashes < 70)) {
138 if (c == '\n') {
139 tmp.push_back ((unsigned char)c);
140 num_dashes = 0;
141
142 } else if (c == '\r') {
143 // Here we are able to process both Windows-specific text files
144 // (containing carriage-return, newline) and Linux text files
145 // (containing only newline characters) by ignoring the Windows'
146 // carriage-return altogether so that we produce a uniform database
147 // file from either system's type of text file.
148 // If we don't ignore the carriage return here, txt.gz files
149 // produced on Windows cause a GS library running on Linux to break.
150 num_dashes = 0;
151
152 } else if (c == '-') {
153 tmp.push_back ((unsigned char)c);
154 ++num_dashes;
155
156 } else {
157 value += tmp;
158 value.push_back ((unsigned char)c);
159 tmp = "";
160 num_dashes = 0;
161 }
162 cin.get(c);
163 }
164
165 // if the key is not an empty string store this key-value pair
166 if (!key.empty()) {
167 // convert key to a datum datatype
168 datum key_data;
169 key_data.dptr = key.getcstr();
170 if (key_data.dptr == NULL) {
171 cerr << "NULL key_data.dptr" << endl;
172 exit (0);
173 }
174 key_data.dsize = strlen(key_data.dptr);
175
176 if (delkey) {
177 // delete the given key
178 if (gdbm_delete(dbf, key_data) < 0) {
179 cerr << "gdbm_delete returned an error" << endl;
180 }
181 }
182 else {
183
184 // add/append
185
186 // convert value to a datum datatype
187 datum value_data;
188 value_data.dptr = value.getcstr();
189 if (value_data.dptr == NULL) {
190 cerr << "NULL value_data.dptr" << endl;
191 exit (0);
192 }
193 value_data.dsize = strlen(value_data.dptr);
194
195 // store the value
196 if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
197 cerr << "gdbm_store returned an error" << endl;
198 exit (0);
199 }
200
201
202 free(value_data.dptr);
203 }
204
205 free(key_data.dptr);
206 }
207 }
208
209 gdbm_close (dbf);
210
211 return 0;
212}
Note: See TracBrowser for help on using the repository browser.