source: main/trunk/greenstone2/common-src/src/gdbmedit/txt2db/txt2db.cpp@ 28090

Last change on this file since 28090 was 28090, checked in by davidb, 11 years ago

Further header file needed

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1/**********************************************************************
2 *
3 * txt2db.cpp --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26
27#ifdef _MSC_VER
28#include "autoconf.h"
29#include "systems.h"
30#include "gdbmconst.h"
31
32extern "C" {
33#include "gdbm.h"
34}
35
36#else
37#include <gdbm.h>
38#include <errno.h>
39#endif
40
41#include "gsdlconf.h"
42#include "text_t.h"
43#include <stdlib.h>
44#include <stdio.h>
45#include <cstring>
46
47#if defined(GSDL_USE_OBJECTSPACE)
48# include <ospace\std\iostream>
49#elif defined(GSDL_USE_IOS_H)
50# include <iostream.h>
51#else
52# include <iostream>
53#endif
54
55
56void print_usage (char *program_name) {
57 cerr << "usage: " << program_name << " [options] database-name" << endl << endl;
58 cerr << "options:" << endl;
59 cerr << " -append append to existing database" << endl << endl;
60}
61
62
63int main (int argc, char *argv[]) {
64 int block_size = 0;
65 GDBM_FILE dbf;
66 char c;
67 text_t key;
68 text_t value;
69 text_t tmp;
70 int num_dashes = 0;
71
72 // sanity check
73 if (argc != 2 && argc != 3) {
74 print_usage (argv[0]);
75 exit (0);
76 }
77
78 char *dbname;
79 int append = 0;
80 int delkey = 0;
81
82 if (argc == 3) {
83 if (strcmp (argv[1], "-append") == 0) {
84 append = 1;
85 dbname = argv[2];
86 } else {
87 cerr << argv[1] << " is not a valid option." << endl << endl;
88 print_usage (argv[0]);
89 exit (0);
90 }
91 } else dbname = argv[1];
92
93
94 // open the database
95 // note that GDBM_FAST is obsolete on newer versions of gdbm
96 int read_write = GDBM_NEWDB | GDBM_FAST;
97 if (append) read_write = GDBM_WRCREAT | GDBM_FAST;
98
99#ifdef _MSC_VER
100 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
101#else
102 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
103#endif
104 if (dbf == NULL) {
105 cerr << argv[0] << ": Couldn't create/open '" << dbname << "'" << endl;
106 cerr << " " << gdbm_strerror(gdbm_errno) << endl;
107 cerr << " gdbm errno = " << gdbm_errno << endl;
108#ifndef _MSC_VER
109 cerr << " OS errno = " << errno << endl;
110 perror("gdbm_open failed: ");
111#endif
112
113 exit (-1);
114 }
115
116 cin.get(c);
117 while (!cin.eof()) {
118 num_dashes = 0;
119 key = "";
120 value = "";
121
122 // Parse out 'key' from [key]\n
123
124 // scan for first occurrence of [
125 while (!cin.eof() && c != '[') cin.get(c);
126
127 if (!cin.eof()) cin.get(c); // skip [
128
129 // now look for closing ], building up 'key' as we go
130 while (!cin.eof() && c != ']') {
131 key.push_back ((unsigned char)c);
132 cin.get(c);
133 }
134
135 if (!cin.eof()) {
136 // most likely an eol char, but if '-', then signifies record
137 // is to be deleted, not added
138 cin.get(c);
139 if (c == '-') {
140 delkey = 1;
141 }
142 else {
143 delkey = 0;
144 }
145 }
146 while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
147
148 // look for 70 dashes
149 tmp = "";
150 while (!cin.eof() && (num_dashes < 70)) {
151 if (c == '\n') {
152 tmp.push_back ((unsigned char)c);
153 num_dashes = 0;
154
155 } else if (c == '\r') {
156 // Here we are able to process both Windows-specific text files
157 // (containing carriage-return, newline) and Linux text files
158 // (containing only newline characters) by ignoring the Windows'
159 // carriage-return altogether so that we produce a uniform database
160 // file from either system's type of text file.
161 // If we don't ignore the carriage return here, txt.gz files
162 // produced on Windows cause a GS library running on Linux to break.
163 num_dashes = 0;
164
165 } else if (c == '-') {
166 tmp.push_back ((unsigned char)c);
167 ++num_dashes;
168
169 } else {
170 value += tmp;
171 value.push_back ((unsigned char)c);
172 tmp = "";
173 num_dashes = 0;
174 }
175 cin.get(c);
176 }
177
178 // if the key is not an empty string store this key-value pair
179 if (!key.empty()) {
180 // convert key to a datum datatype
181 datum key_data;
182 key_data.dptr = key.getcstr();
183 if (key_data.dptr == NULL) {
184 cerr << "NULL key_data.dptr" << endl;
185 exit (0);
186 }
187 key_data.dsize = strlen(key_data.dptr);
188
189 if (delkey) {
190 // delete the given key
191 if (gdbm_delete(dbf, key_data) < 0) {
192 cerr << "gdbm_delete returned an error trying to delete key " << key << ": " << gdbm_strerror (gdbm_errno) <<endl;
193 }
194 }
195 else {
196
197 // add/append
198
199 // convert value to a datum datatype
200 datum value_data;
201 value_data.dptr = value.getcstr();
202 if (value_data.dptr == NULL) {
203 cerr << "NULL value_data.dptr" << endl;
204 exit (0);
205 }
206 value_data.dsize = strlen(value_data.dptr);
207
208 // store the value
209 if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
210 cerr << "gdbm_store returned an error" << endl;
211 exit (0);
212 }
213
214
215 free(value_data.dptr);
216 }
217
218 free(key_data.dptr);
219 }
220 }
221
222 gdbm_close (dbf);
223
224 return 0;
225}
Note: See TracBrowser for help on using the repository browser.