source: gsdl/trunk/common-src/src/gdbmedit/txt2db/txt2db.cpp@ 17783

Last change on this file since 17783 was 17783, checked in by ak19, 15 years ago
  1. When a collection's txt.gz version of its database, produced on a Windows machine, is brought over to a Linux machine, the library server used to break when trying to serve the collection, because the txt2db program would preserve the Windows-specific carriage-return character. This character is now discarded so that the database file generated from the txt.gz db files is uniform across operating systems. 2. (Minor changes, defensive programming.) Dr Bainbridge changed all newline characters printed to stderr and stdout to endl, as this may produce more consistent results across operating systems.
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1/**********************************************************************
2 *
3 * txt2db.cpp --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26
27#ifdef __WIN32__
28#include "autoconf.h"
29#include "systems.h"
30#include "gdbmconst.h"
31#include "gdbm.h"
32
33#else
34#include <gdbm.h>
35#endif
36
37#include "gsdlconf.h"
38#include "text_t.h"
39#include <stdlib.h>
40
41#if defined(GSDL_USE_OBJECTSPACE)
42# include <ospace\std\iostream>
43#elif defined(GSDL_USE_IOS_H)
44# include <iostream.h>
45#else
46# include <iostream>
47#endif
48
49
50void print_usage (char *program_name) {
51 cerr << "usage: " << program_name << " [options] database-name" << endl << endl;
52 cerr << "options:" << endl;
53 cerr << " -append append to existing database" << endl << endl;
54}
55
56
57int main (int argc, char *argv[]) {
58 int block_size = 0;
59 GDBM_FILE dbf;
60 char c;
61 text_t key;
62 text_t value;
63 text_t tmp;
64 int num_dashes = 0;
65
66 // sanity check
67 if (argc != 2 && argc != 3) {
68 print_usage (argv[0]);
69 exit (0);
70 }
71
72 char *dbname;
73 int append = 0;
74 if (argc == 3) {
75 if (strcmp (argv[1], "-append") == 0) {
76 append = 1;
77 dbname = argv[2];
78 } else {
79 cerr << argv[1] << " is not a valid option." << endl << endl;
80 print_usage (argv[0]);
81 exit (0);
82 }
83 } else dbname = argv[1];
84
85
86 // open the database
87 // note that GDBM_FAST is obsolete on newer versions of gdbm
88 int read_write = GDBM_NEWDB | GDBM_FAST;
89 if (append) read_write = GDBM_WRCREAT | GDBM_FAST;
90
91#ifdef __WIN32__
92 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
93#else
94 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
95#endif
96 if (dbf == NULL) {
97 cerr << "couldn't create " << dbname << endl;
98 exit (0);
99 }
100
101 cin.get(c);
102 while (!cin.eof()) {
103 num_dashes = 0;
104 key = "";
105 value = "";
106
107 // look for [key]\n
108 while (!cin.eof() && c != '[') cin.get(c);
109 if (!cin.eof()) cin.get(c); // skip [
110 while (!cin.eof() && c != ']') {
111 key.push_back ((unsigned char)c);
112 cin.get(c);
113 }
114 if (!cin.eof()) cin.get(c); // skip ]
115 while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
116
117 // look for 70 dashes
118 tmp = "";
119 while (!cin.eof() && (num_dashes < 70)) {
120 if (c == '\n') {
121 tmp.push_back ((unsigned char)c);
122 num_dashes = 0;
123
124 } else if (c == '\r') {
125 // Here we are able to process both Windows-specific text files
126 // (containing carriage-return, newline) and Linux text files
127 // (containing only newline characters) by ignoring the Windows'
128 // carriage-return altogether so that we produce a uniform database
129 // file from either system's type of text file.
130 // If we don't ignore the carriage return here, txt.gz files
131 // produced on Windows cause a GS library running on Linux to break.
132 num_dashes = 0;
133
134 } else if (c == '-') {
135 tmp.push_back ((unsigned char)c);
136 ++num_dashes;
137
138 } else {
139 value += tmp;
140 value.push_back ((unsigned char)c);
141 tmp = "";
142 num_dashes = 0;
143 }
144 cin.get(c);
145 }
146
147 // if the key is not an empty string store this key-value pair
148 if (!key.empty()) {
149 // convert key to a datum datatype
150 datum key_data;
151 key_data.dptr = key.getcstr();
152 if (key_data.dptr == NULL) {
153 cerr << "NULL key_data.dptr" << endl;
154 exit (0);
155 }
156 key_data.dsize = strlen(key_data.dptr);
157
158 // convert value to a datum datatype
159 datum value_data;
160 value_data.dptr = value.getcstr();
161 if (value_data.dptr == NULL) {
162 cerr << "NULL value_data.dptr" << endl;
163 exit (0);
164 }
165 value_data.dsize = strlen(value_data.dptr);
166
167 // store the value
168 if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
169 cerr << "gdbm_store returned an error" << endl;
170 exit (0);
171 }
172
173 free(key_data.dptr);
174 free(value_data.dptr);
175 }
176 }
177
178 gdbm_close (dbf);
179
180 return 0;
181}
Note: See TracBrowser for help on using the repository browser.