source: gs2-extensions/parallel-building/trunk/src/src/txt2dbl-src/txt2dbl.cpp@ 35762

Last change on this file since 35762 was 24836, checked in by jmt12, 12 years ago

Altered lockfile generation so that it occurs in specific collections tmp directory instead.

File size: 7.8 KB
Line 
1/**********************************************************************
2 *
3 * txt2db.cpp --
4 * A component of the Greenstone digital library software
5 * from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Copyright (C) 1999 The New Zealand Digital Library Project
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 **********************************************************************/
25
26
27#ifdef __WIN32__
28#include "autoconf.h"
29#include "systems.h"
30#include "gdbmconst.h"
31#include "gdbm.h"
32
33#else
34#include <gdbm.h>
35#endif
36
37//#include "gsdlconf.h"
38#include "text_t.h"
39#include <stdlib.h>
40#include <cstring>
41#include <fcntl.h>
42#include <string.h>
43#include <sys/stat.h>
44
45#if defined(GSDL_USE_OBJECTSPACE)
46# include <ospace\std\iostream>
47#elif defined(GSDL_USE_IOS_H)
48# include <iostream.h>
49#else
50# include <iostream>
51#endif
52
53using namespace std;
54
// Write the command-line synopsis and the option summary to standard error.
// program_name is the text shown at the start of each usage line (main
// passes argv[0]).
void print_usage (char *program_name) {
  // the three accepted invocation forms
  std::cerr << "usage: " << program_name << " database-name" << std::endl
            << "usage: " << program_name << " -update database-name" << std::endl
            << "usage: " << program_name << " -append database-name" << std::endl
            << std::endl;

  // the recognised options
  std::cerr << "options:" << std::endl
            << " -update update existing database" << std::endl
            << " -append legacy alias for -update" << std::endl
            << std::endl;
}
63
// lock a file on linux
// [hs, 2 july 2010]
// - modified to create a lock file local to the collection [jmt12]
//
// The lock file is "<GSDLCOLLECTDIR>/tmp/gdb.lock"; when GSDLCOLLECTDIR is
// not set in the environment the path is simply "/tmp/gdb.lock".  The tmp
// directory and the lock file are created on demand.  The call blocks
// (F_SETLKW) until an exclusive write lock covering the whole file has been
// granted.
//
// Returns the open descriptor that holds the lock (hand it to unlock()), or
// -1 if the lock file could not be opened or the lock could not be taken.
int lock ()
{
  std::string file_path;
  const char *collect_dir = getenv ("GSDLCOLLECTDIR");
  if (collect_dir != NULL)
  {
    file_path += collect_dir;
  }
  file_path += "/tmp";
  // Create the directory unconditionally and ignore the result: it normally
  // exists already, and a genuine failure shows up as an open() error below.
  (void) mkdir (file_path.c_str(), 00777);
  file_path += "/gdb.lock";

  // a single open both creates the file (if needed) and yields the
  // descriptor the lock is attached to
  const int fd = open (file_path.c_str(), O_CREAT|O_RDWR, 00644);
  if (fd < 0)
  {
    return -1;
  }

  // POSIX does not specify the order of struct flock's members, so the
  // fields are assigned by name rather than with a positional initialiser.
  struct flock request;
  memset (&request, 0, sizeof (request));
  request.l_type = F_WRLCK;
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0; // zero length == through to the end of the file

  if (fcntl (fd, F_SETLKW, &request) == -1)
  {
    // never hand back a descriptor that does not actually hold the lock
    close (fd);
    return -1;
  }
  return fd;
}
90
// unlock a file on linux
// [hs, 2 july 2010]
//
// fd is the descriptor returned by lock().  The lock held through it is
// released and the descriptor is then closed, so fd must not be used again
// after this call.  Closing matters: lock() opens a fresh descriptor on every
// call, so leaving it open leaks one descriptor per lock()/unlock() pair.
//
// Returns 0 on success, -1 if fd is negative or the release/close failed.
int unlock ( int fd )
{
  if (fd < 0)
  {
    return -1;
  }

  // assign the fields by name; the member order of struct flock is not
  // specified by POSIX
  struct flock request;
  memset (&request, 0, sizeof (request));
  request.l_type = F_UNLCK;
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0; // zero length == through to the end of the file

  const int unlock_status = fcntl (fd, F_SETLKW, &request);
  const int close_status = close (fd);
  if (unlock_status == -1 || close_status == -1)
  {
    return -1;
  }
  return 0;
}
101
102int main (int argc, char *argv[]) {
103
104 ///out << "===== TXT2DB+Locking=====" << endl;
105
106 int block_size = 0;
107 GDBM_FILE dbf;
108 char c;
109 text_t key;
110 text_t value;
111 text_t tmp;
112 int num_dashes = 0;
113
114 // sanity check
115 if (argc != 2 && argc != 3) {
116 print_usage (argv[0]);
117 exit (0);
118 }
119
120 char *dbname;
121 int update = 0;
122 int delkey = 0;
123 int switched_flags = 0;
124
125 if (argc == 3) {
126 // legacy support
127 if (strcmp (argv[1], "-append") == 0) {
128 update = 1;
129 dbname = argv[2];
130 } else if (strcmp (argv[1], "-update") == 0) {
131 update = 1;
132 dbname = argv[2];
133 } else {
134 cerr << argv[1] << " is not a valid option." << endl << endl;
135 print_usage (argv[0]);
136 exit (0);
137 }
138 } else dbname = argv[1];
139
140
141 // open the database
142 // note that GDBM_FAST is obsolete on newer versions of gdbm
143 int read_write = GDBM_NEWDB | GDBM_FAST;
144 if (update) read_write = GDBM_WRCREAT | GDBM_FAST;
145
146/*
147#ifdef __WIN32__
148 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
149#else
150 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
151#endif
152 if (dbf == NULL) {
153 cerr << "couldn't create " << dbname << endl;
154 exit (0);
155 }
156*/
157
158 cin.get(c);
159 while (!cin.eof()) {
160 num_dashes = 0;
161 key = "";
162 value = "";
163
164 // Parse out 'key' from [key]\n
165
166 // scan for first occurrence of [
167 while (!cin.eof() && c != '[') cin.get(c);
168
169 if (!cin.eof()) cin.get(c); // skip [
170
171 // now look for closing ], building up 'key' as we go
172 while (!cin.eof() && c != ']') {
173 key.push_back ((unsigned char)c);
174 cin.get(c);
175 }
176
177 if (!cin.eof()) {
178 // most likely an eol char, but if '-', then signifies record
179 // is to be deleted, not added
180 cin.get(c);
181 if (c == '-') {
182 delkey = 1;
183 }
184 else {
185 delkey = 0;
186 }
187 }
188 while (!cin.eof() && (c == '\n' || c == '\r')) cin.get(c);
189
190 // look for 70 dashes
191 tmp = "";
192 while (!cin.eof() && (num_dashes < 70)) {
193 if (c == '\n') {
194 tmp.push_back ((unsigned char)c);
195 num_dashes = 0;
196
197 } else if (c == '\r') {
198 // Here we are able to process both Windows-specific text files
199 // (containing carriage-return, newline) and Linux text files
200 // (containing only newline characters) by ignoring the Windows'
201 // carriage-return altogether so that we produce a uniform database
202 // file from either system's type of text file.
203 // If we don't ignore the carriage return here, txt.gz files
204 // produced on Windows cause a GS library running on Linux to break.
205 num_dashes = 0;
206
207 } else if (c == '-') {
208 tmp.push_back ((unsigned char)c);
209 ++num_dashes;
210
211 } else {
212 value += tmp;
213 value.push_back ((unsigned char)c);
214 tmp = "";
215 num_dashes = 0;
216 }
217 cin.get(c);
218 }
219
220 // if the key is not an empty string store this key-value pair
221 if (!key.empty()) {
222 // convert key to a datum datatype
223 datum key_data;
224 key_data.dptr = key.getcstr();
225 if (key_data.dptr == NULL) {
226 cerr << "NULL key_data.dptr" << endl;
227 exit (0);
228 }
229 key_data.dsize = strlen(key_data.dptr);
230 // moved for better localisation at the expense of some speed
231 // and lock before the operation
232 // [hs, 2 july 2010]
233 int thelock = lock ();
234#ifdef __WIN32__
235 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL, 1);
236#else
237 dbf = gdbm_open (dbname, block_size, read_write, 00664, NULL);
238#endif
239 if (dbf == NULL) {
240 cerr << "couldn't create " << dbname << endl;
241 exit (0);
242 }
243 // If opening was successful the first time, and regardless of what flags
244 // we may have been given, we must make future opens '-update' (rather
245 // than erase the file over and over!)
246 if (!update && !switched_flags)
247 {
248 read_write = GDBM_WRCREAT | GDBM_FAST;
249 switched_flags = 1;
250 }
251
252 if (delkey) {
253 // delete the given key
254 if (gdbm_delete(dbf, key_data) < 0) {
255 cerr << "gdbm_delete returned an error" << endl;
256 }
257 }
258 else {
259
260 // add/update
261
262 // convert value to a datum datatype
263 datum value_data;
264 value_data.dptr = value.getcstr();
265 if (value_data.dptr == NULL) {
266 cerr << "NULL value_data.dptr" << endl;
267 exit (0);
268 }
269 value_data.dsize = strlen(value_data.dptr);
270
271 // store the value
272 if (gdbm_store (dbf, key_data, value_data, GDBM_REPLACE) < 0) {
273 cerr << "gdbm_store returned an error" << endl;
274 exit (0);
275 }
276
277 free(value_data.dptr);
278 }
279
280 // moved for better localisation at the expense of some speed
281 // and unlock after the operation
282 // [hs, 2 july 2010]
283 gdbm_close (dbf);
284 unlock (thelock);
285
286 free(key_data.dptr);
287 }
288 }
289/* gdbm_close (dbf); */
290
291 return 0;
292}
Note: See TracBrowser for help on using the repository browser.