source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 25454

Last change on this file since 25454 was 25454, checked in by jmt12, 12 years ago

Replacing potentially problematic harness code with calls to individual TDB utilities (although still using a pipe to TXT2TDB - so we'll see how that goes

  • Property svn:executable set to *
File size: 15.7 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Setup Environment
7BEGIN
8{
9 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
10 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
11
12 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
13 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
17
18 if (defined $ENV{'GSDLEXTS'})
19 {
20 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
21 foreach my $e (@extensions)
22 {
23 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
24 unshift (@INC, "$ext_prefix/perllib");
25 unshift (@INC, "$ext_prefix/perllib/cpan");
26 unshift (@INC, "$ext_prefix/perllib/plugins");
27 unshift (@INC, "$ext_prefix/perllib/classify");
28 }
29 }
30 if (defined $ENV{'GSDL3EXTS'})
31 {
32 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
33 foreach my $e (@extensions)
34 {
35 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
36 unshift (@INC, "$ext_prefix/perllib");
37 unshift (@INC, "$ext_prefix/perllib/cpan");
38 unshift (@INC, "$ext_prefix/perllib/plugins");
39 unshift (@INC, "$ext_prefix/perllib/classify");
40 }
41 }
42
43 # Manually installed CPAN package in GEXT*INSTALL
44 # - parse up version number
45 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
46 # - get rid of leading zeros by making them integers
47 $major += 0;
48 $minor += 0;
49 $revision += 0;
50 # - and add to Perl's path
51 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
52}
53
54use Cwd;
55
56# We need to do a little file locking
57use Fcntl qw(:flock); #import LOCK_* constants
58
59# Advanced child process control allowing bidirectional pipes
60use IPC::Run qw(harness start pump finish);
61
62# we need to run as a daemon
63use Proc::Daemon;
64
65# The server will need to accept requests from multiple threads, and
66# so will need threads in and of itself
67use threads;
68use threads::shared;
69
70# Greenstone utility functions (filename_cat)
71use util;
72
73# A simple server that listens on a socket and 'forks' off child threads to
74# handle each incoming request
75use SocketsSwimmingThreadPoolServer;
76
77# Globally available - but once set these are read-only - so locking isn't
78# an issue
79my $use_harness = 0;
80my $tdbexe = 'tdbcli';
81my $parent_pid = 0;
82my $collection = '';
83my $no_daemon = 0;
84my $debug = 1;
85my $server;
86my $server_host;
87my $server_port;
88my $server_threads;
89# - shared and, more importantly, lockable
90my %listeners :shared;
91my $should_stop :shared = 0;
92my $debug_log :shared = 0;
93
94my $msg_counter :shared = 0;
95
96print "===== TDB Server =====\n";
97print "Provides a server to allow multiple remote machines to simultaenously\n";
98print "edit one or more TDB databases on the local machine. This is to work\n";
99print "around NFS file locking issues when parallel processing on a cluster.\n";
100
101MAIN:
102{
103 # Check arguments
104 # - compulsory
105 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
106 {
107 &printUsageAndExit('Error! Missing parent process ID or not a PID');
108 }
109 $parent_pid = $ARGV[0];
110 if (!defined $ARGV[1])
111 {
112 &printUsageAndExit('Error! Missing active Greenstone collection name');
113 }
114 $collection = $ARGV[1];
115 # - optional
116 my $i = 2;
117 while (defined $ARGV[$i])
118 {
119 if ($ARGV[$i] eq "-nodaemon")
120 {
121 $no_daemon = 1;
122 }
123 if ($ARGV[$i] eq "-debug")
124 {
125 $debug = 1;
126 }
127 $i++;
128 }
129
130 # Read in the collection specific configuration
131 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
132 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
133 my $line = '';
134 while (($line = <CFGIN>))
135 {
136 if ($line =~ /^(\w+)\s+(.*)$/)
137 {
138 my $key = $1;
139 my $value = $2;
140 if ($key eq "serverhost")
141 {
142 $server_host = $value;
143 }
144 if ($key eq "serverport")
145 {
146 $server_port = $value;
147 }
148 if ($key eq "threads")
149 {
150 $server_threads = $value;
151 }
152 }
153 }
154 close(CFGIN);
155
156 if ($debug)
157 {
158 print " - collection: " . $collection . "\n";
159 print " - parent pid: " . $parent_pid . "\n";
160 print " - no daemon? " . $no_daemon . "\n";
161 print " - debug? " . $debug . "\n";
162 print " - serverhost: " . $server_host . "\n";
163 print " - serverport: " . $server_port . "\n";
164 print " - threads: " . $server_threads . "\n";
165 print "\n";
166 }
167
168 # Information about any running TDBServer is stored in a lockfile in
169 # Greenstone's tmp directory (named after the active collection)
170 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
171 if (!-d $tmp_dir)
172 {
173 mkdir($tmp_dir, 0755);
174 }
175 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
176
177 # If already running, then exit
178 print " * Testing if TDBServer for this collection already running... ";
179 if (-e $server_lockfile_path)
180 {
181 print "Error! TDBServer already running!\n";
182 print "Lockfile found at: " . $server_lockfile_path . "\n";
183 exit(0);
184 }
185 print "All clear!\n";
186
187 # Ensure we can see tdb edit tools on the path
188 print " * Testing for tool: " . $tdbexe . "... ";
189 my $result = `$tdbexe 2>&1`;
190 if ($result !~ /usage:\s+$tdbexe/)
191 {
192 print "Error! " . $tdbexe . " not available - check path.\n";
193 exit(0);
194 }
195 print "Found!\n";
196
197 # Daemonize
198 my $pid = 0;
199 if (!$no_daemon)
200 {
201 print " * Spawning Daemon...\n" unless (!$debug);
202 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
203 if (!-d $logs_dir)
204 {
205 mkdir($logs_dir, 0755);
206 }
207 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
208 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
209 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
210 child_STDOUT => $daemon_out_path,
211 child_STDERR => $daemon_err_path,
212 } );
213 }
214
215 # Master process has pid > 0
216 if ($pid == 0)
217 {
218 # - create server object
219 print " * Creating pool of " . $server_threads . " threads listening on socket: " . $server_host . ":" . $server_port . "\n";
220 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
221 port=>$server_port,
222 thread_count=>$server_threads,
223 main_cb => \&exitCheck,
224 processor_cb => \&process);
225
226 # - write a lockfile
227 print " * Creating lock file: " . $server_lockfile_path . "\n";
228 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
229 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
230 print SLFH $server_host . ':' . $server_port;
231 flock(SLFH, LOCK_UN);
232 close(SLFH);
233
234 # Perform main loop
235 # - loop is actually in Server code. start() only returns once server's stop
236 # command has been called
237 print " * Listening:\n";
238 $server->start;
239 print " * Stopping...\n";
240
241 # Perform deinitializes here
242 # - remove server lockfile
243 print " * Removing lock file...\n";
244 unlink($server_lockfile_path);
245 print "Done!\n";
246 }
247 # Forked child processes
248 else
249 {
250 print " * Waiting for lockfile to be created";
251 while (!-e $server_lockfile_path)
252 {
253 print '.';
254 sleep(1);
255 }
256 print "\n * TDBServer lockfile created.\n";
257 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
258 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
259 my $line = <SLFH>;
260 if ($line =~ /(^.+):(\d+)$/)
261 {
262 print " => Server now listening on " . $1 . ":" . $2 . "\n";
263 }
264 else
265 {
266 die ("Error! Failed to retrieve host and port information from lockfile!");
267 }
268 flock(SLFH, LOCK_UN);
269 close(SLFH);
270 }
271
272 print "===== Complete! =====\n";
273}
274exit(0);
275
276# @function exitCheck
277# A callback function, called every 5 seconds (default) by the socket server,
278# to see whether the parent process (by pid) is actually still running. This
279# will cover the case where the parent process (import.pl or build.pl) dies
280# without properly asking the server to shutdown.
281sub exitCheck
282{
283 my $counter = shift @_;
284 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
285 # Parent PID not available or we aren't allowed to talk to it (debugging)
286 if ($parent_pid == 0)
287 {
288 return;
289 }
290 # note: kill, when passed a first argument of 0, checks whether it's possible
291 # to send a signal to the pid given as the second argument, and returns true
292 # if it is. Thus it provides a means to determine if the parent process is
293 # still running (and hence can be signalled) In newer versions of Perl
294 # (5.8.9) it should even work cross-platform.
295 if (!kill(0, $parent_pid))
296 {
297 print " * Parent processs gone away... forcing server shutdown\n";
298 $server->stop;
299 if ($debug)
300 {
301 lock($debug_log);
302 $|++;
303 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
304 $|--;
305 }
306 }
307}
308
309# /** @function process
310# * A horribly named function that is called back to process each of the
311# * requests to alter the TDB databases. It expects a complete TDB CLI
312# * command as a text blob, or one of a limited number of special commands
313# * ([a]dd or [r]emove listener, or [q]uit).
314# */
315sub process
316{
317 my $data = shift @_;
318 my $ip = shift @_;
319 my $tid = shift @_;
320 my $result = "#ERROR#";
321 my $the_count = 0;
322 {
323 lock($msg_counter);
324 $msg_counter++;
325 $the_count = $msg_counter + 0;
326 # //unlock($msg_counter);
327 }
328 &debugPrint($the_count, $tid, 'RECV', $data);
329 # process special commands first
330 if ($data =~ /^#([arq]):(.*)$/)
331 {
332 my $command = $1;
333 my $argument = $2;
334 # addlistener(<pid>)
335 if ($command eq "a")
336 {
337 lock(%listeners);
338 $listeners{$argument} = 1;
339 my $listener_count = scalar(keys(%listeners));
340 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
341 # //unlock(%listeners)
342 }
343 # removelistener(<pid>)
344 elsif ($command eq "r")
345 {
346 my $listener_count = 0;
347 {
348 lock(%listeners);
349 if (defined $listeners{$argument})
350 {
351 delete $listeners{$argument};
352 }
353 $listener_count = scalar(keys(%listeners));
354 # //unlock(%listeners)
355 }
356 lock($should_stop);
357 if ($should_stop == 1 && $listener_count == 0)
358 {
359 # server isn't shared, but the stop data member is!
360 $server->stop;
361 $result = "[SUCCESS] removed last listener, stopping";
362 }
363 else
364 {
365 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
366 }
367 # //unlock($should_stop)
368 }
369 # we may be asked to stop the server, but only by the process that created
370 # us. If there are no listeners registered, we stop straight away,
371 # otherwise we set a flag so that as soon as there are no listeners we
372 # stop.
373 elsif ($command eq "q")
374 {
375 if ($argument ne $parent_pid && $argument ne "*")
376 {
377 $result = "[IGNORED] can only be stopped by parent process";
378 }
379 else
380 {
381 my $listener_count = 0;
382 {
383 lock(%listeners);
384 $listener_count = scalar(keys(%listeners));
385 # //unlock(%listeners)
386 }
387 if ($listener_count == 0)
388 {
389 # server isn't shared, but the stop data member is!
390 $server->stop;
391 $result = "[SUCCESS] stopping";
392 }
393 else
394 {
395 lock($should_stop);
396 $should_stop = 1;
397 $result = "[PENDING] will stop when no more listeners";
398 # //unlock($should_stop)
399 }
400 }
401 }
402 }
403 # Everything thing else should be a TDB command
404 # form <database>:<key>:<value>
405 # where: database is [d]oc, [i]ndex, or [s]rc
406 elsif ($data =~ /^([dis]):\[([^\]]+)\]([+?\-])(.+)$/s)
407 {
408 my $database = $1;
409 my $key = $2;
410 my $action = $3
411 my $payload = $4;
412 # Build path to database file
413 my $tdb_path = '';
414 if ($database eq 'd')
415 {
416 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
417 }
418 elsif ($database eq 's')
419 {
420 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
421 }
422 else
423 {
424 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
425 }
426 # Harnesses seem like goodly magic - but unfortunately may be broken
427 # magic. Testing on Medusa randomly hangs on the finish() function.
428 if ($use_harness)
429 {
430 my $record = '[' . $key . ']' . $action . $payload;
431 # Open harness to TDBCLI
432 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
433 my $buffer_to_tdb = '';
434 my $buffer_from_tdb = '';
435 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
436 # Check the harness worked
437 if (!pumpable $tdb_harness)
438 {
439 die("Error! Harness to " . $tdbexe . " has gone away!");
440 }
441 # - write the data to the TDBCLI
442 $buffer_to_tdb = $record . "\n";
443 my $write_msg = '#' . $the_count . ' writing: |' . $record . '| => ';
444 while (length($buffer_to_tdb))
445 {
446 $write_msg .='.';
447 pump($tdb_harness);
448 }
449 print STDERR $write_msg . "\n";
450 # - read any response from TDBCLI
451 my $read_msg = '#' . $the_count . ' reading: ';
452 while ($buffer_from_tdb !~ /-{70}/)
453 {
454 $read_msg .= '.';
455 pump($tdb_harness);
456 }
457 print STDERR $read_msg . ' => |' . $buffer_from_tdb . "|\n";
458 # - not that this result doesn't include the [Server] prefix as it
459 # may be parsed for data by the client
460 $result = $buffer_from_tdb;
461 chomp($result);
462 # Finished with harness
463 finish($tdb_harness);
464 }
465 # Use different TDB tools depending on arguments
466 # - lookups using TDBGET
467 elsif ($action eq '?')
468 {
469 my $command = 'tdbget "' . $tdb_path . '" "' . $key . '"';
470 &debugPrint($the_count, $tid, 'TDBGET', 'Command: ' . $command);
471 my $result = `$command`;
472 &debugPrint($the_count, $tid, 'TDBGET', 'Result: ' . $result);
473 if ($result =~ /-{70}/)
474 {
475 $result .= "-"x70 . "\n";
476 }
477 }
478 # - adds, updates and deletes using TXT2TDB
479 elsif ($action eq '+' || $action eq '-')
480 {
481 my $command = 'txt2tdb "' . $tdb_path . '" -append';
482 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command);
483 open(my $infodb_handle, '| ' . $command) or die("Error! Failed to open pipe to TXT2TDB\n");
484 print $infodb_handle '[' . $key . ']';
485 if ($action eq '-')
486 {
487 print $infodb_handle $action;
488 }
489 print $infodb_handle $payload;
490 close($infodb_handle);
491 $result = "-"x70 . "\n";
492 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result);
493 }
494 else
495 {
496 die("Error! Unknown action: " . $action . "\n");
497 }
498 }
499 # Synchronized debug log writing
500 &debugPrint($the_count, $tid, 'SEND', $result);
501 return $result;
502}
503
504sub debugPrint
505{
506 my ($the_count, $tid, $type, $msg) = @_;
507 if ($debug)
508 {
509 lock($debug_log);
510 $|++;
511 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
512 $|--;
513 # //unlock($debug_log);
514 }
515}
516
517sub printUsageAndExit
518{
519 my ($msg) = @_;
520 print "$msg\n\n";
521 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
522 exit(0);
523}
524
5251;
Note: See TracBrowser for help on using the repository browser.