source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 26990

Last change on this file since 26990 was 26990, checked in by jmt12, 11 years ago

Oh - also made not finding an existing TDB non-fatal, as there is a call to tdbkeys almost as soon as import starts

  • Property svn:executable set to *
File size: 17.3 KB
Line 
1#!/usr/bin/perl
2
3# jmt12
4
5use strict;
6use warnings;
7
8# Setup Environment
9BEGIN
10{
11 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
12 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
13
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
19
20 if (defined $ENV{'GSDLEXTS'})
21 {
22 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
23 foreach my $e (@extensions)
24 {
25 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
26 unshift (@INC, "$ext_prefix/perllib");
27 unshift (@INC, "$ext_prefix/perllib/cpan");
28 unshift (@INC, "$ext_prefix/perllib/plugins");
29 unshift (@INC, "$ext_prefix/perllib/classify");
30 }
31 }
32 if (defined $ENV{'GSDL3EXTS'})
33 {
34 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
35 foreach my $e (@extensions)
36 {
37 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
38 unshift (@INC, "$ext_prefix/perllib");
39 unshift (@INC, "$ext_prefix/perllib/cpan");
40 unshift (@INC, "$ext_prefix/perllib/plugins");
41 unshift (@INC, "$ext_prefix/perllib/classify");
42 }
43 }
44
45 # Manually installed CPAN package in GEXT*INSTALL
46 # - parse up version number
47 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
48 # - get rid of leading zeros by making them integers
49 $major += 0;
50 $minor += 0;
51 $revision += 0;
52 # - and add to Perl's path
53 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
54 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
55}
56
57use Cwd;
58
59# We need to do a little file locking
60use Fcntl qw(:flock); #import LOCK_* constants
61
62# Advanced child process control allowing bidirectional pipes
63use IPC::Run qw(harness start pump finish);
64
65# we need to run as a daemon
66use Proc::Daemon;
67
68# The server will need to accept requests from multiple threads, and
69# so will need threads in and of itself
70use threads;
71use threads::shared;
72
73# Greenstone utility functions (filename_cat)
74use util;
75
76# A simple server that listens on a socket and 'forks' off child threads to
77# handle each incoming request
78use SocketsSwimmingThreadPoolServer;
79
80# Globally available - but once set these are read-only - so locking isn't
81# an issue
82my $use_harness = 0;
83my $tdbexe = 'tdbcli';
84my $parent_pid = 0;
85my $collection = '';
86my $no_daemon = 0;
87my $debug = 1;
88my $server;
89my $machine_name = `hostname -s`;
90chomp($machine_name);
91my $server_host = $machine_name . '.local';
92my $server_port;
93my $server_threads;
94# - shared and, more importantly, lockable
95my %listeners :shared;
96my $should_stop :shared = 0;
97my $debug_log :shared = 0;
98
99my $msg_counter :shared = 0;
100
101print "===== TDB Server =====\n";
102print "Provides a server to allow multiple remote machines to simultaenously\n";
103print "edit one or more TDB databases on the local machine. This is to work\n";
104print "around NFS file locking issues when parallel processing on a cluster.\n";
105
106MAIN:
107{
108 # Check arguments
109 # - compulsory
110 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
111 {
112 &printUsageAndExit('Error! Missing parent process ID or not a PID');
113 }
114 $parent_pid = $ARGV[0];
115 if (!defined $ARGV[1])
116 {
117 &printUsageAndExit('Error! Missing active Greenstone collection name');
118 }
119 $collection = $ARGV[1];
120 # - optional
121 my $i = 2;
122 while (defined $ARGV[$i])
123 {
124 if ($ARGV[$i] eq "-nodaemon")
125 {
126 $no_daemon = 1;
127 }
128 if ($ARGV[$i] eq "-debug")
129 {
130 $debug = 1;
131 }
132 $i++;
133 }
134
135 # Read in the collection specific configuration
136 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
137 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
138 my $line = '';
139 while (($line = <CFGIN>))
140 {
141 if ($line =~ /^(\w+)\s+(.*)$/)
142 {
143 my $key = $1;
144 my $value = $2;
145 # Allow the override of serverhost
146 if ($key eq "serverhost")
147 {
148 $server_host = $value;
149 }
150 if ($key eq "serverport")
151 {
152 $server_port = $value;
153 }
154 if ($key eq "threads")
155 {
156 $server_threads = $value;
157 }
158 }
159 }
160 close(CFGIN);
161
162 if ($debug)
163 {
164 print " - collection: " . $collection . "\n";
165 print " - parent pid: " . $parent_pid . "\n";
166 print " - no daemon? " . $no_daemon . "\n";
167 print " - debug? " . $debug . "\n";
168 print " - serverhost: " . $server_host . "\n";
169 print " - serverport: " . $server_port . "\n";
170 print " - threads: " . $server_threads . "\n";
171 print "\n";
172 }
173
174 # Information about any running TDBServer is stored in a lockfile in
175 # Greenstone's tmp directory (named after the active collection)
176 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
177 if (!-d $tmp_dir)
178 {
179 mkdir($tmp_dir, 0755);
180 }
181 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
182
183 # If already running, then exit
184 print " * Testing if TDBServer for this collection already running... ";
185 if (-e $server_lockfile_path)
186 {
187 print "Error! TDBServer already running!\n";
188 print "Lockfile found at: " . $server_lockfile_path . "\n";
189 exit(0);
190 }
191 print "All clear!\n";
192
193 # Ensure we can see tdb edit tools on the path
194 print " * Testing for tool: " . $tdbexe . "... ";
195 my $result = `$tdbexe 2>&1`;
196 if ($result !~ /usage:\s+$tdbexe/)
197 {
198 print "Error! " . $tdbexe . " not available - check path.\n";
199 exit(0);
200 }
201 print "Found!\n";
202
203 # Daemonize
204 my $pid = 0;
205 if (!$no_daemon)
206 {
207 print " * Spawning Daemon...\n" unless (!$debug);
208 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
209 if (!-d $logs_dir)
210 {
211 mkdir($logs_dir, 0755);
212 }
213 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
214 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
215 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
216 child_STDOUT => $daemon_out_path,
217 child_STDERR => $daemon_err_path,
218 } );
219 }
220
221 # Master process has pid > 0
222 if ($pid == 0)
223 {
224 print STDERR "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
225 print " * Starting server on " . $server_host . ":" . $server_port . "\n";
226 # - create server object
227 print " * Creating pool of " . $server_threads . " threads listening on socket\n";
228 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
229 port=>$server_port,
230 thread_count=>$server_threads,
231 main_cb => \&exitCheck,
232 processor_cb => \&process);
233
234 # - write a lockfile
235 print " * Creating lock file: " . $server_lockfile_path . "\n";
236 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
237 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
238 print SLFH $server_host . ':' . $server_port;
239 flock(SLFH, LOCK_UN);
240 close(SLFH);
241
242 # Perform main loop
243 # - loop is actually in Server code. start() only returns once server's stop
244 # command has been called
245 print " * Listening:\n";
246 $server->start;
247 print " * Stopping...\n";
248
249 # Perform deinitializes here
250 # - remove server lockfile
251 print " * Removing lock file...\n";
252 unlink($server_lockfile_path);
253 print "Done!\n";
254 }
255 # Forked child processes
256 else
257 {
258 print " * Waiting for lockfile to be created";
259 while (!-e $server_lockfile_path)
260 {
261 print '.';
262 sleep(1);
263 }
264 print "\n * TDBServer lockfile created.\n";
265 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
266 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
267 my $line = <SLFH>;
268 if ($line =~ /(^.+):(\d+)$/)
269 {
270 print " => Server now listening on " . $1 . ":" . $2 . "\n";
271 }
272 else
273 {
274 die ("Error! Failed to retrieve host and port information from lockfile!");
275 }
276 flock(SLFH, LOCK_UN);
277 close(SLFH);
278 }
279
280 print "===== Complete! =====\n";
281}
282exit(0);
283
284# @function exitCheck
285# A callback function, called every 5 seconds (default) by the socket server,
286# to see whether the parent process (by pid) is actually still running. This
287# will cover the case where the parent process (import.pl or build.pl) dies
288# without properly asking the server to shutdown.
289sub exitCheck
290{
291 my $counter = shift @_;
292 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
293 # Parent PID not available or we aren't allowed to talk to it (debugging)
294 if ($parent_pid == 0)
295 {
296 return;
297 }
298 # note: kill, when passed a first argument of 0, checks whether it's possible
299 # to send a signal to the pid given as the second argument, and returns true
300 # if it is. Thus it provides a means to determine if the parent process is
301 # still running (and hence can be signalled) In newer versions of Perl
302 # (5.8.9) it should even work cross-platform.
303 if (!kill(0, $parent_pid))
304 {
305 print " * Parent processs gone away... forcing server shutdown\n";
306 $server->stop;
307 if ($debug)
308 {
309 lock($debug_log);
310 $|++;
311 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
312 $|--;
313 }
314 }
315}
316
317# /** @function process
318# * A horribly named function that is called back to process each of the
319# * requests to alter the TDB databases. It expects a complete TDB CLI
320# * command as a text blob, or one of a limited number of special commands
321# * ([a]dd or [r]emove listener, or [q]uit).
322# */
323sub process
324{
325 my $data = shift @_;
326 my $ip = shift @_;
327 my $tid = shift @_;
328 my $result = "#ERROR#";
329 my $the_count = 0;
330 {
331 lock($msg_counter);
332 $msg_counter++;
333 $the_count = $msg_counter + 0;
334 # //unlock($msg_counter);
335 }
336 &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
337 # process special commands first
338 if ($data =~ /^#([arq]):(.*)$/)
339 {
340 my $command = $1;
341 my $argument = $2;
342 # addlistener(<pid>)
343 if ($command eq "a")
344 {
345 lock(%listeners);
346 $listeners{$argument} = 1;
347 my $listener_count = scalar(keys(%listeners));
348 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
349 # //unlock(%listeners)
350 }
351 # removelistener(<pid>)
352 elsif ($command eq "r")
353 {
354 my $listener_count = 0;
355 {
356 lock(%listeners);
357 if (defined $listeners{$argument})
358 {
359 delete $listeners{$argument};
360 }
361 $listener_count = scalar(keys(%listeners));
362 # //unlock(%listeners)
363 }
364 lock($should_stop);
365 if ($should_stop == 1 && $listener_count == 0)
366 {
367 # server isn't shared, but the stop data member is!
368 $server->stop;
369 $result = "[SUCCESS] removed last listener, stopping";
370 }
371 else
372 {
373 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
374 }
375 # //unlock($should_stop)
376 }
377 # we may be asked to stop the server, but only by the process that created
378 # us. If there are no listeners registered, we stop straight away,
379 # otherwise we set a flag so that as soon as there are no listeners we
380 # stop.
381 elsif ($command eq "q")
382 {
383 if ($argument ne $parent_pid && $argument ne "*")
384 {
385 $result = "[IGNORED] can only be stopped by parent process";
386 }
387 else
388 {
389 my $listener_count = 0;
390 {
391 lock(%listeners);
392 $listener_count = scalar(keys(%listeners));
393 # //unlock(%listeners)
394 }
395 if ($listener_count == 0)
396 {
397 # server isn't shared, but the stop data member is!
398 $server->stop;
399 $result = "[SUCCESS] stopping";
400 }
401 else
402 {
403 lock($should_stop);
404 $should_stop = 1;
405 $result = "[PENDING] will stop when no more listeners";
406 # //unlock($should_stop)
407 }
408 }
409 }
410 }
411 # Everything thing else should be a TDB command
412 # form <database>:<key>:<value>
413 # where: database is [d]oc, [i]ndex, or [s]rc
414 elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
415 {
416 my $database = $1;
417 my $key = $2;
418 my $action = $3;
419 # by default we add for backwards compatibility
420 if (!defined $action || $action eq '')
421 {
422 #rint STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
423 $action = '+';
424 }
425 my $payload = $4;
426 $payload =~ s/^\s+|\s+$//g;
427 &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;
428 # Build path to database file
429 my $tdb_path = '';
430 if ($database eq 'd')
431 {
432 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
433 }
434 elsif ($database eq 's')
435 {
436 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
437 }
438 else
439 {
440 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
441 }
442 # Harnesses seem like goodly magic - but unfortunately may be broken
443 # magic. Testing on Medusa randomly hangs on the finish() function.
444 if ($use_harness)
445 {
446 my $record = '[' . $key . ']' . $action . $payload;
447 # Open harness to TDBCLI
448 &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
449 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
450 my $buffer_to_tdb = '';
451 my $buffer_from_tdb = '';
452 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
453 # Check the harness worked
454 if (!pumpable $tdb_harness)
455 {
456 die("Error! Harness to " . $tdbexe . " has gone away!");
457 }
458 # - write the data to the TDBCLI
459 $buffer_to_tdb = $record . "\n";
460 while (length($buffer_to_tdb))
461 {
462 pump($tdb_harness);
463 }
464 # - read any response from TDBCLI
465 &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
466 while ($buffer_from_tdb !~ /-{70}/)
467 {
468 pump($tdb_harness);
469 }
470 # - explicitly tell the pipe to quit (empty key)
471 &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
472 $buffer_to_tdb = "[]\n";
473 while (length($buffer_to_tdb))
474 {
475 pump($tdb_harness);
476 }
477 # - not that this result doesn't include the [Server] prefix as it
478 # may be parsed for data by the client
479 $result = $buffer_from_tdb;
480 chomp($result);
481 # Finished with harness
482 &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
483 finish($tdb_harness);
484 &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
485 }
486 # Use different TDB tools depending on arguments
487 # - lookups using TDBGET
488 elsif ($action eq '?')
489 {
490 my $command_name = '';
491 my $command1 = '';
492 # Special case for retrieve all keys (indicated by *)
493 if ($key eq '*')
494 {
495 $command_name = 'TDBKEYS';
496 $command1 = 'tdbkeys "' . $tdb_path . '"';
497 }
498 else
499 {
500 $command_name = 'TDBGET';
501 $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
502 }
503 &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
504 if (-e $tdb_path)
505 {
506 $result = `$command1`;
507 }
508 else
509 {
510 &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
511 $result = '';
512 }
513 &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
514 if ($result !~ /-{70}/)
515 {
516 $result .= "-"x70 . "\n";
517 }
518 }
519 # - adds, updates and deletes using TXT2TDB
520 elsif ($action eq '+' || $action eq '-')
521 {
522 my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
523 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
524 open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
525 print $infodb_handle '[' . $key . ']';
526 if ($action eq '-')
527 {
528 print $infodb_handle $action;
529 }
530 print $infodb_handle $payload;
531 close($infodb_handle);
532 $result = "-"x70 . "\n";
533 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
534 }
535 else
536 {
537 print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
538 }
539 }
540 # Synchronized debug log writing
541 &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
542 return $result;
543}
544
545sub debugPrint
546{
547 my ($the_count, $tid, $type, $msg) = @_;
548 if ($debug)
549 {
550 lock($debug_log);
551 $|++;
552 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
553 $|--;
554 # //unlock($debug_log);
555 }
556}
557
558sub printUsageAndExit
559{
560 my ($msg) = @_;
561 print "$msg\n\n";
562 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
563 exit(0);
564}
565
5661;
Note: See TracBrowser for help on using the repository browser.