#!/usr/bin/perl use strict; use warnings; # Setup Environment BEGIN { die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify"); if (defined $ENV{'GSDLEXTS'}) { my @extensions = split(/:/,$ENV{'GSDLEXTS'}); foreach my $e (@extensions) { my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e"; unshift (@INC, "$ext_prefix/perllib"); unshift (@INC, "$ext_prefix/perllib/cpan"); unshift (@INC, "$ext_prefix/perllib/plugins"); unshift (@INC, "$ext_prefix/perllib/classify"); } } if (defined $ENV{'GSDL3EXTS'}) { my @extensions = split(/:/,$ENV{'GSDL3EXTS'}); foreach my $e (@extensions) { my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e"; unshift (@INC, "$ext_prefix/perllib"); unshift (@INC, "$ext_prefix/perllib/cpan"); unshift (@INC, "$ext_prefix/perllib/plugins"); unshift (@INC, "$ext_prefix/perllib/classify"); } } # Manually installed CPAN package in GEXT*INSTALL # - parse up version number my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/; # - get rid of leading zeros by making them integers $major += 0; $minor += 0; $revision += 0; # - and add to Perl's path unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision); } use Cwd; # We need to do a little file locking use Fcntl qw(:flock); #import LOCK_* constants # Advanced child process control allowing bidirectional pipes use IPC::Run qw(harness start pump finish); # we need to run as a daemon use Proc::Daemon; # The server will need to accept requests from multiple threads, and # so will need threads in and of itself use threads; use threads::shared; # Greenstone utility functions (filename_cat) use util; # A simple server that listens on a socket and 'forks' off child threads to # handle each incoming request use SocketsSwimmingThreadPoolServer; # Globally available - but once set these are read-only - so locking isn't # an issue my $use_harness = 0; my $tdbexe = 'tdbcli'; my $parent_pid = 0; my $collection = ''; my $no_daemon = 0; my $debug = 0; my $server; my $server_host; my $server_port; my $server_threads; # - shared and, more importantly, lockable my %listeners :shared; my $should_stop :shared = 0; my $debug_log :shared = 0; my $msg_counter :shared = 0; print "===== TDB Server =====\n"; print "Provides a server to allow multiple remote machines to simultaenously\n"; print "edit one or more TDB databases on the local machine. This is to work\n"; print "around NFS file locking issues when parallel processing on a cluster.\n"; MAIN: { # Check arguments # - compulsory if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/) { &printUsageAndExit('Error! Missing parent process ID or not a PID'); } $parent_pid = $ARGV[0]; if (!defined $ARGV[1]) { &printUsageAndExit('Error! Missing active Greenstone collection name'); } $collection = $ARGV[1]; # - optional my $i = 2; while (defined $ARGV[$i]) { if ($ARGV[$i] eq "-nodaemon") { $no_daemon = 1; } if ($ARGV[$i] eq "-debug") { $debug = 1; } $i++; } # Read in the collection specific configuration my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf'); open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path); my $line = ''; while (($line = )) { if ($line =~ /^(\w+)\s+(.*)$/) { my $key = $1; my $value = $2; if ($key eq "serverhost") { $server_host = $value; } if ($key eq "serverport") { $server_port = $value; } if ($key eq "threads") { $server_threads = $value; } } } close(CFGIN); if ($debug) { print " - collection: " . $collection . "\n"; print " - parent pid: " . $parent_pid . "\n"; print " - no daemon? " . $no_daemon . "\n"; print " - debug? " . $debug . "\n"; print " - serverhost: " . $server_host . "\n"; print " - serverport: " . $server_port . "\n"; print " - threads: " . $server_threads . "\n"; print "\n"; } # Information about any running TDBServer is stored in a lockfile in # Greenstone's tmp directory (named after the active collection) my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp'); if (!-d $tmp_dir) { mkdir($tmp_dir, 0755); } my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock'); # If already running, then exit print " * Testing if TDBServer for this collection already running... "; if (-e $server_lockfile_path) { print "Error! TDBServer already running!\n"; print "Lockfile found at: " . $server_lockfile_path . "\n"; exit(0); } print "All clear!\n"; # Ensure we can see tdb edit tools on the path print " * Testing for tool: " . $tdbexe . "... "; my $result = `$tdbexe 2>&1`; if ($result !~ /usage:\s+$tdbexe/) { print "Error! " . $tdbexe . " not available - check path.\n"; exit(0); } print "Found!\n"; # Daemonize my $pid = 0; if (!$no_daemon) { print " * Spawning Daemon...\n" unless (!$debug); my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs'); if (!-d $logs_dir) { mkdir($logs_dir, 0755); } my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out'); my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err'); $pid = Proc::Daemon::Init( { work_dir => getcwd(), child_STDOUT => $daemon_out_path, child_STDERR => $daemon_err_path, } ); } # Master process has pid > 0 if ($pid == 0) { # - create server object print " * Creating pool of " . $server_threads . " threads listening on socket: " . $server_host . ":" . $server_port . "\n"; $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host, port=>$server_port, thread_count=>$server_threads, main_cb => \&exitCheck, processor_cb => \&process); # - write a lockfile print " * Creating lock file: " . $server_lockfile_path . "\n"; open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n"); flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n"); print SLFH $server_host . ':' . $server_port; flock(SLFH, LOCK_UN); close(SLFH); # Perform main loop # - loop is actually in Server code. start() only returns once server's stop # command has been called print " * Listening:\n"; $server->start; print " * Stopping...\n"; # Perform deinitializes here # - remove server lockfile print " * Removing lock file...\n"; unlink($server_lockfile_path); print "Done!\n"; } # Forked child processes else { print " * Waiting for lockfile to be created"; while (!-e $server_lockfile_path) { print '.'; sleep(1); } print "\n * TDBServer lockfile created.\n"; open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n"); flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n"); my $line = ; if ($line =~ /(^.+):(\d+)$/) { print " => Server now listening on " . $1 . ":" . $2 . "\n"; } else { die ("Error! Failed to retrieve host and port information from lockfile!"); } flock(SLFH, LOCK_UN); close(SLFH); } print "===== Complete! =====\n"; } exit(0); # @function exitCheck # A callback function, called every 5 seconds (default) by the socket server, # to see whether the parent process (by pid) is actually still running. This # will cover the case where the parent process (import.pl or build.pl) dies # without properly asking the server to shutdown. sub exitCheck { my $counter = shift @_; #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n"; # Parent PID not available or we aren't allowed to talk to it (debugging) if ($parent_pid == 0) { return; } # note: kill, when passed a first argument of 0, checks whether it's possible # to send a signal to the pid given as the second argument, and returns true # if it is. Thus it provides a means to determine if the parent process is # still running (and hence can be signalled) In newer versions of Perl # (5.8.9) it should even work cross-platform. if (!kill(0, $parent_pid)) { print " * Parent processs gone away... forcing server shutdown\n"; $server->stop; if ($debug) { lock($debug_log); $|++; print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n"; $|--; } } } # /** @function process # * A horribly named function that is called back to process each of the # * requests to alter the TDB databases. It expects a complete TDB CLI # * command as a text blob, or one of a limited number of special commands # * ([a]dd or [r]emove listener, or [q]uit). # */ sub process { my $data = shift @_; my $ip = shift @_; my $tid = shift @_; my $result = "#ERROR#"; my $the_count = 0; { lock($msg_counter); $msg_counter++; $the_count = $msg_counter + 0; # //unlock($msg_counter); } &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug; # process special commands first if ($data =~ /^#([arq]):(.*)$/) { my $command = $1; my $argument = $2; # addlistener() if ($command eq "a") { lock(%listeners); $listeners{$argument} = 1; my $listener_count = scalar(keys(%listeners)); $result = "[SUCCESS] added listener [" . $listener_count . " listeners]"; # //unlock(%listeners) } # removelistener() elsif ($command eq "r") { my $listener_count = 0; { lock(%listeners); if (defined $listeners{$argument}) { delete $listeners{$argument}; } $listener_count = scalar(keys(%listeners)); # //unlock(%listeners) } lock($should_stop); if ($should_stop == 1 && $listener_count == 0) { # server isn't shared, but the stop data member is! $server->stop; $result = "[SUCCESS] removed last listener, stopping"; } else { $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]"; } # //unlock($should_stop) } # we may be asked to stop the server, but only by the process that created # us. If there are no listeners registered, we stop straight away, # otherwise we set a flag so that as soon as there are no listeners we # stop. elsif ($command eq "q") { if ($argument ne $parent_pid && $argument ne "*") { $result = "[IGNORED] can only be stopped by parent process"; } else { my $listener_count = 0; { lock(%listeners); $listener_count = scalar(keys(%listeners)); # //unlock(%listeners) } if ($listener_count == 0) { # server isn't shared, but the stop data member is! $server->stop; $result = "[SUCCESS] stopping"; } else { lock($should_stop); $should_stop = 1; $result = "[PENDING] will stop when no more listeners"; # //unlock($should_stop) } } } } # Everything thing else should be a TDB command # form :: # where: database is [d]oc, [i]ndex, or [s]rc elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s) { my $database = $1; my $key = $2; my $action = $3; # by default we add for backwards compatibility if (!defined $action || $action eq '') { #rint STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n"; $action = '+'; } my $payload = $4; $payload =~ s/^\s+|\s+$//g; &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug; # Build path to database file my $tdb_path = ''; if ($database eq 'd') { $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb'); } elsif ($database eq 's') { $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb'); } else { $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb'); } # Harnesses seem like goodly magic - but unfortunately may be broken # magic. Testing on Medusa randomly hangs on the finish() function. if ($use_harness) { my $record = '[' . $key . ']' . $action . $payload; # Open harness to TDBCLI &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug; my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count); my $buffer_to_tdb = ''; my $buffer_from_tdb = ''; my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb); # Check the harness worked if (!pumpable $tdb_harness) { die("Error! Harness to " . $tdbexe . " has gone away!"); } # - write the data to the TDBCLI $buffer_to_tdb = $record . "\n"; while (length($buffer_to_tdb)) { pump($tdb_harness); } # - read any response from TDBCLI &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug; while ($buffer_from_tdb !~ /-{70}/) { pump($tdb_harness); } # - explicitly tell the pipe to quit (empty key) &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug; $buffer_to_tdb = "[]\n"; while (length($buffer_to_tdb)) { pump($tdb_harness); } # - not that this result doesn't include the [Server] prefix as it # may be parsed for data by the client $result = $buffer_from_tdb; chomp($result); # Finished with harness &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug; finish($tdb_harness); &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug; } # Use different TDB tools depending on arguments # - lookups using TDBGET elsif ($action eq '?') { my $command_name = ''; my $command1 = ''; # Special case for retrieve all keys (indicated by *) if ($key eq '*') { $command_name = 'TDBKEYS'; $command1 = 'tdbkeys "' . $tdb_path . '"'; } else { $command_name = 'TDBGET'; $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"'; } &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug; $result = `$command1`; &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug; if ($result =~ /-{70}/) { $result .= "-"x70 . "\n"; } } # - adds, updates and deletes using TXT2TDB elsif ($action eq '+' || $action eq '-') { my $command2 = 'txt2tdb -append "' . $tdb_path . '"'; &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug; open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n"); print $infodb_handle '[' . $key . ']'; if ($action eq '-') { print $infodb_handle $action; } print $infodb_handle $payload; close($infodb_handle); $result = "-"x70 . "\n"; &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug; } else { print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n"; } } # Synchronized debug log writing &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug; return $result; } sub debugPrint { my ($the_count, $tid, $type, $msg) = @_; if ($debug) { lock($debug_log); $|++; print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n"; $|--; # //unlock($debug_log); } } sub printUsageAndExit { my ($msg) = @_; print "$msg\n\n"; print "Usage: TDBServer.pl [-nodaemon] [-debug]\n\n"; exit(0); } 1;