#!/usr/bin/perl use strict; use warnings; # Configuration my $thread_count = 10; # Setup Environment BEGIN { die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify"); if (defined $ENV{'GSDLEXTS'}) { my @extensions = split(/:/,$ENV{'GSDLEXTS'}); foreach my $e (@extensions) { my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e"; unshift (@INC, "$ext_prefix/perllib"); unshift (@INC, "$ext_prefix/perllib/cpan"); unshift (@INC, "$ext_prefix/perllib/plugins"); unshift (@INC, "$ext_prefix/perllib/classify"); } } if (defined $ENV{'GSDL3EXTS'}) { my @extensions = split(/:/,$ENV{'GSDL3EXTS'}); foreach my $e (@extensions) { my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e"; unshift (@INC, "$ext_prefix/perllib"); unshift (@INC, "$ext_prefix/perllib/cpan"); unshift (@INC, "$ext_prefix/perllib/plugins"); unshift (@INC, "$ext_prefix/perllib/classify"); } } # Manually installed CPAN package in GEXT*INSTALL unshift (@INC, $ENV{'GEXTPARALLELBUILDING_INSTALLED'} . "/share/perl5"); } use Cwd; # Locking is required (otherwise other threads might glom onto the lock file # before we've written our port number to it). use Fcntl qw(:flock); # advanced child process control use IPC::Run qw(harness start pump finish); # the GDBMCLI tool accepts commands on STDIN and write results on STDOUT # so we'll need a double ended pipe # @note couldn't get this to reliably work when passing between threads #use IPC::Open2; # we need to run as a daemon use Proc::Daemon; # and the whole thing will need to accept requests from multiple threads, and # so will need threads in and of itself use threads; use threads::shared; # Greenstone utility functions (filename_cat) use util; # A simple server that listens on a socket and 'forks' off child threads to # handle each incoming request use SocketsSwimmingThreadPoolServer; # Globally available my $parent_pid = 0; my $infodb_file_path = ''; my $remove_old = ''; my $gdbm; my $gdbm_reader; my $gdbm_writer; my $debug = 1; my $server; # - shared and, more importantly, lockable my %listeners :shared; my $accessing_gdbm :shared; my $should_stop :shared; my $debug_log :shared; print "===== GDBM Server =====\n"; print "Provides a persistent connection to one or more GDBM databases via a\n"; print "pool of threads listening on a specific socket.\n"; MAIN: { $accessing_gdbm = 0; $should_stop = 0; $debug_log = 0; # Check arguments if (!defined $ARGV[0] || !defined $ARGV[1]) { print "Error! Missing parent process id or path to database\n\n"; print "Usage: GDBMServer.pl [-removeold] [-nodaemon] [-debug]\n\n"; exit(0); } $parent_pid = $ARGV[0]; $infodb_file_path = $ARGV[1]; my $no_daemon = 0; my $i = 2; while (defined $ARGV[$i]) { if ($ARGV[$i] eq "-nodaemon") { $no_daemon = 1; } if ($ARGV[$i] eq "-removeold") { $remove_old = '-removeold '; } if ($ARGV[$i] eq "-debug") { $debug = 1; } $i++; } if ($debug) { print " - parent pid: " . $parent_pid . "\n"; print " - infodb: " . $infodb_file_path . "\n"; print " - no daemon? " . $no_daemon . "\n"; print " - remove old? " . $remove_old . "\n"; print " - debug? yes\n"; print "\n"; } # Information about any running GDBMServer is stored in a lockfile in # Greenstone's tmp directory (and based on the database opened) my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i; my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock'); # If already running, then exit print " * Testing for other GDBMServers already running... "; if (-e $server_lockfile_path) { print "Error! GDBMServer already running!\n"; print "Lockfile found at: " . $server_lockfile_path . "\n"; exit(0); } print "All clear!\n"; # Ensure we can see gdbmcli on the path print " * Testing for GDBMCLI... "; my $result = `gdbmcli 2>&1`; if ($result !~ /GDBM Command Line Interface/) { print "Error! GDBMCLI not available - check path.\n"; exit(0); } print "Found!\n"; # @note Easiest way to figure out the open file descriptors is to close them. # You start by figure out what the maximum number of file handles is # for your system. You then iterate through trying to close them using # the POSIX close function - which returns true iif the file descriptor # existed and was successfully closed. You can then keep track of the # highest file descriptor number successfully closed. Do this before and # after your desired function call (like start() below) and the # difference in hc_fd gives your new file descriptors in use! [jmt12] #my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX ); #$openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax; #my $hc_fd = 2; #foreach ( 3 .. $openmax ) #{ # $hc_fd = $_ if POSIX::close( $_ ); #} #print "[debug] After start fd count = " . $hc_fd . "\n"; #exit(0); # @note As mentioned above, I couldn't get the file handles produced by open2 # realiably shared between handles (even using the tricks mentioned on # the perlmonks site - http://perlmonks.org/?node_id=395513). Typically # they'd work the first time they were used (read or write) but # subsequent actions would block indefinitely. Moreover, I couldn't get # open2 to work as advertised and accept arguments (despite several # hours of dicking around) - and the work-around to make the dbpath the # first lot of IO works, but then definately causes the next action to # block forever. [jmt12] # #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice? # my @cmd = ('gdbmcli'); # $gdbm_reader = IO::Handle->new(); # $gdbm_writer = IO::Handle->new(); # print " * Opening GDBM database\n"; # $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli'); # if (!$gdbm_writer) # { # die("Error! Failed to open GDBMCLI for writing\n"); # } # if (!$gdbm_reader) # { # die("Error! Failed to open GDBMCLI for reading\n"); # } # # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any # # arguments - so instead the first command to the GDBMCLI is the path to the # # GDBM database to load. # print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n"); # my $load_result; # if ($load_result = <$gdbm_reader>) # { # print $load_result; # } # else # { # die("Error! Failed to actually read something from GDBMCLI\n"); # } # Open the database connection my @cmd = ('gdbmcli',$infodb_file_path); $gdbm_writer = ''; $gdbm_reader = ''; # @note start opens a total of four file descriptors to the 'cmd', but # we never get to know their names (maybe $gdbm->{'WIN'} etc but # I'm note sure) so instead we just have to hope that the number # of file descriptors already open doesn't change, in which case # these are fd 3, 4, 5, and 6. $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader; # - start opens four handles! # Daemonize my $pid = 0; if (!$no_daemon) { # Determine the anonymous array of file descriptors *not* to close my $dont_close_fd = []; # Building upon the "POSIX::Close()" test above, we need to explicitly # determine the new file descriptors opened by the start command. my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX ); $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax; # - then figure out the file descriptors currently open. We do this # by attempting to 'copy' each file descriptor. for (my $fd = 3; $fd <= $openmax; $fd++) { #rint "Checking file descriptor: $fd -> "; my $tmpfh; if (open $tmpfh, ">&$fd") { #rint "writable!\n"; push(@{$dont_close_fd}, $fd); close($tmpfh); } elsif (open $tmpfh, "<&$fd") { #rint "readable!\n"; push(@{$dont_close_fd}, $fd); close($tmpfh); } #else #{ # print "not open\n"; #} } print " * When forking don't close these filehandles: [" . join(",", @{$dont_close_fd}) . "]\n"; print " * Spawning Daemon...\n" unless (!$debug); my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out'); my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err'); $pid = Proc::Daemon::Init( { work_dir => getcwd(), child_STDOUT => $daemon_out_path, child_STDERR => $daemon_err_path, # @note as mentioned above, start creates four file descriptors that we need # to keep open even through the separation of the daemon process. dont_close_fd => $dont_close_fd, } ); } # Parent process has pid > 0 if ($pid == 0) { # Perform initializes here # - database connection is now handled as a special command, as there may # be multiple databases handled by this server # - localhost is good enough for now my $host = 'localhost'; # - determine a suitable port (checking that they aren't already in use) # @note this isn't at all portable, but then neither is the daemon the # way I've written it. my $port = 8190; my $result = `netstat -tnl | grep :$port`; while ($result =~ /LISTEN/) { $port++; $result = `netstat -tnl | grep :$port`; } # - create server object print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n"; $server = SocketsSwimmingThreadPoolServer->new(host=>$host, port=>$port, main_cb => \&exitCheck, processor_cb => \&process); # - write our port number into the lockfile so that other threads can figure # out where we are print " * Writing port number to lock file: " . $server_lockfile_path . "\n"; open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n"); flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n"); print SLFH $host . ':' . $port; flock(SLFH, LOCK_UN); close(SLFH); # Perform main loop # - loop is actually in Server code. start() only returns once server's stop # command has been called print " * Listening:\n"; $server->start; print " * Stopping...\n"; # Perform deinitializes here # - remove server lockfile print " * Removing lock file...\n"; unlink($server_lockfile_path); # - now close database handles (forcing flush) print " * Closing GDBMCLI\n"; finish($gdbm); print "Done!\n"; } else { print " * Waiting for GDBMServer lockfile to be created"; while (!-e $server_lockfile_path) { print '.'; sleep(1); } print "\n * GDBMServer lockfile created.\n"; open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n"); flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n"); my $line = ; if ($line =~ /(^.+):(\d+)$/) { print " => Server now listening on " . $1 . ":" . $2 . "\n"; } else { die ("Error! Failed to retrieve host and port information from lockfile!"); } flock(SLFH, LOCK_UN); close(SLFH); } print "===== Complete! =====\n"; } exit(0); # @function exitCheck # A callback function, called every 5 seconds (default) by the socket server, # to see whether the parent process (by pid) is actually still running. This # will cover the case where the parent process (import.pl or build.pl) dies # without properly asking the server to shutdown. sub exitCheck { my $counter = shift @_; # note: kill, when passed a first argument of 0, checks whether it's possible # to send a signal to the pid given as the second argument, and returns true # if it is. Thus it provides a means to determine if the parent process is # still running (and hence can be signalled) In newer versions of Perl # (5.8.9) it should even work cross-platform. if (!kill(0, $parent_pid)) { print " * Parent processs gone away... forcing server shutdown\n"; $server->stop; if ($debug) { lock($debug_log); $|++; print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n"; $|--; } } } # /** @function process # * A horribly named function that is called back to process each of the # * requests to alter the GDBM database. It expects either a typical GDBM # * text blob, or one of a limited number of special commands (which start # * with the sentinel character "!"). Note that synchronization over the # * open GDBM handle is used to ensure only one edit occurs at a time. # */ sub process { my $data = shift @_; my $ip = shift @_; my $tid = shift @_; my $value = "#ERROR#"; # Synchronized debug log writing if ($debug) { lock($debug_log); $|++; print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n"; $|--; } # process special commands first if ($data =~ /^!(.*):(.*)$/) { my $command = $1; my $argument = $2; # addlistener() if ($command eq "addlistener") { lock(%listeners); $listeners{$argument} = 1; my $listener_count = scalar(keys(%listeners)); $value = "[SUCCESS] added listener [" . $listener_count . " listeners]"; # unlock(%listeners) } # removelistener() elsif ($command eq "removelistener") { lock(%listeners); if (defined $listeners{$argument}) { delete $listeners{$argument}; } my $listener_count = scalar(keys(%listeners)); lock($should_stop); if ($should_stop == 1 && $listener_count == 0) { # server isn't shared, but the stop data member is! $server->stop; $value = "[SUCCESS] removed last listener, stopping"; } else { $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]"; } # unlock($should_stop) # unlock(%listeners) } # we may be asked to stop the server, but only by the process that created # us. If there are no listeners registered, we stop straight away, # otherwise we set a flag so that as soon as there are no listeners we # stop. elsif ($command eq "stop") { if ($argument ne $parent_pid && $argument ne "*") { $value = "[IGNORED] can only be stopped by parent process"; } else { my $listener_count = 0; { lock(%listeners); $listener_count = scalar(keys(%listeners)); # unlock(%listeners) } if ($listener_count == 0) { # server isn't shared, but the stop data member is! $server->stop; $value = "[SUCCESS] stopping"; } else { lock($should_stop); $should_stop = 1; $value = "[PENDING] will stop when no more listeners"; # unlock($should_stop) } } } } # Everything thing else should be a GDBMCLI command else { lock($accessing_gdbm); # lets check that we can still access the GDBM bidirectional pump if (!pumpable $gdbm) { die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!"); } # - write the command to GDBM $gdbm_writer = $data . "\n"; $gdbm_reader = ''; #rint "[debug] sending command to gdbmcli\n"; pump($gdbm) while length($gdbm_writer); #rint "[debug] reading output from gdbmcli\n"; pump($gdbm) until $gdbm_reader =~ /-{70}/; $value = $gdbm_reader; # trim value chomp($value); #rint "[debug] result: " . $value . "\n"; #unlock($accessing_gdbm); } # Synchronized debug log writing if ($debug) { lock($debug_log); $|++; print "[" . time() . "|" . $tid . "|SEND] " . $value . "\n\n"; $|--; } return $value; }