source: gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl@ 24847

Last change on this file since 24847 was 24847, checked in by jmt12, 12 years ago

Added the ability for the server to detect if it's calling process has gone away, and shutdown if so. This test is run every 5 seconds or so

  • Property svn:executable set to *
File size: 16.7 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Configuration
7my $thread_count = 10;
8
9# Setup Environment
10BEGIN
11{
12 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
13 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
14
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
19 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
20
21 if (defined $ENV{'GSDLEXTS'})
22 {
23 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
24 foreach my $e (@extensions)
25 {
26 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
27 unshift (@INC, "$ext_prefix/perllib");
28 unshift (@INC, "$ext_prefix/perllib/cpan");
29 unshift (@INC, "$ext_prefix/perllib/plugins");
30 unshift (@INC, "$ext_prefix/perllib/classify");
31 }
32 }
33 if (defined $ENV{'GSDL3EXTS'})
34 {
35 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
36 foreach my $e (@extensions)
37 {
38 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
39 unshift (@INC, "$ext_prefix/perllib");
40 unshift (@INC, "$ext_prefix/perllib/cpan");
41 unshift (@INC, "$ext_prefix/perllib/plugins");
42 unshift (@INC, "$ext_prefix/perllib/classify");
43 }
44 }
45
46 # Manually installed CPAN package in GEXT*INSTALL
47 unshift (@INC, $ENV{'GEXTPARALLELBUILDING_INSTALLED'} . "/share/perl5");
48}
49
50use Cwd;
51# Locking is required (otherwise other threads might glom onto the lock file
52# before we've written our port number to it).
53use Fcntl qw(:flock);
54# advanced child process control
55use IPC::Run qw(harness start pump finish);
56# the GDBMCLI tool accepts commands on STDIN and write results on STDOUT
57# so we'll need a double ended pipe
58# @note couldn't get this to reliably work when passing between threads
59#use IPC::Open2;
60# we need to run as a daemon
61use Proc::Daemon;
62# and the whole thing will need to accept requests from multiple threads, and
63# so will need threads in and of itself
64use threads;
65use threads::shared;
66
67# Greenstone utility functions (filename_cat)
68use util;
69# A simple server that listens on a socket and 'forks' off child threads to
70# handle each incoming request
71use SocketsSwimmingThreadPoolServer;
72
73# Globally available
74my $parent_pid = 0;
75my $infodb_file_path = '';
76my $remove_old = '';
77my $gdbm;
78my $gdbm_reader;
79my $gdbm_writer;
80my $debug = 1;
81my $server;
82# - shared and, more importantly, lockable
83my %listeners :shared;
84my $accessing_gdbm :shared;
85my $should_stop :shared;
86my $debug_log :shared;
87
88print "===== GDBM Server =====\n";
89print "Provides a persistent connection to one or more GDBM databases via a\n";
90print "pool of threads listening on a specific socket.\n";
91
92MAIN:
93{
94 $accessing_gdbm = 0;
95 $should_stop = 0;
96 $debug_log = 0;
97
98 # Check arguments
99 if (!defined $ARGV[0] || !defined $ARGV[1])
100 {
101 print "Error! Missing parent process id or path to database\n\n";
102 print "Usage: GDBMServer.pl <pid name> <path to database> [-removeold] [-nodaemon] [-debug]\n\n";
103 exit(0);
104 }
105 $parent_pid = $ARGV[0];
106 $infodb_file_path = $ARGV[1];
107 my $no_daemon = 0;
108 my $i = 2;
109 while (defined $ARGV[$i])
110 {
111 if ($ARGV[$i] eq "-nodaemon")
112 {
113 $no_daemon = 1;
114 }
115 if ($ARGV[$i] eq "-removeold")
116 {
117 $remove_old = '-removeold ';
118 }
119 if ($ARGV[$i] eq "-debug")
120 {
121 $debug = 1;
122 }
123 $i++;
124 }
125
126 if ($debug)
127 {
128 print " - parent pid: " . $parent_pid . "\n";
129 print " - infodb: " . $infodb_file_path . "\n";
130 print " - no daemon? " . $no_daemon . "\n";
131 print " - remove old? " . $remove_old . "\n";
132 print " - debug? yes\n";
133 print "\n";
134 }
135
136 # Information about any running GDBMServer is stored in a lockfile in
137 # Greenstone's tmp directory (and based on the database opened)
138 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
139 my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i;
140 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock');
141
142 # If already running, then exit
143 print " * Testing for other GDBMServers already running... ";
144 if (-e $server_lockfile_path)
145 {
146 print "Error! GDBMServer already running!\n";
147 print "Lockfile found at: " . $server_lockfile_path . "\n";
148 exit(0);
149 }
150 print "All clear!\n";
151
152 # Ensure we can see gdbmcli on the path
153 print " * Testing for GDBMCLI... ";
154 my $result = `gdbmcli 2>&1`;
155 if ($result !~ /GDBM Command Line Interface/)
156 {
157 print "Error! GDBMCLI not available - check path.\n";
158 exit(0);
159 }
160 print "Found!\n";
161
162 # @note Easiest way to figure out the open file descriptors is to close them.
163 # You start by figure out what the maximum number of file handles is
164 # for your system. You then iterate through trying to close them using
165 # the POSIX close function - which returns true iif the file descriptor
166 # existed and was successfully closed. You can then keep track of the
167 # highest file descriptor number successfully closed. Do this before and
168 # after your desired function call (like start() below) and the
169 # difference in hc_fd gives your new file descriptors in use! [jmt12]
170 #my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
171 #$openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
172 #my $hc_fd = 2;
173 #foreach ( 3 .. $openmax )
174 #{
175 # $hc_fd = $_ if POSIX::close( $_ );
176 #}
177 #print "[debug] After start fd count = " . $hc_fd . "\n";
178 #exit(0);
179
180 # @note As mentioned above, I couldn't get the file handles produced by open2
181 # realiably shared between handles (even using the tricks mentioned on
182 # the perlmonks site - http://perlmonks.org/?node_id=395513). Typically
183 # they'd work the first time they were used (read or write) but
184 # subsequent actions would block indefinitely. Moreover, I couldn't get
185 # open2 to work as advertised and accept arguments (despite several
186 # hours of dicking around) - and the work-around to make the dbpath the
187 # first lot of IO works, but then definately causes the next action to
188 # block forever. [jmt12]
189# #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice?
190# my @cmd = ('gdbmcli');
191# $gdbm_reader = IO::Handle->new();
192# $gdbm_writer = IO::Handle->new();
193# print " * Opening GDBM database\n";
194# $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli');
195# if (!$gdbm_writer)
196# {
197# die("Error! Failed to open GDBMCLI for writing\n");
198# }
199# if (!$gdbm_reader)
200# {
201# die("Error! Failed to open GDBMCLI for reading\n");
202# }
203# # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any
204# # arguments - so instead the first command to the GDBMCLI is the path to the
205# # GDBM database to load.
206# print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n");
207# my $load_result;
208# if ($load_result = <$gdbm_reader>)
209# {
210# print $load_result;
211# }
212# else
213# {
214# die("Error! Failed to actually read something from GDBMCLI\n");
215# }
216
217 # Open the database connection
218 my @cmd = ('gdbmcli',$infodb_file_path);
219 $gdbm_writer = '';
220 $gdbm_reader = '';
221 # @note start opens a total of four file descriptors to the 'cmd', but
222 # we never get to know their names (maybe $gdbm->{'WIN'} etc but
223 # I'm note sure) so instead we just have to hope that the number
224 # of file descriptors already open doesn't change, in which case
225 # these are fd 3, 4, 5, and 6.
226 $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader;
227 # - start opens four handles!
228
229 # Daemonize
230 my $pid = 0;
231 if (!$no_daemon)
232 {
233 # Determine the anonymous array of file descriptors *not* to close
234 my $dont_close_fd = [];
235 # Building upon the "POSIX::Close()" test above, we need to explicitly
236 # determine the new file descriptors opened by the start command.
237 my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
238 $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
239 # - then figure out the file descriptors currently open. We do this
240 # by attempting to 'copy' each file descriptor.
241 for (my $fd = 3; $fd <= $openmax; $fd++)
242 {
243 #rint "Checking file descriptor: $fd -> ";
244 my $tmpfh;
245 if (open $tmpfh, ">&$fd")
246 {
247 #rint "writable!\n";
248 push(@{$dont_close_fd}, $fd);
249 close($tmpfh);
250 }
251 elsif (open $tmpfh, "<&$fd")
252 {
253 #rint "readable!\n";
254 push(@{$dont_close_fd}, $fd);
255 close($tmpfh);
256 }
257 #else
258 #{
259 # print "not open\n";
260 #}
261 }
262 print " * When forking don't close these filehandles: [" . join(",", @{$dont_close_fd}) . "]\n";
263
264 print " * Spawning Daemon...\n" unless (!$debug);
265 my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out');
266 my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err');
267 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
268 child_STDOUT => $daemon_out_path,
269 child_STDERR => $daemon_err_path,
270 # @note as mentioned above, start creates four file descriptors that we need
271 # to keep open even through the separation of the daemon process.
272 dont_close_fd => $dont_close_fd,
273 } );
274 }
275
276 # Parent process has pid > 0
277 if ($pid == 0)
278 {
279
280 # Perform initializes here
281 # - database connection is now handled as a special command, as there may
282 # be multiple databases handled by this server
283 # - localhost is good enough for now
284 my $host = 'localhost';
285 # - determine a suitable port (checking that they aren't already in use)
286 # @note this isn't at all portable, but then neither is the daemon the
287 # way I've written it.
288 my $port = 8190;
289 my $result = `netstat -tnl | grep :$port`;
290 while ($result =~ /LISTEN/)
291 {
292 $port++;
293 $result = `netstat -tnl | grep :$port`;
294 }
295 # - create server object
296 print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n";
297 $server = SocketsSwimmingThreadPoolServer->new(host=>$host,
298 port=>$port,
299 main_cb => \&exitCheck,
300 processor_cb => \&process);
301
302 # - write our port number into the lockfile so that other threads can figure
303 # out where we are
304 print " * Writing port number to lock file: " . $server_lockfile_path . "\n";
305 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
306 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
307 print SLFH $host . ':' . $port;
308 flock(SLFH, LOCK_UN);
309 close(SLFH);
310
311 # Perform main loop
312 # - loop is actually in Server code. start() only returns once server's stop
313 # command has been called
314 print " * Listening:\n";
315 $server->start;
316 print " * Stopping...\n";
317
318 # Perform deinitializes here
319 # - remove server lockfile
320 print " * Removing lock file...\n";
321 unlink($server_lockfile_path);
322 # - now close database handles (forcing flush)
323 print " * Closing GDBMCLI\n";
324 finish($gdbm);
325 print "Done!\n";
326 }
327 else
328 {
329 print " * Waiting for GDBMServer lockfile to be created";
330 while (!-e $server_lockfile_path)
331 {
332 print '.';
333 sleep(1);
334 }
335 print "\n * GDBMServer lockfile created.\n";
336 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
337 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
338 my $line = <SLFH>;
339 if ($line =~ /(^.+):(\d+)$/)
340 {
341 print " => Server now listening on " . $1 . ":" . $2 . "\n";
342 }
343 else
344 {
345 die ("Error! Failed to retrieve host and port information from lockfile!");
346 }
347 flock(SLFH, LOCK_UN);
348 close(SLFH);
349 }
350
351 print "===== Complete! =====\n";
352}
353exit(0);
354
355# @function exitCheck
356# A callback function, called every 5 seconds (default) by the socket server,
357# to see whether the parent process (by pid) is actually still running. This
358# will cover the case where the parent process (import.pl or build.pl) dies
359# without properly asking the server to shutdown.
360sub exitCheck
361{
362 my $counter = shift @_;
363 # note: kill, when passed a first argument of 0, checks whether it's possible
364 # to send a signal to the pid given as the second argument, and returns true
365 # if it is. Thus it provides a means to determine if the parent process is
366 # still running (and hence can be signalled) In newer versions of Perl
367 # (5.8.9) it should even work cross-platform.
368 if (!kill(0, $parent_pid))
369 {
370 print " * Parent processs gone away... forcing server shutdown\n";
371 $server->stop;
372 if ($debug)
373 {
374 lock($debug_log);
375 $|++;
376 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
377 $|--;
378 }
379 }
380}
381
382# /** @function process
383# * A horribly named function that is called back to process each of the
384# * requests to alter the GDBM database. It expects either a typical GDBM
385# * text blob, or one of a limited number of special commands (which start
386# * with the sentinel character "!"). Note that synchronization over the
387# * open GDBM handle is used to ensure only one edit occurs at a time.
388# */
389sub process
390{
391 my $data = shift @_;
392 my $ip = shift @_;
393 my $tid = shift @_;
394 my $value = "#ERROR#";
395 # Synchronized debug log writing
396 if ($debug)
397 {
398 lock($debug_log);
399 $|++;
400 print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
401 $|--;
402 }
403 # process special commands first
404 if ($data =~ /^!(.*):(.*)$/)
405 {
406 my $command = $1;
407 my $argument = $2;
408 # addlistener(<pid>)
409 if ($command eq "addlistener")
410 {
411 lock(%listeners);
412 $listeners{$argument} = 1;
413 my $listener_count = scalar(keys(%listeners));
414 $value = "[SUCCESS] added listener [" . $listener_count . " listeners]";
415 # unlock(%listeners)
416 }
417 # removelistener(<pid>)
418 elsif ($command eq "removelistener")
419 {
420 lock(%listeners);
421 if (defined $listeners{$argument})
422 {
423 delete $listeners{$argument};
424 }
425 my $listener_count = scalar(keys(%listeners));
426 lock($should_stop);
427 if ($should_stop == 1 && $listener_count == 0)
428 {
429 # server isn't shared, but the stop data member is!
430 $server->stop;
431 $value = "[SUCCESS] removed last listener, stopping";
432 }
433 else
434 {
435 $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
436 }
437 # unlock($should_stop)
438 # unlock(%listeners)
439 }
440 # we may be asked to stop the server, but only by the process that created
441 # us. If there are no listeners registered, we stop straight away,
442 # otherwise we set a flag so that as soon as there are no listeners we
443 # stop.
444 elsif ($command eq "stop")
445 {
446 if ($argument ne $parent_pid && $argument ne "*")
447 {
448 $value = "[IGNORED] can only be stopped by parent process";
449 }
450 else
451 {
452 my $listener_count = 0;
453 {
454 lock(%listeners);
455 $listener_count = scalar(keys(%listeners));
456 # unlock(%listeners)
457 }
458 if ($listener_count == 0)
459 {
460 # server isn't shared, but the stop data member is!
461 $server->stop;
462 $value = "[SUCCESS] stopping";
463 }
464 else
465 {
466 lock($should_stop);
467 $should_stop = 1;
468 $value = "[PENDING] will stop when no more listeners";
469 # unlock($should_stop)
470 }
471 }
472 }
473 }
474 # Everything thing else should be a GDBMCLI command
475 else
476 {
477 lock($accessing_gdbm);
478 # lets check that we can still access the GDBM bidirectional pump
479 if (!pumpable $gdbm)
480 {
481 die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!");
482 }
483 # - write the command to GDBM
484 $gdbm_writer = $data . "\n";
485 $gdbm_reader = '';
486 #rint "[debug] sending command to gdbmcli\n";
487 pump($gdbm) while length($gdbm_writer);
488 #rint "[debug] reading output from gdbmcli\n";
489 pump($gdbm) until $gdbm_reader =~ /-{70}/;
490 $value = $gdbm_reader;
491 # trim value
492 chomp($value);
493 #rint "[debug] result: " . $value . "\n";
494 #unlock($accessing_gdbm);
495 }
496 # Synchronized debug log writing
497 if ($debug)
498 {
499 lock($debug_log);
500 $|++;
501 print "[" . time() . "|" . $tid . "|SEND] " . $value . "\n\n";
502 $|--;
503 }
504 return $value;
505}
Note: See TracBrowser for help on using the repository browser.