source: gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl

Last change on this file was 30306, checked in by jmt12, 9 years ago

Making the setup of CPAN path more robust based on the better control I now have over where CPAN packages are installed (in the OS-specific install directory)

  • Property svn:executable set to *
File size: 16.7 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Configuration
7my $thread_count = 10;
8
9# Setup Environment
10BEGIN
11{
12 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
13 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
14
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
19 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
20
21 my $ext_prefix;
22 my @extensions;
23 if (defined $ENV{'GSDLEXTS'})
24 {
25 $ext_prefix = $ENV{'GSDLHOME'};
26 @extensions = split(/:/, $ENV{'GSDLEXTS'});
27 }
28 elsif (defined $ENV{'GSDL3EXTS'})
29 {
30 $ext_prefix = $ENV{'GSDL3SRCHOME'};
31 @extensions = split(/:/, $ENV{'GSDL3EXTS'});
32 }
33 foreach my $e (@extensions)
34 {
35 my $perllib_path = $ext_prefix . '/ext/' . $e . '/perllib';
36 unshift (@INC, $perllib_path);
37 unshift (@INC, $perllib_path . '/cpan');
38 unshift (@INC, $perllib_path . '/plugins');
39 unshift (@INC, $perllib_path . '/plugouts');
40 unshift (@INC, $perllib_path . '/classify');
41 }
42
43 # Installed CPAN packages for GEXT*INSTALL
44 my $perl_version = `perl-version.pl`;
45 my $perl_path = sprintf("%s/lib/perl/%s", $ENV{'GEXTPARALLELBUILDING_INSTALLED'}, $perl_version);
46 ###rint STDERR "[DEBUG] CPAN Path: $perl_path\n";
47 unshift (@INC, $perl_path);
48}
49
50use Cwd;
51# Locking is required (otherwise other threads might glom onto the lock file
52# before we've written our port number to it).
53use Fcntl qw(:flock);
54# advanced child process control
55use IPC::Run qw(harness start pump finish);
56# the GDBMCLI tool accepts commands on STDIN and write results on STDOUT
57# so we'll need a double ended pipe
58# @note couldn't get this to reliably work when passing between threads
59#use IPC::Open2;
60# we need to run as a daemon
61use Proc::Daemon;
62# and the whole thing will need to accept requests from multiple threads, and
63# so will need threads in and of itself
64use threads;
65use threads::shared;
66
67# Greenstone utility functions (filename_cat)
68use util;
69# A simple server that listens on a socket and 'forks' off child threads to
70# handle each incoming request
71use SocketsSwimmingThreadPoolServer;
72
73# Globally available
74my $parent_pid = 0;
75my $infodb_file_path = '';
76my $remove_old = '';
77my $gdbm;
78my $gdbm_reader;
79my $gdbm_writer;
80my $debug = 1;
81my $server;
82# - shared and, more importantly, lockable
83my %listeners :shared;
84my $accessing_gdbm :shared;
85my $should_stop :shared;
86my $debug_log :shared;
87
88print "===== GDBM Server =====\n";
89print "Provides a persistent connection to one or more GDBM databases via a\n";
90print "pool of threads listening on a specific socket.\n";
91
92MAIN:
93{
94 $accessing_gdbm = 0;
95 $should_stop = 0;
96 $debug_log = 0;
97
98 # Check arguments
99 if (!defined $ARGV[0] || !defined $ARGV[1])
100 {
101 print "Error! Missing parent process id or path to database\n\n";
102 print "Usage: GDBMServer.pl <pid name> <path to database> [-removeold] [-nodaemon] [-debug]\n\n";
103 exit(0);
104 }
105 $parent_pid = $ARGV[0];
106 $infodb_file_path = $ARGV[1];
107 my $no_daemon = 0;
108 my $i = 2;
109 while (defined $ARGV[$i])
110 {
111 if ($ARGV[$i] eq "-nodaemon")
112 {
113 $no_daemon = 1;
114 }
115 if ($ARGV[$i] eq "-removeold")
116 {
117 $remove_old = '-removeold ';
118 }
119 if ($ARGV[$i] eq "-debug")
120 {
121 $debug = 1;
122 }
123 $i++;
124 }
125
126 if ($debug)
127 {
128 print " - parent pid: " . $parent_pid . "\n";
129 print " - infodb: " . $infodb_file_path . "\n";
130 print " - no daemon? " . $no_daemon . "\n";
131 print " - remove old? " . $remove_old . "\n";
132 print " - debug? yes\n";
133 print "\n";
134 }
135
136 # Information about any running GDBMServer is stored in a lockfile in
137 # Greenstone's tmp directory (and based on the database opened)
138 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
139 my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i;
140 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock');
141
142 # If already running, then exit
143 print " * Testing for other GDBMServers already running... ";
144 if (-e $server_lockfile_path)
145 {
146 print "Error! GDBMServer already running!\n";
147 print "Lockfile found at: " . $server_lockfile_path . "\n";
148 exit(0);
149 }
150 print "All clear!\n";
151
152 # Ensure we can see gdbmcli on the path
153 print " * Testing for GDBMCLI... ";
154 my $result = `gdbmcli 2>&1`;
155 if ($result !~ /GDBM Command Line Interface/)
156 {
157 print "Error! GDBMCLI not available - check path.\n";
158 exit(0);
159 }
160 print "Found!\n";
161
162 # @note Easiest way to figure out the open file descriptors is to close them.
163 # You start by figure out what the maximum number of file handles is
164 # for your system. You then iterate through trying to close them using
165 # the POSIX close function - which returns true iif the file descriptor
166 # existed and was successfully closed. You can then keep track of the
167 # highest file descriptor number successfully closed. Do this before and
168 # after your desired function call (like start() below) and the
169 # difference in hc_fd gives your new file descriptors in use! [jmt12]
170 #my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
171 #$openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
172 #my $hc_fd = 2;
173 #foreach ( 3 .. $openmax )
174 #{
175 # $hc_fd = $_ if POSIX::close( $_ );
176 #}
177 #print "[debug] After start fd count = " . $hc_fd . "\n";
178 #exit(0);
179
180 # @note As mentioned above, I couldn't get the file handles produced by open2
181 # realiably shared between handles (even using the tricks mentioned on
182 # the perlmonks site - http://perlmonks.org/?node_id=395513). Typically
183 # they'd work the first time they were used (read or write) but
184 # subsequent actions would block indefinitely. Moreover, I couldn't get
185 # open2 to work as advertised and accept arguments (despite several
186 # hours of dicking around) - and the work-around to make the dbpath the
187 # first lot of IO works, but then definately causes the next action to
188 # block forever. [jmt12]
189# #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice?
190# my @cmd = ('gdbmcli');
191# $gdbm_reader = IO::Handle->new();
192# $gdbm_writer = IO::Handle->new();
193# print " * Opening GDBM database\n";
194# $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli');
195# if (!$gdbm_writer)
196# {
197# die("Error! Failed to open GDBMCLI for writing\n");
198# }
199# if (!$gdbm_reader)
200# {
201# die("Error! Failed to open GDBMCLI for reading\n");
202# }
203# # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any
204# # arguments - so instead the first command to the GDBMCLI is the path to the
205# # GDBM database to load.
206# print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n");
207# my $load_result;
208# if ($load_result = <$gdbm_reader>)
209# {
210# print $load_result;
211# }
212# else
213# {
214# die("Error! Failed to actually read something from GDBMCLI\n");
215# }
216
217 # Open the database connection
218 my @cmd = ('gdbmcli',$infodb_file_path);
219 $gdbm_writer = '';
220 $gdbm_reader = '';
221 # @note start opens a total of four file descriptors to the 'cmd', but
222 # we never get to know their names (maybe $gdbm->{'WIN'} etc but
223 # I'm note sure) so instead we just have to hope that the number
224 # of file descriptors already open doesn't change, in which case
225 # these are fd 3, 4, 5, and 6.
226 $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader;
227 # - start opens four handles!
228
229 # Daemonize
230 my $pid = 0;
231 if (!$no_daemon)
232 {
233 # Determine the anonymous array of file descriptors *not* to close
234 my $dont_close_fd = [];
235 # Building upon the "POSIX::Close()" test above, we need to explicitly
236 # determine the new file descriptors opened by the start command.
237 my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
238 $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
239 # - then figure out the file descriptors currently open. We do this
240 # by attempting to 'copy' each file descriptor.
241 for (my $fd = 3; $fd <= $openmax; $fd++)
242 {
243 #rint "Checking file descriptor: $fd -> ";
244 my $tmpfh;
245 if (open $tmpfh, ">&$fd")
246 {
247 #rint "writable!\n";
248 push(@{$dont_close_fd}, $fd);
249 close($tmpfh);
250 }
251 elsif (open $tmpfh, "<&$fd")
252 {
253 #rint "readable!\n";
254 push(@{$dont_close_fd}, $fd);
255 close($tmpfh);
256 }
257 #else
258 #{
259 # print "not open\n";
260 #}
261 }
262 print " * When forking don't close these filehandles: [" . join(",", @{$dont_close_fd}) . "]\n";
263
264 print " * Spawning Daemon...\n" unless (!$debug);
265 my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out');
266 my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err');
267 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
268 child_STDOUT => $daemon_out_path,
269 child_STDERR => $daemon_err_path,
270 # @note as mentioned above, start creates four file descriptors that we need
271 # to keep open even through the separation of the daemon process.
272 dont_close_fd => $dont_close_fd,
273 } );
274 }
275
276 # Parent process has pid > 0
277 if ($pid == 0)
278 {
279
280 # Perform initializes here
281 # - database connection is now handled as a special command, as there may
282 # be multiple databases handled by this server
283 # - localhost is good enough for now
284 my $host = 'localhost';
285 # - determine a suitable port (checking that they aren't already in use)
286 # @note this isn't at all portable, but then neither is the daemon the
287 # way I've written it.
288 my $port = 8190;
289 my $result = `netstat -tnl | grep :$port`;
290 while ($result =~ /LISTEN/)
291 {
292 $port++;
293 $result = `netstat -tnl | grep :$port`;
294 }
295 # - create server object
296 print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n";
297 $server = SocketsSwimmingThreadPoolServer->new(host=>$host,
298 port=>$port,
299 main_cb => \&exitCheck,
300 processor_cb => \&process);
301
302 # - write our port number into the lockfile so that other threads can figure
303 # out where we are
304 print " * Writing port number to lock file: " . $server_lockfile_path . "\n";
305 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
306 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
307 print SLFH $host . ':' . $port;
308 flock(SLFH, LOCK_UN);
309 close(SLFH);
310
311 # Perform main loop
312 # - loop is actually in Server code. start() only returns once server's stop
313 # command has been called
314 print " * Listening:\n";
315 $server->start;
316 print " * Stopping...\n";
317
318 # Perform deinitializes here
319 # - remove server lockfile
320 print " * Removing lock file...\n";
321 unlink($server_lockfile_path);
322 # - now close database handles (forcing flush)
323 print " * Closing GDBMCLI\n";
324 finish($gdbm);
325 print "Done!\n";
326 }
327 else
328 {
329 print " * Waiting for GDBMServer lockfile to be created";
330 while (!-e $server_lockfile_path)
331 {
332 print '.';
333 sleep(1);
334 }
335 print "\n * GDBMServer lockfile created.\n";
336 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
337 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
338 my $line = <SLFH>;
339 if ($line =~ /(^.+):(\d+)$/)
340 {
341 print " => Server now listening on " . $1 . ":" . $2 . "\n";
342 }
343 else
344 {
345 die ("Error! Failed to retrieve host and port information from lockfile!");
346 }
347 flock(SLFH, LOCK_UN);
348 close(SLFH);
349 }
350
351 print "===== Complete! =====\n";
352}
353exit(0);
354
355# @function exitCheck
356# A callback function, called every 5 seconds (default) by the socket server,
357# to see whether the parent process (by pid) is actually still running. This
358# will cover the case where the parent process (import.pl or build.pl) dies
359# without properly asking the server to shutdown.
360sub exitCheck
361{
362 my $counter = shift @_;
363 # note: kill, when passed a first argument of 0, checks whether it's possible
364 # to send a signal to the pid given as the second argument, and returns true
365 # if it is. Thus it provides a means to determine if the parent process is
366 # still running (and hence can be signalled) In newer versions of Perl
367 # (5.8.9) it should even work cross-platform.
368 if (!kill(0, $parent_pid))
369 {
370 print " * Parent processs gone away... forcing server shutdown\n";
371 $server->stop;
372 if ($debug)
373 {
374 lock($debug_log);
375 $|++;
376 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
377 $|--;
378 }
379 }
380}
381
382# /** @function process
383# * A horribly named function that is called back to process each of the
384# * requests to alter the GDBM database. It expects either a typical GDBM
385# * text blob, or one of a limited number of special commands (which start
386# * with the sentinel character "!"). Note that synchronization over the
387# * open GDBM handle is used to ensure only one edit occurs at a time.
388# */
389sub process
390{
391 my $data = shift @_;
392 my $ip = shift @_;
393 my $tid = shift @_;
394 my $value = "#ERROR#";
395 # Synchronized debug log writing
396 if ($debug)
397 {
398 lock($debug_log);
399 $|++;
400 print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
401 $|--;
402 }
403 # process special commands first
404 if ($data =~ /^!(.*):(.*)$/)
405 {
406 my $command = $1;
407 my $argument = $2;
408 # addlistener(<pid>)
409 if ($command eq "addlistener")
410 {
411 lock(%listeners);
412 $listeners{$argument} = 1;
413 my $listener_count = scalar(keys(%listeners));
414 $value = "[SUCCESS] added listener [" . $listener_count . " listeners]";
415 # unlock(%listeners)
416 }
417 # removelistener(<pid>)
418 elsif ($command eq "removelistener")
419 {
420 lock(%listeners);
421 if (defined $listeners{$argument})
422 {
423 delete $listeners{$argument};
424 }
425 my $listener_count = scalar(keys(%listeners));
426 lock($should_stop);
427 if ($should_stop == 1 && $listener_count == 0)
428 {
429 # server isn't shared, but the stop data member is!
430 $server->stop;
431 $value = "[SUCCESS] removed last listener, stopping";
432 }
433 else
434 {
435 $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
436 }
437 # unlock($should_stop)
438 # unlock(%listeners)
439 }
440 # we may be asked to stop the server, but only by the process that created
441 # us. If there are no listeners registered, we stop straight away,
442 # otherwise we set a flag so that as soon as there are no listeners we
443 # stop.
444 elsif ($command eq "stop")
445 {
446 if ($argument ne $parent_pid && $argument ne "*")
447 {
448 $value = "[IGNORED] can only be stopped by parent process";
449 }
450 else
451 {
452 my $listener_count = 0;
453 {
454 lock(%listeners);
455 $listener_count = scalar(keys(%listeners));
456 # unlock(%listeners)
457 }
458 if ($listener_count == 0)
459 {
460 # server isn't shared, but the stop data member is!
461 $server->stop;
462 $value = "[SUCCESS] stopping";
463 }
464 else
465 {
466 lock($should_stop);
467 $should_stop = 1;
468 $value = "[PENDING] will stop when no more listeners";
469 # unlock($should_stop)
470 }
471 }
472 }
473 }
474 # Everything thing else should be a GDBMCLI command
475 else
476 {
477 lock($accessing_gdbm);
478 # lets check that we can still access the GDBM bidirectional pump
479 if (!pumpable $gdbm)
480 {
481 die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!");
482 }
483 # - write the command to GDBM
484 $gdbm_writer = $data . "\n";
485 $gdbm_reader = '';
486 #rint "[debug] sending command to gdbmcli\n";
487 pump($gdbm) while length($gdbm_writer);
488 #rint "[debug] reading output from gdbmcli\n";
489 pump($gdbm) until $gdbm_reader =~ /-{70}/;
490 $value = $gdbm_reader;
491 # trim value
492 chomp($value);
493 #rint "[debug] result: " . $value . "\n";
494 #unlock($accessing_gdbm);
495 }
496 # Synchronized debug log writing
497 if ($debug)
498 {
499 lock($debug_log);
500 $|++;
501 print "[" . time() . "|" . $tid . "|SEND] " . $value . "\n\n";
502 $|--;
503 }
504 return $value;
505}
Note: See TracBrowser for help on using the repository browser.