source: gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl@ 24679

Last change on this file since 24679 was 24679, checked in by jmt12, 13 years ago

Added code to determine the open file handles and persist them through the daemon fork (previously hardcoded - which failed to work when parallel importing) and extended debug information with timing and thread identifiers

  • Property svn:executable set to *
File size: 15.6 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Configuration
7my $thread_count = 10;
8
9# Setup Environment
10BEGIN
11{
12 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
13 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
14
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
19 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
20
21 if (defined $ENV{'GSDLEXTS'})
22 {
23 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
24 foreach my $e (@extensions)
25 {
26 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
27 unshift (@INC, "$ext_prefix/perllib");
28 unshift (@INC, "$ext_prefix/perllib/cpan");
29 unshift (@INC, "$ext_prefix/perllib/plugins");
30 unshift (@INC, "$ext_prefix/perllib/classify");
31 }
32 }
33 if (defined $ENV{'GSDL3EXTS'})
34 {
35 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
36 foreach my $e (@extensions)
37 {
38 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
39 unshift (@INC, "$ext_prefix/perllib");
40 unshift (@INC, "$ext_prefix/perllib/cpan");
41 unshift (@INC, "$ext_prefix/perllib/plugins");
42 unshift (@INC, "$ext_prefix/perllib/classify");
43 }
44 }
45
46 # Manually installed CPAN package in GEXT*INSTALL
47 unshift (@INC, $ENV{'GEXTPARALLELBUILDING_INSTALLED'} . "/share/perl5");
48}
49
50use Cwd;
51# Locking is required (otherwise other threads might glom onto the lock file
52# before we've written our port number to it).
53use Fcntl qw(:flock);
54# advanced child process control
55use IPC::Run qw(harness start pump finish);
56# the GDBMCLI tool accepts commands on STDIN and write results on STDOUT
57# so we'll need a double ended pipe
58# @note couldn't get this to reliably work when passing between threads
59#use IPC::Open2;
60# we need to run as a daemon
61use Proc::Daemon;
62# and the whole thing will need to accept requests from multiple threads, and
63# so will need threads in and of itself
64use threads;
65use threads::shared;
66
67# Greenstone utility functions (filename_cat)
68use util;
69# A simple server that listens on a socket and 'forks' off child threads to
70# handle each incoming request
71use SocketsSwimmingThreadPoolServer;
72
73# Globally available
74my $parent_pid = 0;
75my $infodb_file_path = '';
76my $remove_old = '';
77my $gdbm;
78my $gdbm_reader;
79my $gdbm_writer;
80my $debug = 1;
81my $server;
82# - shared and, more importantly, lockable
83my %listeners :shared;
84my $accessing_gdbm :shared;
85my $should_stop :shared;
86my $debug_log :shared;
87
88print "===== GDBM Server =====\n";
89print "Provides a persistent connection to one or more GDBM databases via a\n";
90print "pool of threads listening on a specific socket.\n";
91
92MAIN:
93{
94 $accessing_gdbm = 0;
95 $should_stop = 0;
96 $debug_log = 0;
97
98 # Check arguments
99 if (!defined $ARGV[0] || !defined $ARGV[1])
100 {
101 print "Error! Missing parent process id or path to database\n\n";
102 print "Usage: GDBMServer.pl <pid name> <path to database> [-removeold] [-nodaemon] [-debug]\n\n";
103 exit(0);
104 }
105 $parent_pid = $ARGV[0];
106 $infodb_file_path = $ARGV[1];
107 my $no_daemon = 0;
108 my $i = 2;
109 while (defined $ARGV[$i])
110 {
111 if ($ARGV[$i] eq "-nodaemon")
112 {
113 $no_daemon = 1;
114 }
115 if ($ARGV[$i] eq "-removeold")
116 {
117 $remove_old = '-removeold ';
118 }
119 if ($ARGV[$i] eq "-debug")
120 {
121 $debug = 1;
122 }
123 $i++;
124 }
125
126 if ($debug)
127 {
128 print " - parent pid: " . $parent_pid . "\n";
129 print " - infodb: " . $infodb_file_path . "\n";
130 print " - no daemon? " . $no_daemon . "\n";
131 print " - remove old? " . $remove_old . "\n";
132 print " - debug? yes\n";
133 print "\n";
134 }
135
136 # Information about any running GDBMServer is stored in a lockfile in
137 # Greenstone's tmp directory (and based on the database opened)
138 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
139 my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i;
140 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock');
141
142 # If already running, then exit
143 print " * Testing for other GDBMServers already running... ";
144 if (-e $server_lockfile_path)
145 {
146 print "Error! GDBMServer already running!\n";
147 print "Lockfile found at: " . $server_lockfile_path . "\n";
148 exit(0);
149 }
150 print "All clear!\n";
151
152 # Ensure we can see gdbmcli on the path
153 print " * Testing for GDBMCLI... ";
154 my $result = `gdbmcli 2>&1`;
155 if ($result !~ /GDBM Command Line Interface/)
156 {
157 print "Error! GDBMCLI not available - check path.\n";
158 exit(0);
159 }
160 print "Found!\n";
161
162 # @note Easiest way to figure out the open file descriptors is to close them.
163 # You start by figure out what the maximum number of file handles is
164 # for your system. You then iterate through trying to close them using
165 # the POSIX close function - which returns true iif the file descriptor
166 # existed and was successfully closed. You can then keep track of the
167 # highest file descriptor number successfully closed. Do this before and
168 # after your desired function call (like start() below) and the
169 # difference in hc_fd gives your new file descriptors in use! [jmt12]
170 #my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
171 #$openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
172 #my $hc_fd = 2;
173 #foreach ( 3 .. $openmax )
174 #{
175 # $hc_fd = $_ if POSIX::close( $_ );
176 #}
177 #print "[debug] After start fd count = " . $hc_fd . "\n";
178 #exit(0);
179
180 # @note As mentioned above, I couldn't get the file handles produced by open2
181 # realiably shared between handles (even using the tricks mentioned on
182 # the perlmonks site - http://perlmonks.org/?node_id=395513). Typically
183 # they'd work the first time they were used (read or write) but
184 # subsequent actions would block indefinitely. Moreover, I couldn't get
185 # open2 to work as advertised and accept arguments (despite several
186 # hours of dicking around) - and the work-around to make the dbpath the
187 # first lot of IO works, but then definately causes the next action to
188 # block forever. [jmt12]
189# #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice?
190# my @cmd = ('gdbmcli');
191# $gdbm_reader = IO::Handle->new();
192# $gdbm_writer = IO::Handle->new();
193# print " * Opening GDBM database\n";
194# $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli');
195# if (!$gdbm_writer)
196# {
197# die("Error! Failed to open GDBMCLI for writing\n");
198# }
199# if (!$gdbm_reader)
200# {
201# die("Error! Failed to open GDBMCLI for reading\n");
202# }
203# # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any
204# # arguments - so instead the first command to the GDBMCLI is the path to the
205# # GDBM database to load.
206# print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n");
207# my $load_result;
208# if ($load_result = <$gdbm_reader>)
209# {
210# print $load_result;
211# }
212# else
213# {
214# die("Error! Failed to actually read something from GDBMCLI\n");
215# }
216
217 # Open the database connection
218 my @cmd = ('gdbmcli',$infodb_file_path);
219 $gdbm_writer = '';
220 $gdbm_reader = '';
221 # @note start opens a total of four file descriptors to the 'cmd', but
222 # we never get to know their names (maybe $gdbm->{'WIN'} etc but
223 # I'm note sure) so instead we just have to hope that the number
224 # of file descriptors already open doesn't change, in which case
225 # these are fd 3, 4, 5, and 6.
226 $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader;
227 # - start opens four handles!
228
229 # Daemonize
230 my $pid = 0;
231 if (!$no_daemon)
232 {
233 # Determine the anonymous array of file descriptors *not* to close
234 my $dont_close_fd = [];
235 # Building upon the "POSIX::Close()" test above, we need to explicitly
236 # determine the new file descriptors opened by the start command.
237 my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
238 $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
239 # - then figure out the file descriptors currently open. We do this
240 # by attempting to 'copy' each file descriptor.
241 for (my $fd = 3; $fd <= $openmax; $fd++)
242 {
243 #rint "Checking file descriptor: $fd -> ";
244 my $tmpfh;
245 if (open $tmpfh, ">&$fd")
246 {
247 #rint "writable!\n";
248 push(@{$dont_close_fd}, $fd);
249 close($tmpfh);
250 }
251 elsif (open $tmpfh, "<&$fd")
252 {
253 #rint "readable!\n";
254 push(@{$dont_close_fd}, $fd);
255 close($tmpfh);
256 }
257 #else
258 #{
259 # print "not open\n";
260 #}
261 }
262 print " * When forking don't close these filehandles: [" . join(",", @{$dont_close_fd}) . "]\n";
263
264 print " * Spawning Daemon...\n" unless (!$debug);
265 my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out');
266 my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err');
267 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
268 child_STDOUT => $daemon_out_path,
269 child_STDERR => $daemon_err_path,
270 # @note as mentioned above, start creates four file descriptors that we need
271 # to keep open even through the separation of the daemon process.
272 dont_close_fd => $dont_close_fd,
273 } );
274 }
275
276 # Parent process has pid > 0
277 if ($pid == 0)
278 {
279
280 # Perform initializes here
281 # - database connection is now handled as a special command, as there may
282 # be multiple databases handled by this server
283 # - localhost is good enough for now
284 my $host = 'localhost';
285 # - determine a suitable port (checking that they aren't already in use)
286 # @note this isn't at all portable, but then neither is the daemon the
287 # way I've written it.
288 my $port = 8190;
289 my $result = `netstat -tnl | grep :$port`;
290 while ($result =~ /LISTEN/)
291 {
292 $port++;
293 $result = `netstat -tnl | grep :$port`;
294 }
295 # - create server object
296 print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n";
297 $server = SocketsSwimmingThreadPoolServer->new(host=>$host,
298 port=>$port,
299 processor_cb => \&process);
300
301 # - write our port number into the lockfile so that other threads can figure
302 # out where we are
303 print " * Writing port number to lock file: " . $server_lockfile_path . "\n";
304 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
305 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
306 print SLFH $host . ':' . $port;
307 flock(SLFH, LOCK_UN);
308 close(SLFH);
309
310 # Perform main loop
311 # - loop is actually in Server code. start() only returns once server's stop
312 # command has been called
313 print " * Listening:\n";
314 $server->start;
315 print " * Stopping...\n";
316
317 # Perform deinitializes here
318 # - remove server lockfile
319 print " * Removing lock file...\n";
320 unlink($server_lockfile_path);
321 # - now close database handles (forcing flush)
322 print " * Closing GDBMCLI\n";
323 finish($gdbm);
324 print "Done!\n";
325 }
326 else
327 {
328 print " * Waiting for GDBMServer lockfile to be created";
329 while (!-e $server_lockfile_path)
330 {
331 print '.';
332 sleep(1);
333 }
334 print "\n * GDBMServer lockfile created.\n";
335 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
336 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
337 my $line = <SLFH>;
338 if ($line =~ /(^.+):(\d+)$/)
339 {
340 print " => Server now listening on " . $1 . ":" . $2 . "\n";
341 }
342 else
343 {
344 die ("Error! Failed to retrieve host and port information from lockfile!");
345 }
346 flock(SLFH, LOCK_UN);
347 close(SLFH);
348 }
349
350 print "===== Complete! =====\n";
351}
352exit(0);
353
354# /** @function process
355# * A horribly named function that is called back to process each of the
356# * requests to alter the GDBM database. It expects either a typical GDBM
357# * text blob, or one of a limited number of special commands (which start
358# * with the sentinel character "!"). Note that synchronization over the
359# * open GDBM handle is used to ensure only one edit occurs at a time.
360# */
361sub process
362{
363 my $data = shift @_;
364 my $ip = shift @_;
365 my $tid = shift @_;
366 my $value = "#ERROR#";
367 # Synchronized debug log writing
368 if ($debug)
369 {
370 lock($debug_log);
371 $|++;
372 print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
373 $|--;
374 }
375 # process special commands first
376 if ($data =~ /^!(.*):(.*)$/)
377 {
378 my $command = $1;
379 my $argument = $2;
380 # addlistener(<pid>)
381 if ($command eq "addlistener")
382 {
383 lock(%listeners);
384 $listeners{$argument} = 1;
385 my $listener_count = scalar(keys(%listeners));
386 $value = "[SUCCESS] added listener [" . $listener_count . " listeners]";
387 # unlock(%listeners)
388 }
389 # removelistener(<pid>)
390 elsif ($command eq "removelistener")
391 {
392 lock(%listeners);
393 if (defined $listeners{$argument})
394 {
395 delete $listeners{$argument};
396 }
397 my $listener_count = scalar(keys(%listeners));
398 lock($should_stop);
399 if ($should_stop == 1 && $listener_count == 0)
400 {
401 # server isn't shared, but the stop data member is!
402 $server->stop;
403 $value = "[SUCCESS] removed last listener, stopping";
404 }
405 else
406 {
407 $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
408 }
409 # unlock($should_stop)
410 # unlock(%listeners)
411 }
412 # we may be asked to stop the server, but only by the process that created
413 # us. If there are no listeners registered, we stop straight away,
414 # otherwise we set a flag so that as soon as there are no listeners we
415 # stop.
416 elsif ($command eq "stop")
417 {
418 if ($argument ne $parent_pid && $argument ne "*")
419 {
420 $value = "[IGNORED] can only be stopped by parent process";
421 }
422 else
423 {
424 my $listener_count = 0;
425 {
426 lock(%listeners);
427 $listener_count = scalar(keys(%listeners));
428 # unlock(%listeners)
429 }
430 if ($listener_count == 0)
431 {
432 # server isn't shared, but the stop data member is!
433 $server->stop;
434 $value = "[SUCCESS] stopping";
435 }
436 else
437 {
438 lock($should_stop);
439 $should_stop = 1;
440 $value = "[PENDING] will stop when no more listeners";
441 # unlock($should_stop)
442 }
443 }
444 }
445 }
446 # Everything thing else should be a GDBMCLI command
447 else
448 {
449 lock($accessing_gdbm);
450 # lets check that we can still access the GDBM bidirectional pump
451 if (!pumpable $gdbm)
452 {
453 die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!");
454 }
455 # - write the command to GDBM
456 $gdbm_writer = $data . "\n";
457 $gdbm_reader = '';
458 #rint "[debug] sending command to gdbmcli\n";
459 pump($gdbm) while length($gdbm_writer);
460 #rint "[debug] reading output from gdbmcli\n";
461 pump($gdbm) until $gdbm_reader =~ /-{70}/;
462 $value = $gdbm_reader;
463 # trim value
464 chomp($value);
465 #rint "[debug] result: " . $value . "\n";
466 #unlock($accessing_gdbm);
467 }
468 # Synchronized debug log writing
469 if ($debug)
470 {
471 lock($debug_log);
472 $|++;
473 print "[" . time() . "|" . $tid . "|SEND] " . $value . "\n\n";
474 $|--;
475 }
476 return $value;
477}
Note: See TracBrowser for help on using the repository browser.