source: gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl@ 24666

Last change on this file since 24666 was 24666, checked in by jmt12, 13 years ago

A daemonizable server that modifies a GDBM database (via the GDBMCLI tool) and provides a socket for multiple clients to communicate with it on

  • Property svn:executable set to *
File size: 14.4 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Configuration
7my $thread_count = 10;
8
9# Setup Environment
10BEGIN
11{
12 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
13 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
14
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
19 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
20
21 if (defined $ENV{'GSDLEXTS'})
22 {
23 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
24 foreach my $e (@extensions)
25 {
26 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
27 unshift (@INC, "$ext_prefix/perllib");
28 unshift (@INC, "$ext_prefix/perllib/cpan");
29 unshift (@INC, "$ext_prefix/perllib/plugins");
30 unshift (@INC, "$ext_prefix/perllib/classify");
31 }
32 }
33 if (defined $ENV{'GSDL3EXTS'})
34 {
35 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
36 foreach my $e (@extensions)
37 {
38 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
39 unshift (@INC, "$ext_prefix/perllib");
40 unshift (@INC, "$ext_prefix/perllib/cpan");
41 unshift (@INC, "$ext_prefix/perllib/plugins");
42 unshift (@INC, "$ext_prefix/perllib/classify");
43 }
44 }
45
46 # Manually installed CPAN package in GEXT*INSTALL
47 unshift (@INC, $ENV{'GEXTPARALLELBUILDING_INSTALLED'} . "/share/perl5");
48}
49
50use Cwd;
51# Locking is required (otherwise other threads might glom onto the lock file
52# before we've written our port number to it).
53use Fcntl qw(:flock);
54# advanced child process control
55use IPC::Run qw(harness start pump finish);
56# the GDBMCLI tool accepts commands on STDIN and write results on STDOUT
57# so we'll need a double ended pipe
58# @note couldn't get this to reliably work when passing between threads
59#use IPC::Open2;
60# we need to run as a daemon
61use Proc::Daemon;
62# and the whole thing will need to accept requests from multiple threads, and
63# so will need threads in and of itself
64use threads;
65use threads::shared;
66
67# Greenstone utility functions (filename_cat)
68use util;
69# A simple server that listens on a socket and 'forks' off child threads to
70# handle each incoming request
71use SocketsSwimmingThreadPoolServer;
72
73# Globally available
74my $parent_pid = 0;
75my $infodb_file_path = '';
76my $remove_old = '';
77my $gdbm;
78my $gdbm_reader;
79my $gdbm_writer;
80my $debug = 1;
81my $server;
82# - shared and, more importantly, lockable
83my %listeners :shared;
84my $accessing_gdbm :shared;
85my $should_stop :shared;
86my $debug_log :shared;
87
88print "===== GDBM Server =====\n";
89print "Provides a persistent connection to one or more GDBM databases via a\n";
90print "pool of threads listening on a specific socket.\n";
91
92MAIN:
93{
94 $accessing_gdbm = 0;
95 $should_stop = 0;
96 $debug_log = 0;
97
98 # Check arguments
99 if (!defined $ARGV[0] || !defined $ARGV[1])
100 {
101 print "Error! Missing parent process id or path to database\n\n";
102 print "Usage: GDBMServer.pl <pid name> <path to database> [-removeold] [-nodaemon] [-debug]\n\n";
103 exit(0);
104 }
105 $parent_pid = $ARGV[0];
106 $infodb_file_path = $ARGV[1];
107 my $no_daemon = 0;
108 my $i = 2;
109 while (defined $ARGV[$i])
110 {
111 if ($ARGV[$i] eq "-nodaemon")
112 {
113 $no_daemon = 1;
114 }
115 if ($ARGV[$i] eq "-removeold")
116 {
117 $remove_old = '-removeold ';
118 }
119 if ($ARGV[$i] eq "-debug")
120 {
121 $debug = 1;
122 }
123 $i++;
124 }
125
126 if ($debug)
127 {
128 print " - parent pid: " . $parent_pid . "\n";
129 print " - infodb: " . $infodb_file_path . "\n";
130 print " - no daemon? " . $no_daemon . "\n";
131 print " - remove old? " . $remove_old . "\n";
132 print " - debug? yes\n";
133 print "\n";
134 }
135
136 # Information about any running GDBMServer is stored in a lockfile in
137 # Greenstone's tmp directory (and based on the database opened)
138 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
139 my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i;
140 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock');
141
142 # If already running, then exit
143 print " * Testing for other GDBMServers already running... ";
144 if (-e $server_lockfile_path)
145 {
146 print "Error! GDBMServer already running!\n";
147 print "Lockfile found at: " . $server_lockfile_path . "\n";
148 exit(0);
149 }
150 print "All clear!\n";
151
152 # Ensure we can see gdbmcli on the path
153 print " * Testing for GDBMCLI... ";
154 my $result = `gdbmcli 2>&1`;
155 if ($result !~ /GDBM Command Line Interface/)
156 {
157 print "Error! GDBMCLI not available - check path.\n";
158 exit(0);
159 }
160 print "Found!\n";
161
162 # @note Easiest way to figure out the open file descriptors is to close them.
163 # You start by figure out what the maximum number of file handles is
164 # for your system. You then iterate through trying to close them using
165 # the POSIX close function - which returns true iif the file descriptor
166 # existed and was successfully closed. You can then keep track of the
167 # highest file descriptor number successfully closed. Do this before and
168 # after your desired function call (like start() below) and the
169 # difference in hc_fd gives your new file descriptors in use! [jmt12]
170# my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
171# $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
172# my $hc_fd = 2;
173# foreach ( 3 .. $openmax )
174# {
175# $hc_fd = $_ if POSIX::close( $_ );
176# }
177# print "[debug] After start fd count = " . $hc_fd . "\n";
178# exit(0);
179
180 # @note As mentioned above, I couldn't get the file handles produced by open2
181 # realiably shared between handles (even using the tricks mentioned on
182 # the perlmonks site - http://perlmonks.org/?node_id=395513). Typically
183 # they'd work the first time they were used (read or write) but
184 # subsequent actions would block indefinitely. Moreover, I couldn't get
185 # open2 to work as advertised and accept arguments (despite several
186 # hours of dicking around) - and the work-around to make the dbpath the
187 # first lot of IO works, but then definately causes the next action to
188 # block forever. [jmt12]
189# #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice?
190# my @cmd = ('gdbmcli');
191# $gdbm_reader = IO::Handle->new();
192# $gdbm_writer = IO::Handle->new();
193# print " * Opening GDBM database\n";
194# $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli');
195# if (!$gdbm_writer)
196# {
197# die("Error! Failed to open GDBMCLI for writing\n");
198# }
199# if (!$gdbm_reader)
200# {
201# die("Error! Failed to open GDBMCLI for reading\n");
202# }
203# # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any
204# # arguments - so instead the first command to the GDBMCLI is the path to the
205# # GDBM database to load.
206# print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n");
207# my $load_result;
208# if ($load_result = <$gdbm_reader>)
209# {
210# print $load_result;
211# }
212# else
213# {
214# die("Error! Failed to actually read something from GDBMCLI\n");
215# }
216
217 # Open the database connection
218 my @cmd = ('gdbmcli',$infodb_file_path);
219 $gdbm_writer = '';
220 $gdbm_reader = '';
221 # @note start opens a total of four file descriptors to the 'cmd', but
222 # we never get to know their names (maybe $gdbm->{'WIN'} etc but
223 # I'm note sure) so instead we just have to hope that the number
224 # of file descriptors already open doesn't change, in which case
225 # these are fd 3, 4, 5, and 6.
226 $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader;
227
228 # Daemonize
229 my $pid = 0;
230 if (!$no_daemon)
231 {
232 print " * Spawning Daemon...\n" unless (!$debug);
233 my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out');
234 my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err');
235 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
236 child_STDOUT => $daemon_out_path,
237 child_STDERR => $daemon_err_path,
238 # @note as mentioned above, start creates four file descriptors that we need
239 # to keep open even through the separation of the daemon process.
240 dont_close_fd => [3,4,5,6]
241 } );
242 }
243
244 # Parent process has pid > 0
245 if ($pid == 0)
246 {
247
248 # Perform initializes here
249 # - database connection is now handled as a special command, as there may
250 # be multiple databases handled by this server
251 # - localhost is good enough for now
252 my $host = 'localhost';
253 # - determine a suitable port (checking that they aren't already in use)
254 # @note this isn't at all portable, but then neither is the daemon the
255 # way I've written it.
256 my $port = 8190;
257 my $result = `netstat -tnl | grep :$port`;
258 while ($result =~ /LISTEN/)
259 {
260 $port++;
261 $result = `netstat -tnl | grep :$port`;
262 }
263 # - create server object
264 print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n";
265 $server = SocketsSwimmingThreadPoolServer->new(host=>$host,
266 port=>$port,
267 processor_cb => \&process);
268
269 # - write our port number into the lockfile so that other threads can figure
270 # out where we are
271 print " * Writing port number to lock file: " . $server_lockfile_path . "\n";
272 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
273 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
274 print SLFH $host . ':' . $port;
275 flock(SLFH, LOCK_UN);
276 close(SLFH);
277
278 # Perform main loop
279 # - loop is actually in Server code. start() only returns once server's stop
280 # command has been called
281 print " * Listening:\n";
282 $server->start;
283 print " * Stopping...\n";
284
285 # Perform deinitializes here
286 # - remove server lockfile
287 print " * Removing lock file...\n";
288 unlink($server_lockfile_path);
289 # - now close database handles (forcing flush)
290 print " * Closing GDBMCLI\n";
291 finish($gdbm);
292 print "Done!\n";
293 }
294 else
295 {
296 print " * Waiting for GDBMServer lockfile to be created";
297 while (!-e $server_lockfile_path)
298 {
299 print '.';
300 sleep(1);
301 }
302 print "\n * GDBMServer lockfile created.\n";
303 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
304 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
305 my $line = <SLFH>;
306 if ($line =~ /(^.+):(\d+)$/)
307 {
308 print " => Server now listening on " . $1 . ":" . $2 . "\n";
309 }
310 else
311 {
312 die ("Error! Failed to retrieve host and port information from lockfile!");
313 }
314 flock(SLFH, LOCK_UN);
315 close(SLFH);
316 }
317
318 print "===== Complete! =====\n";
319}
320exit(0);
321
322# /** @function process
323# * A horribly named function that is called back to process each of the
324# * requests to alter the GDBM database. It expects either a typical GDBM
325# * text blob, or one of a limited number of special commands (which start
326# * with the sentinel character "!"). Note that synchronization over the
327# * open GDBM handle is used to ensure only one edit occurs at a time.
328# */
329sub process
330{
331 my $data = shift @_;
332 my $value = "#ERROR#";
333 # Synchronized debug log writing
334 if ($debug)
335 {
336 lock($debug_log);
337 $|++;
338 print " << " . $data . "\n";
339 $|--;
340 }
341 # process special commands first
342 if ($data =~ /^!(.*):(.*)$/)
343 {
344 my $command = $1;
345 my $argument = $2;
346 # addlistener(<pid>)
347 if ($command eq "addlistener")
348 {
349 lock(%listeners);
350 $listeners{$argument} = 1;
351 my $listener_count = scalar(keys(%listeners));
352 $value = "[SUCCESS] added listener [" . $listener_count . " listeners]";
353 # unlock(%listeners)
354 }
355 # removelistener(<pid>)
356 elsif ($command eq "removelistener")
357 {
358 lock(%listeners);
359 if (defined $listeners{$argument})
360 {
361 delete $listeners{$argument};
362 }
363 my $listener_count = scalar(keys(%listeners));
364 lock($should_stop);
365 if ($should_stop == 1 && $listener_count == 0)
366 {
367 # server isn't shared, but the stop data member is!
368 $server->stop;
369 $value = "[SUCCESS] removed last listener, stopping";
370 }
371 else
372 {
373 $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
374 }
375 # unlock($should_stop)
376 # unlock(%listeners)
377 }
378 # we may be asked to stop the server, but only by the process that created
379 # us. If there are no listeners registered, we stop straight away,
380 # otherwise we set a flag so that as soon as there are no listeners we
381 # stop.
382 elsif ($command eq "stop")
383 {
384 if ($argument ne $parent_pid && $argument ne "*")
385 {
386 $value = "[IGNORED] can only be stopped by parent process";
387 }
388 else
389 {
390 my $listener_count = 0;
391 {
392 lock(%listeners);
393 $listener_count = scalar(keys(%listeners));
394 # unlock(%listeners)
395 }
396 if ($listener_count == 0)
397 {
398 # server isn't shared, but the stop data member is!
399 $server->stop;
400 $value = "[SUCCESS] stopping";
401 }
402 else
403 {
404 lock($should_stop);
405 $should_stop = 1;
406 $value = "[PENDING] will stop when no more listeners";
407 # unlock($should_stop)
408 }
409 }
410 }
411 }
412 # Everything thing else should be a GDBMCLI command
413 else
414 {
415 lock($accessing_gdbm);
416 # lets check that we can still access the GDBM bidirectional pump
417 if (!pumpable $gdbm)
418 {
419 die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!");
420 }
421 # - write the command to GDBM
422 $gdbm_writer = $data . "\n";
423 $gdbm_reader = '';
424 #rint "[debug] sending command to gdbmcli\n";
425 pump($gdbm) while length($gdbm_writer);
426 #rint "[debug] reading output from gdbmcli\n";
427 pump($gdbm) until $gdbm_reader =~ /-{70}/;
428 $value = $gdbm_reader;
429 # trim value
430 chomp($value);
431 #rint "[debug] result: " . $value . "\n";
432 #unlock($accessing_gdbm);
433 }
434 # Synchronized debug log writing
435 if ($debug)
436 {
437 lock($debug_log);
438 $|++;
439 print " >> " . $value . "\n\n";
440 $|--;
441 }
442 return $value;
443}
Note: See TracBrowser for help on using the repository browser.