source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 27397

Last change on this file since 27397 was 27397, checked in by jmt12, 11 years ago

Allowing server out and err streams to be redirected to dev null

  • Property svn:executable set to *
File size: 17.6 KB
Line 
1#!/usr/bin/perl
2
3# jmt12
4
5use strict;
6use warnings;
7
8# Setup Environment
9BEGIN
10{
11 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
12 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
13
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
19
20 if (defined $ENV{'GSDLEXTS'})
21 {
22 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
23 foreach my $e (@extensions)
24 {
25 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
26 unshift (@INC, "$ext_prefix/perllib");
27 unshift (@INC, "$ext_prefix/perllib/cpan");
28 unshift (@INC, "$ext_prefix/perllib/plugins");
29 unshift (@INC, "$ext_prefix/perllib/classify");
30 }
31 }
32 if (defined $ENV{'GSDL3EXTS'})
33 {
34 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
35 foreach my $e (@extensions)
36 {
37 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
38 unshift (@INC, "$ext_prefix/perllib");
39 unshift (@INC, "$ext_prefix/perllib/cpan");
40 unshift (@INC, "$ext_prefix/perllib/plugins");
41 unshift (@INC, "$ext_prefix/perllib/classify");
42 }
43 }
44
45 # Manually installed CPAN package in GEXT*INSTALL
46 # - parse up version number
47 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
48 # - get rid of leading zeros by making them integers
49 $major += 0;
50 $minor += 0;
51 $revision += 0;
52 # - and add to Perl's path
53 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
54 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
55}
56
57use Cwd;
58
59# We need to do a little file locking
60use Fcntl qw(:flock); #import LOCK_* constants
61# Advanced child process control allowing bidirectional pipes
62use IPC::Run qw(harness start pump finish);
63# we need to run as a daemon
64use Proc::Daemon;
65# Try and find the hostname
66use Sys::Hostname;
67
68# The server will need to accept requests from multiple threads, and
69# so will need threads in and of itself
70use threads;
71use threads::shared;
72
73# Greenstone utility functions (filename_cat)
74use util;
75
76# A simple server that listens on a socket and 'forks' off child threads to
77# handle each incoming request
78use SocketsSwimmingThreadPoolServer;
79
80# Globally available - but once set these are read-only - so locking isn't
81# an issue
82my $use_harness = 0;
83my $tdbexe = 'tdbcli';
84my $parent_pid = 0;
85my $collection = '';
86my $no_daemon = 0;
87my $debug = 1;
88my $server;
89my $machine_name = (`hostname -s` || `hostname -a` || `hostname` || $ENV{'HOSTNAME'});
90chomp($machine_name);
91my $server_host = $machine_name . '.local';
92my $server_port;
93my $server_threads;
94# - shared and, more importantly, lockable
95my %listeners :shared;
96my $should_stop :shared = 0;
97my $debug_log :shared = 0;
98
99my $msg_counter :shared = 0;
100
101print "===== TDB Server =====\n";
102print "Provides a server to allow multiple remote machines to simultaenously\n";
103print "edit one or more TDB databases on the local machine. This is to work\n";
104print "around NFS file locking issues when parallel processing on a cluster.\n";
105
106MAIN:
107{
108 # Check arguments
109 # - compulsory
110 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
111 {
112 &printUsageAndExit('Error! Missing parent process ID or not a PID');
113 }
114 $parent_pid = $ARGV[0];
115 if (!defined $ARGV[1])
116 {
117 &printUsageAndExit('Error! Missing active Greenstone collection name');
118 }
119 $collection = $ARGV[1];
120 # - optional
121 my $i = 2;
122 while (defined $ARGV[$i])
123 {
124 if ($ARGV[$i] eq "-nodaemon")
125 {
126 $no_daemon = 1;
127 }
128 if ($ARGV[$i] eq "-debug")
129 {
130 $debug = 1;
131 }
132 $i++;
133 }
134
135 # Read in the collection specific configuration
136 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
137 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
138 my $line = '';
139 while (($line = <CFGIN>))
140 {
141 if ($line =~ /^(\w+)\s+(.*)$/)
142 {
143 my $key = $1;
144 my $value = $2;
145 # Allow the override of serverhost
146 if ($key eq "serverhost")
147 {
148 $server_host = $value;
149 }
150 if ($key eq "serverport")
151 {
152 $server_port = $value;
153 }
154 if ($key eq "threads")
155 {
156 $server_threads = $value;
157 }
158 }
159 }
160 close(CFGIN);
161
162 if ($debug)
163 {
164 print " - collection: " . $collection . "\n";
165 print " - parent pid: " . $parent_pid . "\n";
166 print " - no daemon? " . $no_daemon . "\n";
167 print " - debug? " . $debug . "\n";
168 print " - serverhost: " . $server_host . "\n";
169 print " - serverport: " . $server_port . "\n";
170 print " - threads: " . $server_threads . "\n";
171 print "\n";
172 }
173
174 # Information about any running TDBServer is stored in a lockfile in
175 # Greenstone's tmp directory (named after the active collection)
176 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
177 if (!-d $tmp_dir)
178 {
179 mkdir($tmp_dir, 0755);
180 }
181 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
182
183 # If already running, then exit
184 print " * Testing if TDBServer for this collection already running... ";
185 if (-e $server_lockfile_path)
186 {
187 die("Error! TDBServer already running!\nLockfile found at: " . $server_lockfile_path);
188 }
189 print "All clear!\n";
190
191 # Ensure we can see tdb edit tools on the path
192 print " * Testing for tool: " . $tdbexe . "... ";
193 my $result = `$tdbexe 2>&1`;
194 if ($result !~ /usage:\s+$tdbexe/)
195 {
196 die("Error! " . $tdbexe . " not available - check path.");
197 }
198 print "Found!\n";
199
200 # Daemonize
201 my $pid = 0;
202 if (!$no_daemon)
203 {
204 print " * Spawning Daemon...\n" unless (!$debug);
205 if ($debug)
206 {
207 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
208 if (!-d $logs_dir)
209 {
210 mkdir($logs_dir, 0755);
211 }
212 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
213 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
214 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
215 child_STDOUT => $daemon_out_path,
216 child_STDERR => $daemon_err_path,
217 } );
218 }
219 else
220 {
221 # Streams to /dev/null
222 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
223 } );
224 }
225 }
226
227 # Master process has pid > 0
228 if ($pid == 0)
229 {
230 print "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
231 print " * Starting server on " . $server_host . ":" . $server_port . "\n";
232 # - create server object
233 print " * Creating pool of " . $server_threads . " threads listening on socket\n";
234 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
235 port=>$server_port,
236 thread_count=>$server_threads,
237 main_cb => \&exitCheck,
238 processor_cb => \&process);
239
240 # - write a lockfile
241 print " * Creating lock file: " . $server_lockfile_path . "\n";
242 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
243 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
244 print SLFH $server_host . ':' . $server_port;
245 flock(SLFH, LOCK_UN);
246 close(SLFH);
247
248 # Perform main loop
249 # - loop is actually in Server code. start() only returns once server's stop
250 # command has been called
251 print " * Listening:\n";
252 $server->start;
253 print " * Stopping...\n";
254
255 # Perform deinitializes here
256 # - remove server lockfile
257 print " * Removing lock file...\n";
258 unlink($server_lockfile_path);
259 print "Done!\n";
260 }
261 # Forked child processes
262 else
263 {
264 print " * Waiting for lockfile to be created";
265 while (!-e $server_lockfile_path)
266 {
267 print '.';
268 sleep(1);
269 }
270 print "\n * TDBServer lockfile created.\n";
271 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
272 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
273 my $line = <SLFH>;
274 if ($line =~ /(^.+):(\d+)$/)
275 {
276 print " => Server now listening on " . $1 . ":" . $2 . "\n";
277 }
278 else
279 {
280 die ("Error! Failed to retrieve host and port information from lockfile!");
281 }
282 flock(SLFH, LOCK_UN);
283 close(SLFH);
284 }
285
286 print "===== Complete! =====\n";
287}
288exit(0);
289
290# @function exitCheck
291# A callback function, called every 5 seconds (default) by the socket server,
292# to see whether the parent process (by pid) is actually still running. This
293# will cover the case where the parent process (import.pl or build.pl) dies
294# without properly asking the server to shutdown.
295sub exitCheck
296{
297 my $counter = shift @_;
298 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
299 # Parent PID not available or we aren't allowed to talk to it (debugging)
300 if ($parent_pid == 0)
301 {
302 return;
303 }
304 # note: kill, when passed a first argument of 0, checks whether it's possible
305 # to send a signal to the pid given as the second argument, and returns true
306 # if it is. Thus it provides a means to determine if the parent process is
307 # still running (and hence can be signalled) In newer versions of Perl
308 # (5.8.9) it should even work cross-platform.
309 if (!kill(0, $parent_pid))
310 {
311 print " * Parent processs gone away... forcing server shutdown\n";
312 $server->stop;
313 if ($debug)
314 {
315 lock($debug_log);
316 $|++;
317 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
318 $|--;
319 }
320 }
321}
322
323# /** @function process
324# * A horribly named function that is called back to process each of the
325# * requests to alter the TDB databases. It expects a complete TDB CLI
326# * command as a text blob, or one of a limited number of special commands
327# * ([a]dd or [r]emove listener, or [q]uit).
328# */
329sub process
330{
331 my $data = shift @_;
332 my $ip = shift @_;
333 my $tid = shift @_;
334 my $result = "#ERROR#";
335 my $the_count = 0;
336 {
337 lock($msg_counter);
338 $msg_counter++;
339 $the_count = $msg_counter + 0;
340 # //unlock($msg_counter);
341 }
342 &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
343 # process special commands first
344 if ($data =~ /^#([arq]):(.*)$/)
345 {
346 my $command = $1;
347 my $argument = $2;
348 # addlistener(<pid>)
349 if ($command eq "a")
350 {
351 lock(%listeners);
352 $listeners{$argument} = 1;
353 my $listener_count = scalar(keys(%listeners));
354 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
355 # //unlock(%listeners)
356 }
357 # removelistener(<pid>)
358 elsif ($command eq "r")
359 {
360 my $listener_count = 0;
361 {
362 lock(%listeners);
363 if (defined $listeners{$argument})
364 {
365 delete $listeners{$argument};
366 }
367 $listener_count = scalar(keys(%listeners));
368 # //unlock(%listeners)
369 }
370 lock($should_stop);
371 if ($should_stop == 1 && $listener_count == 0)
372 {
373 # server isn't shared, but the stop data member is!
374 $server->stop;
375 $result = "[SUCCESS] removed last listener, stopping";
376 }
377 else
378 {
379 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
380 }
381 # //unlock($should_stop)
382 }
383 # we may be asked to stop the server, but only by the process that created
384 # us. If there are no listeners registered, we stop straight away,
385 # otherwise we set a flag so that as soon as there are no listeners we
386 # stop.
387 elsif ($command eq "q")
388 {
389 if ($argument ne $parent_pid && $argument ne "*")
390 {
391 $result = "[IGNORED] can only be stopped by parent process";
392 }
393 else
394 {
395 my $listener_count = 0;
396 {
397 lock(%listeners);
398 $listener_count = scalar(keys(%listeners));
399 # //unlock(%listeners)
400 }
401 if ($listener_count == 0)
402 {
403 # server isn't shared, but the stop data member is!
404 $server->stop;
405 $result = "[SUCCESS] stopping";
406 }
407 else
408 {
409 lock($should_stop);
410 $should_stop = 1;
411 $result = "[PENDING] will stop when no more listeners";
412 # //unlock($should_stop)
413 }
414 }
415 }
416 }
417 # Everything thing else should be a TDB command
418 # form <database>:<key>:<value>
419 # where: database is [d]oc, [i]ndex, or [s]rc
420 elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
421 {
422 my $database = $1;
423 my $key = $2;
424 my $action = $3;
425 # by default we add for backwards compatibility
426 if (!defined $action || $action eq '')
427 {
428 print STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
429 $action = '+';
430 }
431 my $payload = $4;
432 $payload =~ s/^\s+|\s+$//g;
433 &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;
434 # Build path to database file
435 my $tdb_path = '';
436 if ($database eq 'd')
437 {
438 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
439 }
440 elsif ($database eq 's')
441 {
442 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
443 }
444 else
445 {
446 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
447 }
448 # Harnesses seem like goodly magic - but unfortunately may be broken
449 # magic. Testing on Medusa randomly hangs on the finish() function.
450 if ($use_harness)
451 {
452 my $record = '[' . $key . ']' . $action . $payload;
453 # Open harness to TDBCLI
454 &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
455 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
456 my $buffer_to_tdb = '';
457 my $buffer_from_tdb = '';
458 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
459 # Check the harness worked
460 if (!pumpable $tdb_harness)
461 {
462 die("Error! Harness to " . $tdbexe . " has gone away!");
463 }
464 # - write the data to the TDBCLI
465 $buffer_to_tdb = $record . "\n";
466 while (length($buffer_to_tdb))
467 {
468 pump($tdb_harness);
469 }
470 # - read any response from TDBCLI
471 &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
472 while ($buffer_from_tdb !~ /-{70}/)
473 {
474 pump($tdb_harness);
475 }
476 # - explicitly tell the pipe to quit (empty key)
477 &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
478 $buffer_to_tdb = "[]\n";
479 while (length($buffer_to_tdb))
480 {
481 pump($tdb_harness);
482 }
483 # - not that this result doesn't include the [Server] prefix as it
484 # may be parsed for data by the client
485 $result = $buffer_from_tdb;
486 chomp($result);
487 # Finished with harness
488 &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
489 finish($tdb_harness);
490 &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
491 }
492 # Use different TDB tools depending on arguments
493 # - lookups using TDBGET
494 elsif ($action eq '?')
495 {
496 my $command_name = '';
497 my $command1 = '';
498 # Special case for retrieve all keys (indicated by *)
499 if ($key eq '*')
500 {
501 $command_name = 'TDBKEYS';
502 $command1 = 'tdbkeys "' . $tdb_path . '"';
503 }
504 else
505 {
506 $command_name = 'TDBGET';
507 $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
508 }
509 &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
510 if (-e $tdb_path)
511 {
512 $result = `$command1`;
513 }
514 else
515 {
516 &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
517 $result = '';
518 }
519 &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
520 if ($result !~ /-{70}/)
521 {
522 $result .= "-"x70 . "\n";
523 }
524 }
525 # - adds, updates and deletes using TXT2TDB
526 elsif ($action eq '+' || $action eq '-')
527 {
528 my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
529 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
530 open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
531 print $infodb_handle '[' . $key . ']';
532 if ($action eq '-')
533 {
534 print $infodb_handle $action;
535 }
536 print $infodb_handle $payload;
537 close($infodb_handle);
538 $result = "-"x70 . "\n";
539 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
540 }
541 else
542 {
543 print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
544 }
545 }
546 # Synchronized debug log writing
547 &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
548 return $result;
549}
550
551sub debugPrint
552{
553 my ($the_count, $tid, $type, $msg) = @_;
554 if ($debug)
555 {
556 lock($debug_log);
557 $|++;
558 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
559 $|--;
560 # //unlock($debug_log);
561 }
562}
563
564sub printUsageAndExit
565{
566 my ($msg) = @_;
567 print "$msg\n\n";
568 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
569 exit(0);
570}
571
5721;
Note: See TracBrowser for help on using the repository browser.