source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 26989

Last change on this file since 26989 was 26989, checked in by jmt12, 11 years ago

Hostname is now automatically determined when the script is run - rather than being read in from a config file (which, co-incidently I forgot to update when I moved from Karearea, and spent a while tracking down why TDBServer wasn't starting up properly

  • Property svn:executable set to *
File size: 17.3 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Setup Environment
7BEGIN
8{
9 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
10 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
11
12 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
13 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
17
18 if (defined $ENV{'GSDLEXTS'})
19 {
20 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
21 foreach my $e (@extensions)
22 {
23 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
24 unshift (@INC, "$ext_prefix/perllib");
25 unshift (@INC, "$ext_prefix/perllib/cpan");
26 unshift (@INC, "$ext_prefix/perllib/plugins");
27 unshift (@INC, "$ext_prefix/perllib/classify");
28 }
29 }
30 if (defined $ENV{'GSDL3EXTS'})
31 {
32 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
33 foreach my $e (@extensions)
34 {
35 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
36 unshift (@INC, "$ext_prefix/perllib");
37 unshift (@INC, "$ext_prefix/perllib/cpan");
38 unshift (@INC, "$ext_prefix/perllib/plugins");
39 unshift (@INC, "$ext_prefix/perllib/classify");
40 }
41 }
42
43 # Manually installed CPAN package in GEXT*INSTALL
44 # - parse up version number
45 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
46 # - get rid of leading zeros by making them integers
47 $major += 0;
48 $minor += 0;
49 $revision += 0;
50 # - and add to Perl's path
51 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
52 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
53}
54
55use Cwd;
56
57# We need to do a little file locking
58use Fcntl qw(:flock); #import LOCK_* constants
59
60# Advanced child process control allowing bidirectional pipes
61use IPC::Run qw(harness start pump finish);
62
63# we need to run as a daemon
64use Proc::Daemon;
65
66# The server will need to accept requests from multiple threads, and
67# so will need threads in and of itself
68use threads;
69use threads::shared;
70
71# Greenstone utility functions (filename_cat)
72use util;
73
74# A simple server that listens on a socket and 'forks' off child threads to
75# handle each incoming request
76use SocketsSwimmingThreadPoolServer;
77
78# Globally available - but once set these are read-only - so locking isn't
79# an issue
80my $use_harness = 0;
81my $tdbexe = 'tdbcli';
82my $parent_pid = 0;
83my $collection = '';
84my $no_daemon = 0;
85my $debug = 1;
86my $server;
87my $machine_name = `hostname -s`;
88chomp($machine_name);
89my $server_host = $machine_name . '.local';
90my $server_port;
91my $server_threads;
92# - shared and, more importantly, lockable
93my %listeners :shared;
94my $should_stop :shared = 0;
95my $debug_log :shared = 0;
96
97my $msg_counter :shared = 0;
98
99print "===== TDB Server =====\n";
100print "Provides a server to allow multiple remote machines to simultaenously\n";
101print "edit one or more TDB databases on the local machine. This is to work\n";
102print "around NFS file locking issues when parallel processing on a cluster.\n";
103
104MAIN:
105{
106 # Check arguments
107 # - compulsory
108 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
109 {
110 &printUsageAndExit('Error! Missing parent process ID or not a PID');
111 }
112 $parent_pid = $ARGV[0];
113 if (!defined $ARGV[1])
114 {
115 &printUsageAndExit('Error! Missing active Greenstone collection name');
116 }
117 $collection = $ARGV[1];
118 # - optional
119 my $i = 2;
120 while (defined $ARGV[$i])
121 {
122 if ($ARGV[$i] eq "-nodaemon")
123 {
124 $no_daemon = 1;
125 }
126 if ($ARGV[$i] eq "-debug")
127 {
128 $debug = 1;
129 }
130 $i++;
131 }
132
133 # Read in the collection specific configuration
134 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
135 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
136 my $line = '';
137 while (($line = <CFGIN>))
138 {
139 if ($line =~ /^(\w+)\s+(.*)$/)
140 {
141 my $key = $1;
142 my $value = $2;
143 # Allow the override of serverhost
144 if ($key eq "serverhost")
145 {
146 $server_host = $value;
147 }
148 if ($key eq "serverport")
149 {
150 $server_port = $value;
151 }
152 if ($key eq "threads")
153 {
154 $server_threads = $value;
155 }
156 }
157 }
158 close(CFGIN);
159
160 if ($debug)
161 {
162 print " - collection: " . $collection . "\n";
163 print " - parent pid: " . $parent_pid . "\n";
164 print " - no daemon? " . $no_daemon . "\n";
165 print " - debug? " . $debug . "\n";
166 print " - serverhost: " . $server_host . "\n";
167 print " - serverport: " . $server_port . "\n";
168 print " - threads: " . $server_threads . "\n";
169 print "\n";
170 }
171
172 # Information about any running TDBServer is stored in a lockfile in
173 # Greenstone's tmp directory (named after the active collection)
174 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
175 if (!-d $tmp_dir)
176 {
177 mkdir($tmp_dir, 0755);
178 }
179 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
180
181 # If already running, then exit
182 print " * Testing if TDBServer for this collection already running... ";
183 if (-e $server_lockfile_path)
184 {
185 print "Error! TDBServer already running!\n";
186 print "Lockfile found at: " . $server_lockfile_path . "\n";
187 exit(0);
188 }
189 print "All clear!\n";
190
191 # Ensure we can see tdb edit tools on the path
192 print " * Testing for tool: " . $tdbexe . "... ";
193 my $result = `$tdbexe 2>&1`;
194 if ($result !~ /usage:\s+$tdbexe/)
195 {
196 print "Error! " . $tdbexe . " not available - check path.\n";
197 exit(0);
198 }
199 print "Found!\n";
200
201 # Daemonize
202 my $pid = 0;
203 if (!$no_daemon)
204 {
205 print " * Spawning Daemon...\n" unless (!$debug);
206 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
207 if (!-d $logs_dir)
208 {
209 mkdir($logs_dir, 0755);
210 }
211 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
212 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
213 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
214 child_STDOUT => $daemon_out_path,
215 child_STDERR => $daemon_err_path,
216 } );
217 }
218
219 # Master process has pid > 0
220 if ($pid == 0)
221 {
222 print STDERR "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
223 print " * Starting server on " . $server_host . ":" . $server_port . "\n";
224 # - create server object
225 print " * Creating pool of " . $server_threads . " threads listening on socket\n";
226 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
227 port=>$server_port,
228 thread_count=>$server_threads,
229 main_cb => \&exitCheck,
230 processor_cb => \&process);
231
232 # - write a lockfile
233 print " * Creating lock file: " . $server_lockfile_path . "\n";
234 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
235 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
236 print SLFH $server_host . ':' . $server_port;
237 flock(SLFH, LOCK_UN);
238 close(SLFH);
239
240 # Perform main loop
241 # - loop is actually in Server code. start() only returns once server's stop
242 # command has been called
243 print " * Listening:\n";
244 $server->start;
245 print " * Stopping...\n";
246
247 # Perform deinitializes here
248 # - remove server lockfile
249 print " * Removing lock file...\n";
250 unlink($server_lockfile_path);
251 print "Done!\n";
252 }
253 # Forked child processes
254 else
255 {
256 print " * Waiting for lockfile to be created";
257 while (!-e $server_lockfile_path)
258 {
259 print '.';
260 sleep(1);
261 }
262 print "\n * TDBServer lockfile created.\n";
263 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
264 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
265 my $line = <SLFH>;
266 if ($line =~ /(^.+):(\d+)$/)
267 {
268 print " => Server now listening on " . $1 . ":" . $2 . "\n";
269 }
270 else
271 {
272 die ("Error! Failed to retrieve host and port information from lockfile!");
273 }
274 flock(SLFH, LOCK_UN);
275 close(SLFH);
276 }
277
278 print "===== Complete! =====\n";
279}
280exit(0);
281
282# @function exitCheck
283# A callback function, called every 5 seconds (default) by the socket server,
284# to see whether the parent process (by pid) is actually still running. This
285# will cover the case where the parent process (import.pl or build.pl) dies
286# without properly asking the server to shutdown.
287sub exitCheck
288{
289 my $counter = shift @_;
290 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
291 # Parent PID not available or we aren't allowed to talk to it (debugging)
292 if ($parent_pid == 0)
293 {
294 return;
295 }
296 # note: kill, when passed a first argument of 0, checks whether it's possible
297 # to send a signal to the pid given as the second argument, and returns true
298 # if it is. Thus it provides a means to determine if the parent process is
299 # still running (and hence can be signalled) In newer versions of Perl
300 # (5.8.9) it should even work cross-platform.
301 if (!kill(0, $parent_pid))
302 {
303 print " * Parent processs gone away... forcing server shutdown\n";
304 $server->stop;
305 if ($debug)
306 {
307 lock($debug_log);
308 $|++;
309 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
310 $|--;
311 }
312 }
313}
314
315# /** @function process
316# * A horribly named function that is called back to process each of the
317# * requests to alter the TDB databases. It expects a complete TDB CLI
318# * command as a text blob, or one of a limited number of special commands
319# * ([a]dd or [r]emove listener, or [q]uit).
320# */
321sub process
322{
323 my $data = shift @_;
324 my $ip = shift @_;
325 my $tid = shift @_;
326 my $result = "#ERROR#";
327 my $the_count = 0;
328 {
329 lock($msg_counter);
330 $msg_counter++;
331 $the_count = $msg_counter + 0;
332 # //unlock($msg_counter);
333 }
334 &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
335 # process special commands first
336 if ($data =~ /^#([arq]):(.*)$/)
337 {
338 my $command = $1;
339 my $argument = $2;
340 # addlistener(<pid>)
341 if ($command eq "a")
342 {
343 lock(%listeners);
344 $listeners{$argument} = 1;
345 my $listener_count = scalar(keys(%listeners));
346 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
347 # //unlock(%listeners)
348 }
349 # removelistener(<pid>)
350 elsif ($command eq "r")
351 {
352 my $listener_count = 0;
353 {
354 lock(%listeners);
355 if (defined $listeners{$argument})
356 {
357 delete $listeners{$argument};
358 }
359 $listener_count = scalar(keys(%listeners));
360 # //unlock(%listeners)
361 }
362 lock($should_stop);
363 if ($should_stop == 1 && $listener_count == 0)
364 {
365 # server isn't shared, but the stop data member is!
366 $server->stop;
367 $result = "[SUCCESS] removed last listener, stopping";
368 }
369 else
370 {
371 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
372 }
373 # //unlock($should_stop)
374 }
375 # we may be asked to stop the server, but only by the process that created
376 # us. If there are no listeners registered, we stop straight away,
377 # otherwise we set a flag so that as soon as there are no listeners we
378 # stop.
379 elsif ($command eq "q")
380 {
381 if ($argument ne $parent_pid && $argument ne "*")
382 {
383 $result = "[IGNORED] can only be stopped by parent process";
384 }
385 else
386 {
387 my $listener_count = 0;
388 {
389 lock(%listeners);
390 $listener_count = scalar(keys(%listeners));
391 # //unlock(%listeners)
392 }
393 if ($listener_count == 0)
394 {
395 # server isn't shared, but the stop data member is!
396 $server->stop;
397 $result = "[SUCCESS] stopping";
398 }
399 else
400 {
401 lock($should_stop);
402 $should_stop = 1;
403 $result = "[PENDING] will stop when no more listeners";
404 # //unlock($should_stop)
405 }
406 }
407 }
408 }
409 # Everything thing else should be a TDB command
410 # form <database>:<key>:<value>
411 # where: database is [d]oc, [i]ndex, or [s]rc
412 elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
413 {
414 my $database = $1;
415 my $key = $2;
416 my $action = $3;
417 # by default we add for backwards compatibility
418 if (!defined $action || $action eq '')
419 {
420 #rint STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
421 $action = '+';
422 }
423 my $payload = $4;
424 $payload =~ s/^\s+|\s+$//g;
425 &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;
426 # Build path to database file
427 my $tdb_path = '';
428 if ($database eq 'd')
429 {
430 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
431 }
432 elsif ($database eq 's')
433 {
434 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
435 }
436 else
437 {
438 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
439 }
440 # Harnesses seem like goodly magic - but unfortunately may be broken
441 # magic. Testing on Medusa randomly hangs on the finish() function.
442 if ($use_harness)
443 {
444 my $record = '[' . $key . ']' . $action . $payload;
445 # Open harness to TDBCLI
446 &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
447 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
448 my $buffer_to_tdb = '';
449 my $buffer_from_tdb = '';
450 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
451 # Check the harness worked
452 if (!pumpable $tdb_harness)
453 {
454 die("Error! Harness to " . $tdbexe . " has gone away!");
455 }
456 # - write the data to the TDBCLI
457 $buffer_to_tdb = $record . "\n";
458 while (length($buffer_to_tdb))
459 {
460 pump($tdb_harness);
461 }
462 # - read any response from TDBCLI
463 &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
464 while ($buffer_from_tdb !~ /-{70}/)
465 {
466 pump($tdb_harness);
467 }
468 # - explicitly tell the pipe to quit (empty key)
469 &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
470 $buffer_to_tdb = "[]\n";
471 while (length($buffer_to_tdb))
472 {
473 pump($tdb_harness);
474 }
475 # - not that this result doesn't include the [Server] prefix as it
476 # may be parsed for data by the client
477 $result = $buffer_from_tdb;
478 chomp($result);
479 # Finished with harness
480 &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
481 finish($tdb_harness);
482 &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
483 }
484 # Use different TDB tools depending on arguments
485 # - lookups using TDBGET
486 elsif ($action eq '?')
487 {
488 my $command_name = '';
489 my $command1 = '';
490 # Special case for retrieve all keys (indicated by *)
491 if ($key eq '*')
492 {
493 $command_name = 'TDBKEYS';
494 $command1 = 'tdbkeys "' . $tdb_path . '"';
495 }
496 else
497 {
498 $command_name = 'TDBGET';
499 $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
500 }
501 &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
502 if (-e $tdb_path)
503 {
504 $result = `$command1`;
505 }
506 else
507 {
508 &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
509 $result = '';
510 }
511 &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
512 if ($result !~ /-{70}/)
513 {
514 $result .= "-"x70 . "\n";
515 }
516 }
517 # - adds, updates and deletes using TXT2TDB
518 elsif ($action eq '+' || $action eq '-')
519 {
520 my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
521 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
522 open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
523 print $infodb_handle '[' . $key . ']';
524 if ($action eq '-')
525 {
526 print $infodb_handle $action;
527 }
528 print $infodb_handle $payload;
529 close($infodb_handle);
530 $result = "-"x70 . "\n";
531 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
532 }
533 else
534 {
535 print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
536 }
537 }
538 # Synchronized debug log writing
539 &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
540 return $result;
541}
542
543sub debugPrint
544{
545 my ($the_count, $tid, $type, $msg) = @_;
546 if ($debug)
547 {
548 lock($debug_log);
549 $|++;
550 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
551 $|--;
552 # //unlock($debug_log);
553 }
554}
555
556sub printUsageAndExit
557{
558 my ($msg) = @_;
559 print "$msg\n\n";
560 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
561 exit(0);
562}
563
5641;
Note: See TracBrowser for help on using the repository browser.