source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 26995

Last change on this file since 26995 was 26995, checked in by jmt12, 11 years ago

Ensuring that errors go to STDERR and other messages go to STDOUT

  • Property svn:executable set to *
File size: 17.3 KB
Line 
1#!/usr/bin/perl
2
3# jmt12
4
5use strict;
6use warnings;
7
# Setup Environment
# Runs at compile time so that the Greenstone library directories are on
# @INC before the 'use' statements further down are resolved.
# - GSDLHOME and GSDLOS are compulsory; the script dies without them.
# - GSDLEXTS / GSDL3EXTS are optional, colon separated lists of extension
#   names whose perllib directories are also added.
# - GEXTTDBEDIT_INSTALLED is optional and points at manually installed CPAN
#   packages.
# Note: every directory is unshift()ed, so the LAST directory added is the
# FIRST one Perl will search.
BEGIN
{
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

  unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");

  # Extensions: GSDLEXTS live under GSDLHOME, GSDL3EXTS under GSDL3SRCHOME
  my %ext_home_for = ('GSDLEXTS'  => $ENV{'GSDLHOME'},
                      'GSDL3EXTS' => $ENV{'GSDL3SRCHOME'});
  foreach my $ext_var ('GSDLEXTS', 'GSDL3EXTS')
  {
    next unless defined $ENV{$ext_var};
    my $ext_home = $ext_home_for{$ext_var};
    # Without a home directory we would only add bogus '/ext/...' paths
    if (!defined $ext_home)
    {
      warn "Warning! $ext_var is set but GSDL3SRCHOME is not - ignoring these extensions\n";
      next;
    }
    foreach my $e (split(/:/, $ENV{$ext_var}))
    {
      my $ext_prefix = "$ext_home/ext/$e";
      unshift (@INC, "$ext_prefix/perllib");
      unshift (@INC, "$ext_prefix/perllib/cpan");
      unshift (@INC, "$ext_prefix/perllib/plugins");
      unshift (@INC, "$ext_prefix/perllib/classify");
    }
  }

  # Manually installed CPAN package in GEXT*INSTALL
  if (defined $ENV{'GEXTTDBEDIT_INSTALLED'})
  {
    # - parse up version number ($] looks like 5.008009)
    my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
    if (defined $revision)
    {
      # - get rid of leading zeros by making them integers
      my $perl_version = join('.', $major + 0, $minor + 0, $revision + 0);
      # - and add to Perl's path
      unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $perl_version);
      unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $perl_version);
    }
  }
}
56
57use Cwd;
58
59# We need to do a little file locking
60use Fcntl qw(:flock); #import LOCK_* constants
61
62# Advanced child process control allowing bidirectional pipes
63use IPC::Run qw(harness start pump finish);
64
65# we need to run as a daemon
66use Proc::Daemon;
67
68# The server will need to accept requests from multiple threads, and
69# so will need threads in and of itself
70use threads;
71use threads::shared;
72
73# Greenstone utility functions (filename_cat)
74use util;
75
76# A simple server that listens on a socket and 'forks' off child threads to
77# handle each incoming request
78use SocketsSwimmingThreadPoolServer;
79
# Globally available - but once set these are read-only - so locking isn't
# an issue
# - when true, process() talks to tdbcli through an IPC::Run harness rather
#   than the separate tdbget/tdbkeys/txt2tdb tools
my $use_harness = 0;
my $tdbexe = 'tdbcli';
# - pid of the process that launched us (0 disables the liveness check)
my $parent_pid = 0;
my $collection = '';
my $no_daemon = 0;
# NOTE(review): debug defaults to on, so the -debug flag currently has no
# effect - confirm whether this is intentional
my $debug = 1;
my $server;
# - default host is '<short hostname>.local' unless overridden by the
#   'serverhost' entry in the collection's tdbserver.conf
my $machine_name = `hostname -s`;
chomp($machine_name);
my $server_host = $machine_name . '.local';
my $server_port;
my $server_threads;
# - shared and, more importantly, lockable
my %listeners :shared;
my $should_stop :shared = 0;
# - only ever used as a lock to serialise writes to the debug log
my $debug_log :shared = 0;

# - running count of requests received, used to label log entries
my $msg_counter :shared = 0;

print "===== TDB Server =====\n";
print "Provides a server to allow multiple remote machines to simultaneously\n";
print "edit one or more TDB databases on the local machine. This is to work\n";
print "around NFS file locking issues when parallel processing on a cluster.\n";
105
# Main program:
#  1. parse the command line (<parent_pid> <collectionname> [-nodaemon] [-debug])
#  2. read host, port and thread count from the collection's tdbserver.conf
#  3. refuse to start if a lockfile for this collection already exists
#  4. (unless -nodaemon) daemonize; the daemon runs the socket server while
#     the launching process waits for the lockfile and reports host:port
MAIN:
{
  # Check arguments
  # - compulsory
  if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
  {
    &printUsageAndExit('Error! Missing parent process ID or not a PID');
  }
  $parent_pid = $ARGV[0];
  if (!defined $ARGV[1])
  {
    &printUsageAndExit('Error! Missing active Greenstone collection name');
  }
  $collection = $ARGV[1];
  # - optional
  foreach my $option (@ARGV[2 .. $#ARGV])
  {
    if ($option eq "-nodaemon")
    {
      $no_daemon = 1;
    }
    if ($option eq "-debug")
    {
      $debug = 1;
    }
  }

  # Read in the collection specific configuration
  # - lines of the form "<key> <value>"; anything else is ignored
  my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
  open(my $cfg_fh, '<', $cfg_path) or die("Failed to read config file: " . $cfg_path . "\nReason: " . $! . "\n");
  while (my $cfg_line = <$cfg_fh>)
  {
    if ($cfg_line =~ /^(\w+)\s+(.*)$/)
    {
      my $key = $1;
      my $value = $2;
      # - drop trailing whitespace (including any carriage return)
      $value =~ s/\s+$//;
      # Allow the override of serverhost
      if ($key eq "serverhost")
      {
        $server_host = $value;
      }
      if ($key eq "serverport")
      {
        $server_port = $value;
      }
      if ($key eq "threads")
      {
        $server_threads = $value;
      }
    }
  }
  close($cfg_fh);

  # The port is written to, and later parsed back out of, the lockfile as a
  # string of digits - so fail now rather than with a confusing error later
  if (!defined $server_port || $server_port !~ /^\d+$/)
  {
    die("Error! No valid 'serverport' entry found in config file: " . $cfg_path . "\n");
  }
  # The thread count is passed through to the server untouched; this is only
  # used to keep the messages below free of 'uninitialized' warnings
  my $threads_label = (defined $server_threads) ? $server_threads : '(not set)';

  if ($debug)
  {
    print " - collection: " . $collection . "\n";
    print " - parent pid: " . $parent_pid . "\n";
    print " - no daemon? " . $no_daemon . "\n";
    print " - debug? " . $debug . "\n";
    print " - serverhost: " . $server_host . "\n";
    print " - serverport: " . $server_port . "\n";
    print " - threads: " . $threads_label . "\n";
    print "\n";
  }

  # Information about any running TDBServer is stored in a lockfile in
  # Greenstone's tmp directory (named after the active collection)
  my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
  if (!-d $tmp_dir)
  {
    mkdir($tmp_dir, 0755) or die("Error! Failed to create directory: " . $tmp_dir . "\nReason: " . $! . "\n");
  }
  my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');

  # If already running, then exit
  print " * Testing if TDBServer for this collection already running... ";
  if (-e $server_lockfile_path)
  {
    die("Error! TDBServer already running!\nLockfile found at: " . $server_lockfile_path);
  }
  print "All clear!\n";

  # Ensure we can see tdb edit tools on the path
  # - run with no arguments, the tool is expected to print its usage message
  print " * Testing for tool: " . $tdbexe . "... ";
  my $tool_output = `$tdbexe 2>&1`;
  if (!defined $tool_output || $tool_output !~ /usage:\s+\Q$tdbexe\E/)
  {
    die("Error! " . $tdbexe . " not available - check path.");
  }
  print "Found!\n";

  # Daemonize
  # - $pid stays 0 when running with -nodaemon. Proc::Daemon::Init is
  #   documented to return 0 inside the daemon and the daemon's pid to the
  #   process that launched it
  my $pid = 0;
  if (!$no_daemon)
  {
    print " * Spawning Daemon...\n" if ($debug);
    my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
    if (!-d $logs_dir)
    {
      mkdir($logs_dir, 0755) or die("Error! Failed to create directory: " . $logs_dir . "\nReason: " . $! . "\n");
    }
    my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
    my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
    $pid = Proc::Daemon::Init( { work_dir => getcwd(),
                                 child_STDOUT => $daemon_out_path,
                                 child_STDERR => $daemon_err_path,
                               } );
  }

  # The daemon itself (or this process when run with -nodaemon)
  if ($pid == 0)
  {
    print "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
    print " * Starting server on " . $server_host . ":" . $server_port . "\n";
    # - create server object
    print " * Creating pool of " . $threads_label . " threads listening on socket\n";
    $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
                                                   port=>$server_port,
                                                   thread_count=>$server_threads,
                                                   main_cb => \&exitCheck,
                                                   processor_cb => \&process);

    # - write a lockfile containing "<host>:<port>"
    print " * Creating lock file: " . $server_lockfile_path . "\n";
    open(my $lock_out, '>', $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    flock($lock_out, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    print $lock_out $server_host . ':' . $server_port;
    flock($lock_out, LOCK_UN);
    if (!close($lock_out))
    {
      # - don't leave a half written lockfile behind to block later runs
      my $reason = $!;
      unlink($server_lockfile_path);
      die("Error! Failed to write lock file: " . $server_lockfile_path . "\nReason: " . $reason . "\n");
    }

    # Perform main loop
    # - loop is actually in Server code. start() only returns once server's stop
    #   command has been called
    print " * Listening:\n";
    $server->start;
    print " * Stopping...\n";

    # Perform deinitializes here
    # - remove server lockfile
    print " * Removing lock file...\n";
    unlink($server_lockfile_path);
    print "Done!\n";
  }
  # The launching process: wait until the daemon has advertised itself
  else
  {
    print " * Waiting for lockfile to be created";
    while (!-e $server_lockfile_path)
    {
      # - if the daemon has already gone away the lockfile is never going to
      #   appear, so give up rather than wait forever
      if (!kill(0, $pid) && !-e $server_lockfile_path)
      {
        die("\nError! TDBServer daemon exited before creating its lockfile - check tdbserver.err in the collection's logs directory\n");
      }
      print '.';
      sleep(1);
    }
    print "\n * TDBServer lockfile created.\n";
    # - the lockfile exists from the moment the daemon opens it, which is
    #   slightly before the content is written, so allow a few retries
    my ($lock_host, $lock_port);
    my $attempts_left = 10;
    while ($attempts_left-- > 0)
    {
      open(my $lock_in, '<', $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
      flock($lock_in, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
      my $lock_line = <$lock_in>;
      flock($lock_in, LOCK_UN);
      close($lock_in);
      if (defined $lock_line && $lock_line =~ /(^.+):(\d+)$/)
      {
        ($lock_host, $lock_port) = ($1, $2);
        last;
      }
      sleep(1);
    }
    if (!defined $lock_host)
    {
      die ("Error! Failed to retrieve host and port information from lockfile!");
    }
    print " => Server now listening on " . $lock_host . ":" . $lock_port . "\n";
  }

  print "===== Complete! =====\n";
}
exit(0);
280
# @function exitCheck
# A callback function, called every 5 seconds (default) by the socket server,
# to see whether the parent process (by pid) is actually still running. This
# will cover the case where the parent process (import.pl or build.pl) dies
# without properly asking the server to shutdown.
# Any arguments supplied by the server are ignored, and nothing useful is
# returned.
sub exitCheck
{
  # Parent PID not available (debugging) - nothing to check
  if ($parent_pid == 0)
  {
    return;
  }
  # note: kill, when passed a first argument of 0, checks whether it's possible
  #       to send a signal to the pid given as the second argument, and returns true
  #       if it is. Thus it provides a means to determine if the parent process is
  #       still running (and hence can be signalled) In newer versions of Perl
  #       (5.8.9) it should even work cross-platform.
  if (kill(0, $parent_pid))
  {
    return;
  }
  # A failure with EPERM means the process exists but we aren't allowed to
  # signal it - so the parent is still alive and we must keep running
  if ($!{EPERM})
  {
    return;
  }
  print " * Parent process gone away... forcing server shutdown\n";
  $server->stop;
  if ($debug)
  {
    # - the lock keeps this entry from interleaving with other threads'
    #   debug output; autoflush is switched on just for this message
    lock($debug_log);
    $|++;
    print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
    $|--;
  }
  return;
}
313
# /** @function process
#  *  A horribly named function that is called back to process each of the
#  *  requests to alter the TDB databases. It expects a complete TDB CLI
#  *  command as a text blob, or one of a limited number of special commands
#  *  ([a]dd or [r]emove listener, or [q]uit).
#  *
#  *  Requests take one of two forms:
#  *    #<a|r|q>:<argument>                   - special (control) command
#  *    <d|i|s>:[<key>]<+|?|-><payload>       - TDB command
#  *
#  *  @param  data  the raw request text
#  *  @param  ip    supplied by the server but not used here
#  *  @param  tid   id of the handling thread (only used to label log entries)
#  *  @return the text to send back to the client; "#ERROR#" if the request
#  *          matched neither form
#  */
sub process
{
  my ($data, $ip, $tid) = @_;
  my $result = "#ERROR#";
  # Take a private copy of the (shared) message counter to label this request
  my $the_count = 0;
  {
    lock($msg_counter);
    $msg_counter++;
    $the_count = $msg_counter + 0;
  }
  debugPrint($the_count, $tid, 'RECV', $data);
  # process special commands first
  if ($data =~ /^#([arq]):(.*)$/)
  {
    $result = _handleControlCommand($1, $2);
  }
  # Everything else should be a TDB command
  # form <database>:<key>:<value>
  # where: database is [d]oc, [i]ndex, or [s]rc
  elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
  {
    my ($database, $key, $action, $payload) = ($1, $2, $3, $4);
    # by default we add for backwards compatibility
    if (!defined $action || $action eq '')
    {
      print STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
      $action = '+';
    }
    $payload =~ s/^\s+|\s+$//g;
    debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload);
    my $tdb_path = _tdbPathFor($database);
    # Harnesses seem like goodly magic - but unfortunately may be broken
    # magic. Testing on Medusa randomly hangs on the finish() function.
    if ($use_harness)
    {
      $result = _runViaHarness($the_count, $tid, $tdb_path, '[' . $key . ']' . $action . $payload);
    }
    # Use different TDB tools depending on arguments
    # - lookups using TDBGET
    elsif ($action eq '?')
    {
      $result = _lookupViaTdbget($the_count, $tid, $tdb_path, $key);
    }
    # - adds, updates and deletes using TXT2TDB
    elsif ($action eq '+' || $action eq '-')
    {
      $result = _writeViaTxt2tdb($the_count, $tid, $tdb_path, $key, $action, $payload);
    }
    else
    {
      print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
    }
  }
  # Synchronized debug log writing
  debugPrint($the_count, $tid, 'SEND', $result);
  return $result;
}

# Handles the special commands: [a]dd listener, [r]emove listener and [q]uit.
# Returns the status text to send back to the client.
sub _handleControlCommand
{
  my ($command, $argument) = @_;
  my $result = "#ERROR#";
  # addlistener(<pid>)
  if ($command eq "a")
  {
    lock(%listeners);
    $listeners{$argument} = 1;
    my $listener_count = scalar(keys(%listeners));
    $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
  }
  # removelistener(<pid>)
  elsif ($command eq "r")
  {
    my $listener_count = 0;
    {
      lock(%listeners);
      if (defined $listeners{$argument})
      {
        delete $listeners{$argument};
      }
      $listener_count = scalar(keys(%listeners));
    }
    lock($should_stop);
    if ($should_stop == 1 && $listener_count == 0)
    {
      # server isn't shared, but the stop data member is!
      $server->stop;
      $result = "[SUCCESS] removed last listener, stopping";
    }
    else
    {
      $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
    }
  }
  # we may be asked to stop the server, but only by the process that created
  # us. If there are no listeners registered, we stop straight away,
  # otherwise we set a flag so that as soon as there are no listeners we
  # stop.
  elsif ($command eq "q")
  {
    if ($argument ne $parent_pid && $argument ne "*")
    {
      $result = "[IGNORED] can only be stopped by parent process";
    }
    else
    {
      my $listener_count = 0;
      {
        lock(%listeners);
        $listener_count = scalar(keys(%listeners));
      }
      if ($listener_count == 0)
      {
        # server isn't shared, but the stop data member is!
        $server->stop;
        $result = "[SUCCESS] stopping";
      }
      else
      {
        lock($should_stop);
        $should_stop = 1;
        $result = "[PENDING] will stop when no more listeners";
      }
    }
  }
  return $result;
}

# Builds the path to the database file for a database code: 'd' and 's' map
# to the doc and src files in the archives directory, anything else to the
# collection's index database in building/text.
sub _tdbPathFor
{
  my ($database) = @_;
  if ($database eq 'd')
  {
    return &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
  }
  if ($database eq 's')
  {
    return &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
  }
  return &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
}

# Sends a single record to TDBCLI through an IPC::Run harness and returns
# whatever TDBCLI wrote back (up to and including its line of 70 dashes).
sub _runViaHarness
{
  my ($the_count, $tid, $tdb_path, $record) = @_;
  # Open harness to TDBCLI
  debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness');
  # NOTE(review): '-mid <count>' is handed over as ONE argument - confirm
  # that this is what tdbcli expects
  my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
  my $buffer_to_tdb = '';
  my $buffer_from_tdb = '';
  my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
  # Check the harness worked
  if (!$tdb_harness->pumpable())
  {
    die("Error! Harness to " . $tdbexe . " has gone away!");
  }
  # - write the data to the TDBCLI
  $buffer_to_tdb = $record . "\n";
  while (length($buffer_to_tdb))
  {
    pump($tdb_harness);
  }
  # - read any response from TDBCLI
  debugPrint($the_count, $tid, 'TDBCLI', 'Reading');
  while ($buffer_from_tdb !~ /-{70}/)
  {
    pump($tdb_harness);
  }
  # - explicitly tell the pipe to quit (empty key)
  debugPrint($the_count, $tid, 'TDBCLI', 'Closing');
  $buffer_to_tdb = "[]\n";
  while (length($buffer_to_tdb))
  {
    pump($tdb_harness);
  }
  # - note that this result doesn't include the [Server] prefix as it
  #   may be parsed for data by the client
  my $result = $buffer_from_tdb;
  chomp($result);
  # Finished with harness
  debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness');
  finish($tdb_harness);
  debugPrint($the_count, $tid, 'TDBCLI', 'Complete');
  return $result;
}

# Looks up a key with tdbget (or lists every key with tdbkeys when the key
# is '*') and returns the tool's output, always terminated by a line of 70
# dashes. A database that doesn't exist yet yields just the dashes.
sub _lookupViaTdbget
{
  my ($the_count, $tid, $tdb_path, $key) = @_;
  my $command_name = 'TDBGET';
  my @command = ('tdbget', $tdb_path, $key);
  # Special case for retrieve all keys (indicated by *)
  if ($key eq '*')
  {
    $command_name = 'TDBKEYS';
    @command = ('tdbkeys', $tdb_path);
  }
  debugPrint($the_count, $tid, $command_name, 'Command: ' . join(' ', $command[0], map { '"' . $_ . '"' } @command[1 .. $#command]));
  my $result = '';
  if (-e $tdb_path)
  {
    # The key arrives over the network, so run the tool via the list form of
    # open: arguments go straight to the program and never through a shell
    if (open(my $tdb_out, '-|', @command))
    {
      local $/ = undef;
      my $output = <$tdb_out>;
      $result = $output if (defined $output);
      close($tdb_out);
    }
    else
    {
      print STDERR "Warning! Failed to run " . $command[0] . " for request " . $the_count . ": " . $! . "\n";
    }
  }
  else
  {
    debugPrint($the_count, $tid, $command_name, "TDB database doesn't exist (yet): " . $tdb_path);
  }
  debugPrint($the_count, $tid, $command_name, 'Result: ' . $result);
  if ($result !~ /-{70}/)
  {
    $result .= "-"x70 . "\n";
  }
  return $result;
}

# Adds/updates ('+') or deletes ('-') a record by piping it to
# 'txt2tdb -append'. Always returns a line of 70 dashes; a failure reported
# by txt2tdb is logged to STDERR.
sub _writeViaTxt2tdb
{
  my ($the_count, $tid, $tdb_path, $key, $action, $payload) = @_;
  debugPrint($the_count, $tid, 'TXT2TDB', 'Command: txt2tdb -append "' . $tdb_path . '"');
  # - list form of open, so the path is never interpreted by a shell
  open(my $infodb_handle, '|-', 'txt2tdb', '-append', $tdb_path) or die("Error! Failed to open pipe to TXT2TDB\n");
  # - only deletes carry their action character; adds are just [key]payload
  my $record = '[' . $key . ']' . (($action eq '-') ? $action : '') . $payload;
  print $infodb_handle $record;
  if (!close($infodb_handle))
  {
    print STDERR "Warning! TXT2TDB reported a problem for request " . $the_count . " (exit status " . ($? >> 8) . ")\n";
  }
  my $result = "-"x70 . "\n";
  debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result);
  return $result;
}
541
# @function debugPrint
# Writes one entry to the debug log (STDOUT) when debugging is enabled, and
# does nothing otherwise. Entries are serialised on the shared $debug_log
# lock and are flushed immediately.
# @param the_count  the number of the request this entry belongs to
# @param tid        id of the thread writing the entry
# @param type       short label for the kind of entry (e.g. RECV, SEND)
# @param msg        the text of the entry
sub debugPrint
{
  my ($the_count, $tid, $type, $msg) = @_;
  # The return value only mirrors what the previous form happened to
  # evaluate to; no caller in this file uses it
  return $debug unless ($debug);
  lock($debug_log);
  # Autoflush on for the duration of the write, then off again
  $| = 1;
  print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
  $| = 0;
  return 1;
}
554
# @function printUsageAndExit
# Reports a problem with the command line arguments, followed by the usage
# message, on STDERR and then terminates the script with a non-zero exit
# status so that the caller can tell the server was never started.
# @param msg  the error message to show above the usage line
sub printUsageAndExit
{
  my ($msg) = @_;
  $msg = '' unless (defined $msg);
  print STDERR "$msg\n\n";
  print STDERR "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
  exit(1);
}
562
5631;
Note: See TracBrowser for help on using the repository browser.