source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 29650

Last change on this file since 29650 was 29650, checked in by jmt12, 9 years ago

Just clarifying a few comments

  • Property svn:executable set to *
File size: 17.9 KB
RevLine 
[25402]1#!/usr/bin/perl
2
[26990]3# jmt12
4
[25402]5use strict;
6use warnings;
7
8# Setup Environment
9BEGIN
10{
11 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
12 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
13
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
18 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
19
20 if (defined $ENV{'GSDLEXTS'})
21 {
22 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
23 foreach my $e (@extensions)
24 {
25 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
26 unshift (@INC, "$ext_prefix/perllib");
27 unshift (@INC, "$ext_prefix/perllib/cpan");
28 unshift (@INC, "$ext_prefix/perllib/plugins");
29 unshift (@INC, "$ext_prefix/perllib/classify");
30 }
31 }
32 if (defined $ENV{'GSDL3EXTS'})
33 {
34 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
35 foreach my $e (@extensions)
36 {
37 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
38 unshift (@INC, "$ext_prefix/perllib");
39 unshift (@INC, "$ext_prefix/perllib/cpan");
40 unshift (@INC, "$ext_prefix/perllib/plugins");
41 unshift (@INC, "$ext_prefix/perllib/classify");
42 }
43 }
44
45 # Manually installed CPAN package in GEXT*INSTALL
[25414]46 # - parse up version number
47 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
48 # - get rid of leading zeros by making them integers
49 $major += 0;
50 $minor += 0;
51 $revision += 0;
52 # - and add to Perl's path
[26989]53 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
[25414]54 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
[25402]55}
56
57use Cwd;
58
59# We need to do a little file locking
60use Fcntl qw(:flock); #import LOCK_* constants
61# Advanced child process control allowing bidirectional pipes
62use IPC::Run qw(harness start pump finish);
63# we need to run as a daemon
64use Proc::Daemon;
[27397]65# Try and find the hostname
66use Sys::Hostname;
[25402]67
68# The server will need to accept requests from multiple threads, and
69# so will need threads in and of itself
70use threads;
71use threads::shared;
72
73# Greenstone utility functions (filename_cat)
74use util;
[29650]75use dbutil::tdb;
[25402]76
77# A simple server that listens on a socket and 'forks' off child threads to
78# handle each incoming request
79use SocketsSwimmingThreadPoolServer;
80
81# Globally available - but once set these are read-only - so locking isn't
82# an issue
[25477]83my $use_harness = 0;
[25402]84my $tdbexe = 'tdbcli';
85my $parent_pid = 0;
86my $collection = '';
87my $no_daemon = 0;
[26989]88my $debug = 1;
[25402]89my $server;
[27397]90my $machine_name = (`hostname -s` || `hostname -a` || `hostname` || $ENV{'HOSTNAME'});
[26989]91chomp($machine_name);
92my $server_host = $machine_name . '.local';
[25402]93my $server_port;
94my $server_threads;
95# - shared and, more importantly, lockable
96my %listeners :shared;
97my $should_stop :shared = 0;
98my $debug_log :shared = 0;
99
[25453]100my $msg_counter :shared = 0;
101
[25402]102print "===== TDB Server =====\n";
103print "Provides a server to allow multiple remote machines to simultaenously\n";
104print "edit one or more TDB databases on the local machine. This is to work\n";
105print "around NFS file locking issues when parallel processing on a cluster.\n";
106
107MAIN:
108{
109 # Check arguments
110 # - compulsory
111 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
112 {
113 &printUsageAndExit('Error! Missing parent process ID or not a PID');
114 }
115 $parent_pid = $ARGV[0];
116 if (!defined $ARGV[1])
117 {
118 &printUsageAndExit('Error! Missing active Greenstone collection name');
119 }
120 $collection = $ARGV[1];
121 # - optional
122 my $i = 2;
123 while (defined $ARGV[$i])
124 {
125 if ($ARGV[$i] eq "-nodaemon")
126 {
127 $no_daemon = 1;
128 }
129 if ($ARGV[$i] eq "-debug")
130 {
131 $debug = 1;
132 }
133 $i++;
134 }
135
136 # Read in the collection specific configuration
137 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
138 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
139 my $line = '';
140 while (($line = <CFGIN>))
141 {
142 if ($line =~ /^(\w+)\s+(.*)$/)
143 {
144 my $key = $1;
145 my $value = $2;
[26989]146 # Allow the override of serverhost
[25402]147 if ($key eq "serverhost")
148 {
149 $server_host = $value;
150 }
151 if ($key eq "serverport")
152 {
153 $server_port = $value;
154 }
155 if ($key eq "threads")
156 {
157 $server_threads = $value;
158 }
159 }
160 }
161 close(CFGIN);
162
163 if ($debug)
164 {
165 print " - collection: " . $collection . "\n";
166 print " - parent pid: " . $parent_pid . "\n";
167 print " - no daemon? " . $no_daemon . "\n";
168 print " - debug? " . $debug . "\n";
169 print " - serverhost: " . $server_host . "\n";
170 print " - serverport: " . $server_port . "\n";
171 print " - threads: " . $server_threads . "\n";
172 print "\n";
173 }
174
175 # Information about any running TDBServer is stored in a lockfile in
176 # Greenstone's tmp directory (named after the active collection)
177 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
178 if (!-d $tmp_dir)
179 {
180 mkdir($tmp_dir, 0755);
181 }
182 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
183
184 # If already running, then exit
185 print " * Testing if TDBServer for this collection already running... ";
186 if (-e $server_lockfile_path)
187 {
[26995]188 die("Error! TDBServer already running!\nLockfile found at: " . $server_lockfile_path);
[25402]189 }
190 print "All clear!\n";
191
192 # Ensure we can see tdb edit tools on the path
193 print " * Testing for tool: " . $tdbexe . "... ";
194 my $result = `$tdbexe 2>&1`;
195 if ($result !~ /usage:\s+$tdbexe/)
196 {
[26995]197 die("Error! " . $tdbexe . " not available - check path.");
[25402]198 }
199 print "Found!\n";
200
201 # Daemonize
202 my $pid = 0;
203 if (!$no_daemon)
204 {
205 print " * Spawning Daemon...\n" unless (!$debug);
[27397]206 if ($debug)
[25402]207 {
[27397]208 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
209 if (!-d $logs_dir)
210 {
211 mkdir($logs_dir, 0755);
212 }
213 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
214 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
215 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
216 child_STDOUT => $daemon_out_path,
217 child_STDERR => $daemon_err_path,
218 } );
[25402]219 }
[27397]220 else
221 {
222 # Streams to /dev/null
223 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
224 } );
225 }
[25402]226 }
227
228 # Master process has pid > 0
229 if ($pid == 0)
230 {
[26995]231 print "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
[26989]232 print " * Starting server on " . $server_host . ":" . $server_port . "\n";
[25402]233 # - create server object
[26989]234 print " * Creating pool of " . $server_threads . " threads listening on socket\n";
[25402]235 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
236 port=>$server_port,
237 thread_count=>$server_threads,
[25453]238 main_cb => \&exitCheck,
[25402]239 processor_cb => \&process);
240
241 # - write a lockfile
242 print " * Creating lock file: " . $server_lockfile_path . "\n";
243 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
244 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
245 print SLFH $server_host . ':' . $server_port;
246 flock(SLFH, LOCK_UN);
247 close(SLFH);
248
249 # Perform main loop
250 # - loop is actually in Server code. start() only returns once server's stop
251 # command has been called
252 print " * Listening:\n";
253 $server->start;
254 print " * Stopping...\n";
255
256 # Perform deinitializes here
257 # - remove server lockfile
258 print " * Removing lock file...\n";
259 unlink($server_lockfile_path);
260 print "Done!\n";
261 }
262 # Forked child processes
263 else
264 {
265 print " * Waiting for lockfile to be created";
266 while (!-e $server_lockfile_path)
267 {
268 print '.';
269 sleep(1);
270 }
271 print "\n * TDBServer lockfile created.\n";
272 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
273 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
274 my $line = <SLFH>;
275 if ($line =~ /(^.+):(\d+)$/)
276 {
277 print " => Server now listening on " . $1 . ":" . $2 . "\n";
278 }
279 else
280 {
281 die ("Error! Failed to retrieve host and port information from lockfile!");
282 }
283 flock(SLFH, LOCK_UN);
284 close(SLFH);
285 }
286
287 print "===== Complete! =====\n";
288}
289exit(0);
290
291# @function exitCheck
292# A callback function, called every 5 seconds (default) by the socket server,
293# to see whether the parent process (by pid) is actually still running. This
294# will cover the case where the parent process (import.pl or build.pl) dies
295# without properly asking the server to shutdown.
296sub exitCheck
297{
298 my $counter = shift @_;
[25453]299 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
[25402]300 # Parent PID not available or we aren't allowed to talk to it (debugging)
301 if ($parent_pid == 0)
302 {
303 return;
304 }
305 # note: kill, when passed a first argument of 0, checks whether it's possible
306 # to send a signal to the pid given as the second argument, and returns true
307 # if it is. Thus it provides a means to determine if the parent process is
308 # still running (and hence can be signalled) In newer versions of Perl
309 # (5.8.9) it should even work cross-platform.
310 if (!kill(0, $parent_pid))
311 {
312 print " * Parent processs gone away... forcing server shutdown\n";
313 $server->stop;
314 if ($debug)
315 {
316 lock($debug_log);
317 $|++;
318 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
319 $|--;
320 }
321 }
322}
323
324# /** @function process
325# * A horribly named function that is called back to process each of the
326# * requests to alter the TDB databases. It expects a complete TDB CLI
327# * command as a text blob, or one of a limited number of special commands
328# * ([a]dd or [r]emove listener, or [q]uit).
329# */
330sub process
331{
332 my $data = shift @_;
333 my $ip = shift @_;
334 my $tid = shift @_;
335 my $result = "#ERROR#";
[25454]336 my $the_count = 0;
[25402]337 {
[25454]338 lock($msg_counter);
339 $msg_counter++;
340 $the_count = $msg_counter + 0;
341 # //unlock($msg_counter);
[25402]342 }
[25486]343 &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
[25402]344 # process special commands first
345 if ($data =~ /^#([arq]):(.*)$/)
346 {
347 my $command = $1;
348 my $argument = $2;
349 # addlistener(<pid>)
350 if ($command eq "a")
351 {
352 lock(%listeners);
353 $listeners{$argument} = 1;
354 my $listener_count = scalar(keys(%listeners));
355 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
356 # //unlock(%listeners)
357 }
358 # removelistener(<pid>)
359 elsif ($command eq "r")
360 {
361 my $listener_count = 0;
362 {
363 lock(%listeners);
364 if (defined $listeners{$argument})
365 {
366 delete $listeners{$argument};
367 }
368 $listener_count = scalar(keys(%listeners));
369 # //unlock(%listeners)
370 }
371 lock($should_stop);
372 if ($should_stop == 1 && $listener_count == 0)
373 {
374 # server isn't shared, but the stop data member is!
375 $server->stop;
376 $result = "[SUCCESS] removed last listener, stopping";
377 }
378 else
379 {
380 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
381 }
382 # //unlock($should_stop)
383 }
384 # we may be asked to stop the server, but only by the process that created
385 # us. If there are no listeners registered, we stop straight away,
386 # otherwise we set a flag so that as soon as there are no listeners we
387 # stop.
388 elsif ($command eq "q")
389 {
390 if ($argument ne $parent_pid && $argument ne "*")
391 {
392 $result = "[IGNORED] can only be stopped by parent process";
393 }
394 else
395 {
396 my $listener_count = 0;
397 {
398 lock(%listeners);
399 $listener_count = scalar(keys(%listeners));
400 # //unlock(%listeners)
401 }
402 if ($listener_count == 0)
403 {
404 # server isn't shared, but the stop data member is!
405 $server->stop;
406 $result = "[SUCCESS] stopping";
407 }
408 else
409 {
410 lock($should_stop);
411 $should_stop = 1;
412 $result = "[PENDING] will stop when no more listeners";
413 # //unlock($should_stop)
414 }
415 }
416 }
417 }
[29650]418 # Everything thing else should be a TDB command of the form:
419 # <database>:<key>:<value>
[25402]420 # where: database is [d]oc, [i]ndex, or [s]rc
[25477]421 elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
[25402]422 {
423 my $database = $1;
[25454]424 my $key = $2;
[25477]425 my $action = $3;
426 # by default we add for backwards compatibility
427 if (!defined $action || $action eq '')
428 {
[26995]429 print STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
[25477]430 $action = '+';
431 }
[25454]432 my $payload = $4;
[25477]433 $payload =~ s/^\s+|\s+$//g;
[25486]434 &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;
[29650]435
436 # We need to try and persist the connection to TDB, otherwise the OS quickly
437 # exhausts NFS daemons amongst other issues
438 # Can I make use of the Greenstone DBUtils TDB somehow?
439 # Arg - because there s the potential to mix reads and writes, this isn't
440 # as straightforward as first thought
441
[25402]442 # Build path to database file
443 my $tdb_path = '';
444 if ($database eq 'd')
445 {
446 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
447 }
448 elsif ($database eq 's')
449 {
450 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
451 }
452 else
453 {
454 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
455 }
[25454]456 # Harnesses seem like goodly magic - but unfortunately may be broken
457 # magic. Testing on Medusa randomly hangs on the finish() function.
458 if ($use_harness)
[25453]459 {
[25454]460 my $record = '[' . $key . ']' . $action . $payload;
461 # Open harness to TDBCLI
[25486]462 &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
[25454]463 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
464 my $buffer_to_tdb = '';
465 my $buffer_from_tdb = '';
466 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
467 # Check the harness worked
468 if (!pumpable $tdb_harness)
469 {
470 die("Error! Harness to " . $tdbexe . " has gone away!");
471 }
472 # - write the data to the TDBCLI
473 $buffer_to_tdb = $record . "\n";
474 while (length($buffer_to_tdb))
475 {
476 pump($tdb_harness);
477 }
478 # - read any response from TDBCLI
[25486]479 &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
[25454]480 while ($buffer_from_tdb !~ /-{70}/)
481 {
482 pump($tdb_harness);
483 }
[25455]484 # - explicitly tell the pipe to quit (empty key)
[25486]485 &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
[25455]486 $buffer_to_tdb = "[]\n";
487 while (length($buffer_to_tdb))
488 {
489 pump($tdb_harness);
490 }
[25454]491 # - not that this result doesn't include the [Server] prefix as it
492 # may be parsed for data by the client
493 $result = $buffer_from_tdb;
494 chomp($result);
495 # Finished with harness
[25486]496 &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
[25454]497 finish($tdb_harness);
[25486]498 &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
[25453]499 }
[25454]500 # Use different TDB tools depending on arguments
501 # - lookups using TDBGET
502 elsif ($action eq '?')
[25402]503 {
[25477]504 my $command_name = '';
505 my $command1 = '';
506 # Special case for retrieve all keys (indicated by *)
507 if ($key eq '*')
508 {
509 $command_name = 'TDBKEYS';
510 $command1 = 'tdbkeys "' . $tdb_path . '"';
511 }
512 else
513 {
514 $command_name = 'TDBGET';
515 $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
516 }
[25486]517 &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
[26989]518 if (-e $tdb_path)
519 {
520 $result = `$command1`;
521 }
522 else
523 {
524 &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
525 $result = '';
526 }
[25486]527 &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
[26989]528 if ($result !~ /-{70}/)
[25454]529 {
530 $result .= "-"x70 . "\n";
531 }
[25402]532 }
[25454]533 # - adds, updates and deletes using TXT2TDB
534 elsif ($action eq '+' || $action eq '-')
[25453]535 {
[25477]536 my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
[25486]537 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
[25455]538 open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
[25454]539 print $infodb_handle '[' . $key . ']';
540 if ($action eq '-')
541 {
542 print $infodb_handle $action;
543 }
544 print $infodb_handle $payload;
545 close($infodb_handle);
546 $result = "-"x70 . "\n";
[25486]547 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
[25453]548 }
[25454]549 else
[25453]550 {
[25477]551 print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
[25453]552 }
[25402]553 }
554 # Synchronized debug log writing
[25486]555 &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
[25454]556 return $result;
557}
558
559sub debugPrint
560{
561 my ($the_count, $tid, $type, $msg) = @_;
[25402]562 if ($debug)
563 {
564 lock($debug_log);
565 $|++;
[25454]566 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
[25402]567 $|--;
568 # //unlock($debug_log);
569 }
570}
571
572sub printUsageAndExit
573{
574 my ($msg) = @_;
575 print "$msg\n\n";
576 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
577 exit(0);
578}
579
5801;
Note: See TracBrowser for help on using the repository browser.