root/gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl @ 29650

Revision 29650, 17.9 KB (checked in by jmt12, 6 years ago)

Just clarifying a few comments

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl
2
3# jmt12
4
5use strict;
6use warnings;
7
8# Setup Environment
9BEGIN
10{
11  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
12  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
13
14  unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
15  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
16  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
17  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
18  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
19
20  if (defined $ENV{'GSDLEXTS'})
21  {
22    my @extensions = split(/:/,$ENV{'GSDLEXTS'});
23    foreach my $e (@extensions)
24    {
25      my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
26      unshift (@INC, "$ext_prefix/perllib");
27      unshift (@INC, "$ext_prefix/perllib/cpan");
28      unshift (@INC, "$ext_prefix/perllib/plugins");
29      unshift (@INC, "$ext_prefix/perllib/classify");
30    }
31  }
32  if (defined $ENV{'GSDL3EXTS'})
33  {
34    my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
35    foreach my $e (@extensions)
36    {
37      my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
38      unshift (@INC, "$ext_prefix/perllib");
39      unshift (@INC, "$ext_prefix/perllib/cpan");
40      unshift (@INC, "$ext_prefix/perllib/plugins");
41      unshift (@INC, "$ext_prefix/perllib/classify");
42    }
43  }
44
45  # Manually installed CPAN package in GEXT*INSTALL
46  # - parse up version number
47  my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
48  # - get rid of leading zeros by making them integers
49  $major += 0;
50  $minor += 0;
51  $revision += 0;
52  # - and add to Perl's path
53  unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
54  unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
55}
56
57use Cwd;
58
59# We need to do a little file locking
60use Fcntl qw(:flock); #import LOCK_* constants
61# Advanced child process control allowing bidirectional pipes
62use IPC::Run qw(harness start pump finish);
63# we need to run as a daemon
64use Proc::Daemon;
65# Try and find the hostname
66use Sys::Hostname;
67
68# The server will need to accept requests from multiple threads, and
69# so will need threads in and of itself
70use threads;
71use threads::shared;
72
73# Greenstone utility functions (filename_cat)
74use util;
75use dbutil::tdb;
76
77# A simple server that listens on a socket and 'forks' off child threads to
78# handle each incoming request
79use SocketsSwimmingThreadPoolServer;
80
81# Globally available - but once set these are read-only - so locking isn't
82# an issue
83my $use_harness = 0;
84my $tdbexe = 'tdbcli';
85my $parent_pid = 0;
86my $collection = '';
87my $no_daemon = 0;
88my $debug = 1;
89my $server;
90my $machine_name = (`hostname -s` || `hostname -a` || `hostname` || $ENV{'HOSTNAME'});
91chomp($machine_name);
92my $server_host = $machine_name . '.local';
93my $server_port;
94my $server_threads;
95# - shared and, more importantly, lockable
96my %listeners :shared;
97my $should_stop :shared = 0;
98my $debug_log :shared = 0;
99
100my $msg_counter :shared = 0;
101
102print "===== TDB Server =====\n";
103print "Provides a server to allow multiple remote machines to simultaenously\n";
104print "edit one or more TDB databases on the local machine. This is to work\n";
105print "around NFS file locking issues when parallel processing on a cluster.\n";
106
107MAIN:
108{
109  # Check arguments
110  # - compulsory
111  if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
112  {
113    &printUsageAndExit('Error! Missing parent process ID or not a PID');
114  }
115  $parent_pid = $ARGV[0];
116  if (!defined $ARGV[1])
117  {
118    &printUsageAndExit('Error! Missing active Greenstone collection name');
119  }
120  $collection = $ARGV[1];
121  # - optional
122  my $i = 2;
123  while (defined $ARGV[$i])
124  {
125    if ($ARGV[$i] eq "-nodaemon")
126    {
127      $no_daemon = 1;
128    }
129    if ($ARGV[$i] eq "-debug")
130    {
131      $debug = 1;
132    }
133    $i++;
134  }
135
136  # Read in the collection specific configuration
137  my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
138  open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
139  my $line = '';
140  while (($line = <CFGIN>))
141  {
142    if ($line =~ /^(\w+)\s+(.*)$/)
143    {
144      my $key = $1;
145      my $value = $2;
146      # Allow the override of serverhost
147      if ($key eq "serverhost")
148      {
149        $server_host = $value;
150      }
151      if ($key eq "serverport")
152      {
153        $server_port = $value;
154      }
155      if ($key eq "threads")
156      {
157        $server_threads = $value;
158      }
159    }
160  }
161  close(CFGIN);
162
163  if ($debug)
164  {
165    print " - collection: " . $collection . "\n";
166    print " - parent pid: " . $parent_pid . "\n";
167    print " - no daemon? " . $no_daemon . "\n";
168    print " - debug? " . $debug . "\n";
169    print " - serverhost: " . $server_host . "\n";
170    print " - serverport: " . $server_port . "\n";
171    print " - threads: " . $server_threads . "\n";
172    print "\n";
173  }
174
175  # Information about any running TDBServer is stored in a lockfile in
176  # Greenstone's tmp directory (named after the active collection)
177  my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
178  if (!-d $tmp_dir)
179  {
180    mkdir($tmp_dir, 0755);
181  }
182  my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
183
184  # If already running, then exit
185  print " * Testing if TDBServer for this collection already running... ";
186  if (-e $server_lockfile_path)
187  {
188    die("Error! TDBServer already running!\nLockfile found at: " . $server_lockfile_path);
189  }
190  print "All clear!\n";
191
192  # Ensure we can see tdb edit tools on the path
193  print " * Testing for tool: " . $tdbexe . "... ";
194  my $result = `$tdbexe 2>&1`;
195  if ($result !~ /usage:\s+$tdbexe/)
196  {
197    die("Error! " . $tdbexe . " not available - check path.");
198  }
199  print "Found!\n";
200
201  # Daemonize
202  my $pid = 0;
203  if (!$no_daemon)
204  {
205    print " * Spawning Daemon...\n" unless (!$debug);
206    if ($debug)
207    {
208      my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
209      if (!-d $logs_dir)
210      {
211        mkdir($logs_dir, 0755);
212      }
213      my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
214      my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
215      $pid = Proc::Daemon::Init( { work_dir => getcwd(),
216                                   child_STDOUT => $daemon_out_path,
217                                   child_STDERR => $daemon_err_path,
218                                 } );
219    }
220    else
221    {
222      # Streams to /dev/null
223      $pid = Proc::Daemon::Init( { work_dir => getcwd(),
224                                 } );
225    }
226  }
227
228  # Master process has pid > 0
229  if ($pid == 0)
230  {
231    print "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
232    print " * Starting server on " . $server_host . ":" . $server_port . "\n";
233    # - create server object
234    print " * Creating pool of " . $server_threads . " threads listening on socket\n";
235    $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
236                                                   port=>$server_port,
237                                                   thread_count=>$server_threads,
238                                                   main_cb => \&exitCheck,
239                                                   processor_cb => \&process);
240
241    # - write a lockfile
242    print " * Creating lock file: " . $server_lockfile_path . "\n";
243    open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
244    flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
245    print SLFH $server_host . ':' . $server_port;
246    flock(SLFH, LOCK_UN);
247    close(SLFH);
248
249    # Perform main loop
250    # - loop is actually in Server code. start() only returns once server's stop
251    #   command has been called
252    print " * Listening:\n";
253    $server->start;
254    print " * Stopping...\n";
255
256    # Perform deinitializes here
257    # - remove server lockfile
258    print " * Removing lock file...\n";
259    unlink($server_lockfile_path);
260    print "Done!\n";
261  }
262  # Forked child processes
263  else
264  {
265    print " * Waiting for lockfile to be created";
266    while (!-e $server_lockfile_path)
267    {
268      print '.';
269      sleep(1);
270    }
271    print "\n * TDBServer lockfile created.\n";
272    open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
273    flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
274    my $line = <SLFH>;
275    if ($line =~ /(^.+):(\d+)$/)
276    {
277      print " => Server now listening on " . $1 . ":" . $2 . "\n";
278    }
279    else
280    {
281      die ("Error! Failed to retrieve host and port information from lockfile!");
282    }
283    flock(SLFH, LOCK_UN);
284    close(SLFH);
285  }
286
287  print "===== Complete! =====\n";
288}
289exit(0);
290
291# @function exitCheck
292# A callback function, called every 5 seconds (default) by the socket server,
293# to see whether the parent process (by pid) is actually still running. This
294# will cover the case where the parent process (import.pl or build.pl) dies
295# without properly asking the server to shutdown.
296sub exitCheck
297{
298  my $counter = shift @_;
299  #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
300  # Parent PID not available or we aren't allowed to talk to it (debugging)
301  if ($parent_pid == 0)
302  {
303    return;
304  }
305  # note: kill, when passed a first argument of 0, checks whether it's possible
306  # to send a signal to the pid given as the second argument, and returns true
307  # if it is. Thus it provides a means to determine if the parent process is
308  # still running (and hence can be signalled) In newer versions of Perl
309  # (5.8.9) it should even work cross-platform.
310  if (!kill(0, $parent_pid))
311  {
312    print " * Parent processs gone away... forcing server shutdown\n";
313    $server->stop;
314    if ($debug)
315    {
316      lock($debug_log);
317      $|++;
318      print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
319      $|--;
320    }
321  }
322}
323
324# /** @function process
325#  *  A horribly named function that is called back to process each of the
326#  *  requests to alter the TDB databases. It expects a complete TDB CLI
327#  *  command as a text blob, or one of a limited number of special commands
328#  *  ([a]dd or [r]emove listener, or [q]uit).
329#  */
330sub process
331{
332  my $data = shift @_;
333  my $ip = shift @_;
334  my $tid = shift @_;
335  my $result = "#ERROR#";
336  my $the_count = 0;
337  {
338    lock($msg_counter);
339    $msg_counter++;
340    $the_count = $msg_counter + 0;
341    # //unlock($msg_counter);
342  }
343  &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
344  # process special commands first
345  if ($data =~ /^#([arq]):(.*)$/)
346  {
347    my $command = $1;
348    my $argument = $2;
349    # addlistener(<pid>)
350    if ($command eq "a")
351    {
352      lock(%listeners);
353      $listeners{$argument} = 1;
354      my $listener_count = scalar(keys(%listeners));
355      $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
356      # //unlock(%listeners)
357    }
358    # removelistener(<pid>)
359    elsif ($command eq "r")
360    {
361      my $listener_count = 0;
362      {
363        lock(%listeners);
364        if (defined $listeners{$argument})
365        {
366          delete $listeners{$argument};
367        }
368        $listener_count = scalar(keys(%listeners));
369        # //unlock(%listeners)
370      }
371      lock($should_stop);
372      if ($should_stop == 1 && $listener_count == 0)
373      {
374        # server isn't shared, but the stop data member is!
375        $server->stop;
376        $result = "[SUCCESS] removed last listener, stopping";
377      }
378      else
379      {
380        $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
381      }
382      # //unlock($should_stop)
383    }
384    # we may be asked to stop the server, but only by the process that created
385    # us. If there are no listeners registered, we stop straight away,
386    # otherwise we set a flag so that as soon as there are no listeners we
387    # stop.
388    elsif ($command eq "q")
389    {
390      if ($argument ne $parent_pid && $argument ne "*")
391      {
392        $result = "[IGNORED] can only be stopped by parent process";
393      }
394      else
395      {
396        my $listener_count = 0;
397        {
398          lock(%listeners);
399          $listener_count = scalar(keys(%listeners));
400          # //unlock(%listeners)
401        }
402        if ($listener_count == 0)
403        {
404          # server isn't shared, but the stop data member is!
405          $server->stop;
406          $result = "[SUCCESS] stopping";
407        }
408        else
409        {
410          lock($should_stop);
411          $should_stop = 1;
412          $result = "[PENDING] will stop when no more listeners";
413          # //unlock($should_stop)
414        }
415      }
416    }
417  }
418  # Everything thing else should be a TDB command of the form:
419  #   <database>:<key>:<value>
420  # where: database is [d]oc, [i]ndex, or [s]rc
421  elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
422  {
423    my $database = $1;
424    my $key = $2;
425    my $action = $3;
426    # by default we add for backwards compatibility
427    if (!defined $action || $action eq '')
428    {
429      print STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
430      $action = '+';
431    }
432    my $payload = $4;
433    $payload =~ s/^\s+|\s+$//g;
434    &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;
435
436    # We need to try and persist the connection to TDB, otherwise the OS quickly
437    # exhausts NFS daemons amongst other issues
438    # Can I make use of the Greenstone DBUtils TDB somehow?
439    # Arg - because there s the potential to mix reads and writes, this isn't
440    # as straightforward as first thought
441
442    # Build path to database file
443    my $tdb_path = '';
444    if ($database eq 'd')
445    {
446      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
447    }
448    elsif ($database eq 's')
449    {
450      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
451    }
452    else
453    {
454      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
455    }
456    # Harnesses seem like goodly magic - but unfortunately may be broken
457    # magic. Testing on Medusa randomly hangs on the finish() function.
458    if ($use_harness)
459    {
460      my $record = '[' . $key . ']' . $action . $payload;
461      # Open harness to TDBCLI
462      &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
463      my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
464      my $buffer_to_tdb = '';
465      my $buffer_from_tdb = '';
466      my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
467      # Check the harness worked
468      if (!pumpable $tdb_harness)
469      {
470        die("Error! Harness to " . $tdbexe . " has gone away!");
471      }
472      # - write the data to the TDBCLI
473      $buffer_to_tdb = $record . "\n";
474      while (length($buffer_to_tdb))
475      {
476        pump($tdb_harness);
477      }
478      # - read any response from TDBCLI
479      &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
480      while ($buffer_from_tdb !~ /-{70}/)
481      {
482        pump($tdb_harness);
483      }
484      # - explicitly tell the pipe to quit (empty key)
485      &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
486      $buffer_to_tdb = "[]\n";
487      while (length($buffer_to_tdb))
488      {
489        pump($tdb_harness);
490      }
491      # - not that this result doesn't include the [Server] prefix as it
492      #   may be parsed for data by the client
493      $result = $buffer_from_tdb;
494      chomp($result);
495      # Finished with harness
496      &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
497      finish($tdb_harness);
498      &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
499    }
500    # Use different TDB tools depending on arguments
501    # - lookups using TDBGET
502    elsif ($action eq '?')
503    {
504      my $command_name = '';
505      my $command1 = '';
506      # Special case for retrieve all keys (indicated by *)
507      if ($key eq '*')
508      {
509        $command_name = 'TDBKEYS';
510        $command1 = 'tdbkeys "' . $tdb_path . '"';
511      }
512      else
513      {
514        $command_name = 'TDBGET';
515        $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
516      }
517      &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
518      if (-e $tdb_path)
519      {
520        $result = `$command1`;
521      }
522      else
523      {
524        &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
525        $result = '';
526      }
527      &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
528      if ($result !~ /-{70}/)
529      {
530        $result .= "-"x70 . "\n";
531      }
532    }
533    # - adds, updates and deletes using TXT2TDB
534    elsif ($action eq '+' || $action eq '-')
535    {
536      my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
537      &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
538      open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
539      print $infodb_handle '[' . $key . ']';
540      if ($action eq '-')
541      {
542        print $infodb_handle $action;
543      }
544      print $infodb_handle $payload;
545      close($infodb_handle);
546      $result = "-"x70 . "\n";
547      &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
548    }
549    else
550    {
551      print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
552    }
553  }
554  # Synchronized debug log writing
555  &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
556  return $result;
557}
558
559sub debugPrint
560{
561  my ($the_count, $tid, $type, $msg) = @_;
562  if ($debug)
563  {
564    lock($debug_log);
565    $|++;
566    print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
567    $|--;
568    # //unlock($debug_log);
569  }
570}
571
572sub printUsageAndExit
573{
574  my ($msg) = @_;
575  print "$msg\n\n";
576  print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
577  exit(0);
578}
579
5801;
Note: See TracBrowser for help on using the browser.