source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 25477

Last change on this file since 25477 was 25477, checked in by jmt12, 12 years ago

Debugged non-harness mode and fixed a few issues (for instance, needing to call TDBKEYS when * is given as the target key)

  • Property svn:executable set to *
File size: 16.7 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Setup Environment
7BEGIN
8{
9 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
10 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
11
12 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
13 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
17
18 if (defined $ENV{'GSDLEXTS'})
19 {
20 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
21 foreach my $e (@extensions)
22 {
23 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
24 unshift (@INC, "$ext_prefix/perllib");
25 unshift (@INC, "$ext_prefix/perllib/cpan");
26 unshift (@INC, "$ext_prefix/perllib/plugins");
27 unshift (@INC, "$ext_prefix/perllib/classify");
28 }
29 }
30 if (defined $ENV{'GSDL3EXTS'})
31 {
32 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
33 foreach my $e (@extensions)
34 {
35 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
36 unshift (@INC, "$ext_prefix/perllib");
37 unshift (@INC, "$ext_prefix/perllib/cpan");
38 unshift (@INC, "$ext_prefix/perllib/plugins");
39 unshift (@INC, "$ext_prefix/perllib/classify");
40 }
41 }
42
43 # Manually installed CPAN package in GEXT*INSTALL
44 # - parse up version number
45 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
46 # - get rid of leading zeros by making them integers
47 $major += 0;
48 $minor += 0;
49 $revision += 0;
50 # - and add to Perl's path
51 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
52}
53
54use Cwd;
55
56# We need to do a little file locking
57use Fcntl qw(:flock); #import LOCK_* constants
58
59# Advanced child process control allowing bidirectional pipes
60use IPC::Run qw(harness start pump finish);
61
62# we need to run as a daemon
63use Proc::Daemon;
64
65# The server will need to accept requests from multiple threads, and
66# so will need threads in and of itself
67use threads;
68use threads::shared;
69
70# Greenstone utility functions (filename_cat)
71use util;
72
73# A simple server that listens on a socket and 'forks' off child threads to
74# handle each incoming request
75use SocketsSwimmingThreadPoolServer;
76
77# Globally available - but once set these are read-only - so locking isn't
78# an issue
79my $use_harness = 0;
80my $tdbexe = 'tdbcli';
81my $parent_pid = 0;
82my $collection = '';
83my $no_daemon = 0;
84my $debug = 1;
85my $server;
86my $server_host;
87my $server_port;
88my $server_threads;
89# - shared and, more importantly, lockable
90my %listeners :shared;
91my $should_stop :shared = 0;
92my $debug_log :shared = 0;
93
94my $msg_counter :shared = 0;
95
96print "===== TDB Server =====\n";
97print "Provides a server to allow multiple remote machines to simultaenously\n";
98print "edit one or more TDB databases on the local machine. This is to work\n";
99print "around NFS file locking issues when parallel processing on a cluster.\n";
100
101MAIN:
102{
103 # Check arguments
104 # - compulsory
105 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
106 {
107 &printUsageAndExit('Error! Missing parent process ID or not a PID');
108 }
109 $parent_pid = $ARGV[0];
110 if (!defined $ARGV[1])
111 {
112 &printUsageAndExit('Error! Missing active Greenstone collection name');
113 }
114 $collection = $ARGV[1];
115 # - optional
116 my $i = 2;
117 while (defined $ARGV[$i])
118 {
119 if ($ARGV[$i] eq "-nodaemon")
120 {
121 $no_daemon = 1;
122 }
123 if ($ARGV[$i] eq "-debug")
124 {
125 $debug = 1;
126 }
127 $i++;
128 }
129
130 # Read in the collection specific configuration
131 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
132 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
133 my $line = '';
134 while (($line = <CFGIN>))
135 {
136 if ($line =~ /^(\w+)\s+(.*)$/)
137 {
138 my $key = $1;
139 my $value = $2;
140 if ($key eq "serverhost")
141 {
142 $server_host = $value;
143 }
144 if ($key eq "serverport")
145 {
146 $server_port = $value;
147 }
148 if ($key eq "threads")
149 {
150 $server_threads = $value;
151 }
152 }
153 }
154 close(CFGIN);
155
156 if ($debug)
157 {
158 print " - collection: " . $collection . "\n";
159 print " - parent pid: " . $parent_pid . "\n";
160 print " - no daemon? " . $no_daemon . "\n";
161 print " - debug? " . $debug . "\n";
162 print " - serverhost: " . $server_host . "\n";
163 print " - serverport: " . $server_port . "\n";
164 print " - threads: " . $server_threads . "\n";
165 print "\n";
166 }
167
168 # Information about any running TDBServer is stored in a lockfile in
169 # Greenstone's tmp directory (named after the active collection)
170 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
171 if (!-d $tmp_dir)
172 {
173 mkdir($tmp_dir, 0755);
174 }
175 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
176
177 # If already running, then exit
178 print " * Testing if TDBServer for this collection already running... ";
179 if (-e $server_lockfile_path)
180 {
181 print "Error! TDBServer already running!\n";
182 print "Lockfile found at: " . $server_lockfile_path . "\n";
183 exit(0);
184 }
185 print "All clear!\n";
186
187 # Ensure we can see tdb edit tools on the path
188 print " * Testing for tool: " . $tdbexe . "... ";
189 my $result = `$tdbexe 2>&1`;
190 if ($result !~ /usage:\s+$tdbexe/)
191 {
192 print "Error! " . $tdbexe . " not available - check path.\n";
193 exit(0);
194 }
195 print "Found!\n";
196
197 # Daemonize
198 my $pid = 0;
199 if (!$no_daemon)
200 {
201 print " * Spawning Daemon...\n" unless (!$debug);
202 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
203 if (!-d $logs_dir)
204 {
205 mkdir($logs_dir, 0755);
206 }
207 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
208 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
209 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
210 child_STDOUT => $daemon_out_path,
211 child_STDERR => $daemon_err_path,
212 } );
213 }
214
215 # Master process has pid > 0
216 if ($pid == 0)
217 {
218 # - create server object
219 print " * Creating pool of " . $server_threads . " threads listening on socket: " . $server_host . ":" . $server_port . "\n";
220 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
221 port=>$server_port,
222 thread_count=>$server_threads,
223 main_cb => \&exitCheck,
224 processor_cb => \&process);
225
226 # - write a lockfile
227 print " * Creating lock file: " . $server_lockfile_path . "\n";
228 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
229 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
230 print SLFH $server_host . ':' . $server_port;
231 flock(SLFH, LOCK_UN);
232 close(SLFH);
233
234 # Perform main loop
235 # - loop is actually in Server code. start() only returns once server's stop
236 # command has been called
237 print " * Listening:\n";
238 $server->start;
239 print " * Stopping...\n";
240
241 # Perform deinitializes here
242 # - remove server lockfile
243 print " * Removing lock file...\n";
244 unlink($server_lockfile_path);
245 print "Done!\n";
246 }
247 # Forked child processes
248 else
249 {
250 print " * Waiting for lockfile to be created";
251 while (!-e $server_lockfile_path)
252 {
253 print '.';
254 sleep(1);
255 }
256 print "\n * TDBServer lockfile created.\n";
257 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
258 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
259 my $line = <SLFH>;
260 if ($line =~ /(^.+):(\d+)$/)
261 {
262 print " => Server now listening on " . $1 . ":" . $2 . "\n";
263 }
264 else
265 {
266 die ("Error! Failed to retrieve host and port information from lockfile!");
267 }
268 flock(SLFH, LOCK_UN);
269 close(SLFH);
270 }
271
272 print "===== Complete! =====\n";
273}
274exit(0);
275
276# @function exitCheck
277# A callback function, called every 5 seconds (default) by the socket server,
278# to see whether the parent process (by pid) is actually still running. This
279# will cover the case where the parent process (import.pl or build.pl) dies
280# without properly asking the server to shutdown.
281sub exitCheck
282{
283 my $counter = shift @_;
284 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
285 # Parent PID not available or we aren't allowed to talk to it (debugging)
286 if ($parent_pid == 0)
287 {
288 return;
289 }
290 # note: kill, when passed a first argument of 0, checks whether it's possible
291 # to send a signal to the pid given as the second argument, and returns true
292 # if it is. Thus it provides a means to determine if the parent process is
293 # still running (and hence can be signalled) In newer versions of Perl
294 # (5.8.9) it should even work cross-platform.
295 if (!kill(0, $parent_pid))
296 {
297 print " * Parent processs gone away... forcing server shutdown\n";
298 $server->stop;
299 if ($debug)
300 {
301 lock($debug_log);
302 $|++;
303 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
304 $|--;
305 }
306 }
307}
308
309# /** @function process
310# * A horribly named function that is called back to process each of the
311# * requests to alter the TDB databases. It expects a complete TDB CLI
312# * command as a text blob, or one of a limited number of special commands
313# * ([a]dd or [r]emove listener, or [q]uit).
314# */
315sub process
316{
317 my $data = shift @_;
318 my $ip = shift @_;
319 my $tid = shift @_;
320 my $result = "#ERROR#";
321 my $the_count = 0;
322 {
323 lock($msg_counter);
324 $msg_counter++;
325 $the_count = $msg_counter + 0;
326 # //unlock($msg_counter);
327 }
328 &debugPrint($the_count, $tid, 'RECV', $data);
329 # process special commands first
330 if ($data =~ /^#([arq]):(.*)$/)
331 {
332 my $command = $1;
333 my $argument = $2;
334 # addlistener(<pid>)
335 if ($command eq "a")
336 {
337 lock(%listeners);
338 $listeners{$argument} = 1;
339 my $listener_count = scalar(keys(%listeners));
340 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
341 # //unlock(%listeners)
342 }
343 # removelistener(<pid>)
344 elsif ($command eq "r")
345 {
346 my $listener_count = 0;
347 {
348 lock(%listeners);
349 if (defined $listeners{$argument})
350 {
351 delete $listeners{$argument};
352 }
353 $listener_count = scalar(keys(%listeners));
354 # //unlock(%listeners)
355 }
356 lock($should_stop);
357 if ($should_stop == 1 && $listener_count == 0)
358 {
359 # server isn't shared, but the stop data member is!
360 $server->stop;
361 $result = "[SUCCESS] removed last listener, stopping";
362 }
363 else
364 {
365 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
366 }
367 # //unlock($should_stop)
368 }
369 # we may be asked to stop the server, but only by the process that created
370 # us. If there are no listeners registered, we stop straight away,
371 # otherwise we set a flag so that as soon as there are no listeners we
372 # stop.
373 elsif ($command eq "q")
374 {
375 if ($argument ne $parent_pid && $argument ne "*")
376 {
377 $result = "[IGNORED] can only be stopped by parent process";
378 }
379 else
380 {
381 my $listener_count = 0;
382 {
383 lock(%listeners);
384 $listener_count = scalar(keys(%listeners));
385 # //unlock(%listeners)
386 }
387 if ($listener_count == 0)
388 {
389 # server isn't shared, but the stop data member is!
390 $server->stop;
391 $result = "[SUCCESS] stopping";
392 }
393 else
394 {
395 lock($should_stop);
396 $should_stop = 1;
397 $result = "[PENDING] will stop when no more listeners";
398 # //unlock($should_stop)
399 }
400 }
401 }
402 }
403 # Everything thing else should be a TDB command
404 # form <database>:<key>:<value>
405 # where: database is [d]oc, [i]ndex, or [s]rc
406 elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
407 {
408 my $database = $1;
409 my $key = $2;
410 my $action = $3;
411 # by default we add for backwards compatibility
412 if (!defined $action || $action eq '')
413 {
414 #rint STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
415 $action = '+';
416 }
417 my $payload = $4;
418 $payload =~ s/^\s+|\s+$//g;
419 &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload);
420 # Build path to database file
421 my $tdb_path = '';
422 if ($database eq 'd')
423 {
424 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
425 }
426 elsif ($database eq 's')
427 {
428 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
429 }
430 else
431 {
432 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
433 }
434 # Harnesses seem like goodly magic - but unfortunately may be broken
435 # magic. Testing on Medusa randomly hangs on the finish() function.
436 if ($use_harness)
437 {
438 my $record = '[' . $key . ']' . $action . $payload;
439 # Open harness to TDBCLI
440 &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness');
441 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
442 my $buffer_to_tdb = '';
443 my $buffer_from_tdb = '';
444 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
445 # Check the harness worked
446 if (!pumpable $tdb_harness)
447 {
448 die("Error! Harness to " . $tdbexe . " has gone away!");
449 }
450 # - write the data to the TDBCLI
451 $buffer_to_tdb = $record . "\n";
452 while (length($buffer_to_tdb))
453 {
454 pump($tdb_harness);
455 }
456 # - read any response from TDBCLI
457 &debugPrint($the_count, $tid, 'TDBCLI', 'Reading');
458 while ($buffer_from_tdb !~ /-{70}/)
459 {
460 pump($tdb_harness);
461 }
462 # - explicitly tell the pipe to quit (empty key)
463 &debugPrint($the_count, $tid, 'TDBCLI', 'Closing');
464 $buffer_to_tdb = "[]\n";
465 while (length($buffer_to_tdb))
466 {
467 pump($tdb_harness);
468 }
469 # - not that this result doesn't include the [Server] prefix as it
470 # may be parsed for data by the client
471 $result = $buffer_from_tdb;
472 chomp($result);
473 # Finished with harness
474 &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness');
475 finish($tdb_harness);
476 &debugPrint($the_count, $tid, 'TDBCLI', 'Complete');
477 }
478 # Use different TDB tools depending on arguments
479 # - lookups using TDBGET
480 elsif ($action eq '?')
481 {
482 my $command_name = '';
483 my $command1 = '';
484 # Special case for retrieve all keys (indicated by *)
485 if ($key eq '*')
486 {
487 $command_name = 'TDBKEYS';
488 $command1 = 'tdbkeys "' . $tdb_path . '"';
489 }
490 else
491 {
492 $command_name = 'TDBGET';
493 $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
494 }
495 &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1);
496 $result = `$command1`;
497 &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result);
498 if ($result =~ /-{70}/)
499 {
500 $result .= "-"x70 . "\n";
501 }
502 }
503 # - adds, updates and deletes using TXT2TDB
504 elsif ($action eq '+' || $action eq '-')
505 {
506 my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
507 &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2);
508 open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
509 print $infodb_handle '[' . $key . ']';
510 if ($action eq '-')
511 {
512 print $infodb_handle $action;
513 }
514 print $infodb_handle $payload;
515 close($infodb_handle);
516 $result = "-"x70 . "\n";
517 &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result);
518 }
519 else
520 {
521 print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
522 }
523 }
524 # Synchronized debug log writing
525 &debugPrint($the_count, $tid, 'SEND', $result);
526 return $result;
527}
528
529sub debugPrint
530{
531 my ($the_count, $tid, $type, $msg) = @_;
532 if ($debug)
533 {
534 lock($debug_log);
535 $|++;
536 print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
537 $|--;
538 # //unlock($debug_log);
539 }
540}
541
542sub printUsageAndExit
543{
544 my ($msg) = @_;
545 print "$msg\n\n";
546 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
547 exit(0);
548}
549
5501;
Note: See TracBrowser for help on using the repository browser.