source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 25453

Last change on this file since 25453 was 25453, checked in by jmt12, 12 years ago

Instrumenting in order to try and track down race condition/hang during GS import

  • Property svn:executable set to *
File size: 14.2 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Setup Environment
7BEGIN
8{
9 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
10 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
11
12 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
13 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
17
18 if (defined $ENV{'GSDLEXTS'})
19 {
20 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
21 foreach my $e (@extensions)
22 {
23 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
24 unshift (@INC, "$ext_prefix/perllib");
25 unshift (@INC, "$ext_prefix/perllib/cpan");
26 unshift (@INC, "$ext_prefix/perllib/plugins");
27 unshift (@INC, "$ext_prefix/perllib/classify");
28 }
29 }
30 if (defined $ENV{'GSDL3EXTS'})
31 {
32 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
33 foreach my $e (@extensions)
34 {
35 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
36 unshift (@INC, "$ext_prefix/perllib");
37 unshift (@INC, "$ext_prefix/perllib/cpan");
38 unshift (@INC, "$ext_prefix/perllib/plugins");
39 unshift (@INC, "$ext_prefix/perllib/classify");
40 }
41 }
42
43 # Manually installed CPAN package in GEXT*INSTALL
44 # - parse up version number
45 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
46 # - get rid of leading zeros by making them integers
47 $major += 0;
48 $minor += 0;
49 $revision += 0;
50 # - and add to Perl's path
51 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
52}
53
54use Cwd;
55
56# We need to do a little file locking
57use Fcntl qw(:flock); #import LOCK_* constants
58
59# Advanced child process control allowing bidirectional pipes
60use IPC::Run qw(harness start pump finish);
61
62# we need to run as a daemon
63use Proc::Daemon;
64
65# The server will need to accept requests from multiple threads, and
66# so will need threads in and of itself
67use threads;
68use threads::shared;
69
70# Greenstone utility functions (filename_cat)
71use util;
72
73# A simple server that listens on a socket and 'forks' off child threads to
74# handle each incoming request
75use SocketsSwimmingThreadPoolServer;
76
77# Globally available - but once set these are read-only - so locking isn't
78# an issue
79my $tdbexe = 'tdbcli';
80my $parent_pid = 0;
81my $collection = '';
82my $no_daemon = 0;
83my $debug = 0;
84my $server;
85my $server_host;
86my $server_port;
87my $server_threads;
88# - shared and, more importantly, lockable
89my %listeners :shared;
90my $should_stop :shared = 0;
91my $debug_log :shared = 0;
92
93my $msg_counter :shared = 0;
94
95print "===== TDB Server =====\n";
96print "Provides a server to allow multiple remote machines to simultaenously\n";
97print "edit one or more TDB databases on the local machine. This is to work\n";
98print "around NFS file locking issues when parallel processing on a cluster.\n";
99
100MAIN:
101{
102 # Check arguments
103 # - compulsory
104 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
105 {
106 &printUsageAndExit('Error! Missing parent process ID or not a PID');
107 }
108 $parent_pid = $ARGV[0];
109 if (!defined $ARGV[1])
110 {
111 &printUsageAndExit('Error! Missing active Greenstone collection name');
112 }
113 $collection = $ARGV[1];
114 # - optional
115 my $i = 2;
116 while (defined $ARGV[$i])
117 {
118 if ($ARGV[$i] eq "-nodaemon")
119 {
120 $no_daemon = 1;
121 }
122 if ($ARGV[$i] eq "-debug")
123 {
124 $debug = 1;
125 }
126 $i++;
127 }
128
129 # Read in the collection specific configuration
130 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
131 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
132 my $line = '';
133 while (($line = <CFGIN>))
134 {
135 if ($line =~ /^(\w+)\s+(.*)$/)
136 {
137 my $key = $1;
138 my $value = $2;
139 if ($key eq "serverhost")
140 {
141 $server_host = $value;
142 }
143 if ($key eq "serverport")
144 {
145 $server_port = $value;
146 }
147 if ($key eq "threads")
148 {
149 $server_threads = $value;
150 }
151 }
152 }
153 close(CFGIN);
154
155 if ($debug)
156 {
157 print " - collection: " . $collection . "\n";
158 print " - parent pid: " . $parent_pid . "\n";
159 print " - no daemon? " . $no_daemon . "\n";
160 print " - debug? " . $debug . "\n";
161 print " - serverhost: " . $server_host . "\n";
162 print " - serverport: " . $server_port . "\n";
163 print " - threads: " . $server_threads . "\n";
164 print "\n";
165 }
166
167 # Information about any running TDBServer is stored in a lockfile in
168 # Greenstone's tmp directory (named after the active collection)
169 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
170 if (!-d $tmp_dir)
171 {
172 mkdir($tmp_dir, 0755);
173 }
174 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
175
176 # If already running, then exit
177 print " * Testing if TDBServer for this collection already running... ";
178 if (-e $server_lockfile_path)
179 {
180 print "Error! TDBServer already running!\n";
181 print "Lockfile found at: " . $server_lockfile_path . "\n";
182 exit(0);
183 }
184 print "All clear!\n";
185
186 # Ensure we can see tdb edit tools on the path
187 print " * Testing for tool: " . $tdbexe . "... ";
188 my $result = `$tdbexe 2>&1`;
189 if ($result !~ /usage:\s+$tdbexe/)
190 {
191 print "Error! " . $tdbexe . " not available - check path.\n";
192 exit(0);
193 }
194 print "Found!\n";
195
196 # Daemonize
197 my $pid = 0;
198 if (!$no_daemon)
199 {
200 print " * Spawning Daemon...\n" unless (!$debug);
201 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
202 if (!-d $logs_dir)
203 {
204 mkdir($logs_dir, 0755);
205 }
206 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
207 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
208 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
209 child_STDOUT => $daemon_out_path,
210 child_STDERR => $daemon_err_path,
211 } );
212 }
213
214 # Master process has pid > 0
215 if ($pid == 0)
216 {
217 # - create server object
218 print " * Creating pool of " . $server_threads . " threads listening on socket: " . $server_host . ":" . $server_port . "\n";
219 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
220 port=>$server_port,
221 thread_count=>$server_threads,
222 main_cb => \&exitCheck,
223 processor_cb => \&process);
224
225 # - write a lockfile
226 print " * Creating lock file: " . $server_lockfile_path . "\n";
227 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
228 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
229 print SLFH $server_host . ':' . $server_port;
230 flock(SLFH, LOCK_UN);
231 close(SLFH);
232
233 # Perform main loop
234 # - loop is actually in Server code. start() only returns once server's stop
235 # command has been called
236 print " * Listening:\n";
237 $server->start;
238 print " * Stopping...\n";
239
240 # Perform deinitializes here
241 # - remove server lockfile
242 print " * Removing lock file...\n";
243 unlink($server_lockfile_path);
244 print "Done!\n";
245 }
246 # Forked child processes
247 else
248 {
249 print " * Waiting for lockfile to be created";
250 while (!-e $server_lockfile_path)
251 {
252 print '.';
253 sleep(1);
254 }
255 print "\n * TDBServer lockfile created.\n";
256 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
257 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
258 my $line = <SLFH>;
259 if ($line =~ /(^.+):(\d+)$/)
260 {
261 print " => Server now listening on " . $1 . ":" . $2 . "\n";
262 }
263 else
264 {
265 die ("Error! Failed to retrieve host and port information from lockfile!");
266 }
267 flock(SLFH, LOCK_UN);
268 close(SLFH);
269 }
270
271 print "===== Complete! =====\n";
272}
273exit(0);
274
275# @function exitCheck
276# A callback function, called every 5 seconds (default) by the socket server,
277# to see whether the parent process (by pid) is actually still running. This
278# will cover the case where the parent process (import.pl or build.pl) dies
279# without properly asking the server to shutdown.
280sub exitCheck
281{
282 my $counter = shift @_;
283 #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
284 # Parent PID not available or we aren't allowed to talk to it (debugging)
285 if ($parent_pid == 0)
286 {
287 return;
288 }
289 # note: kill, when passed a first argument of 0, checks whether it's possible
290 # to send a signal to the pid given as the second argument, and returns true
291 # if it is. Thus it provides a means to determine if the parent process is
292 # still running (and hence can be signalled) In newer versions of Perl
293 # (5.8.9) it should even work cross-platform.
294 if (!kill(0, $parent_pid))
295 {
296 print " * Parent processs gone away... forcing server shutdown\n";
297 $server->stop;
298 if ($debug)
299 {
300 lock($debug_log);
301 $|++;
302 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
303 $|--;
304 }
305 }
306}
307
308# /** @function process
309# * A horribly named function that is called back to process each of the
310# * requests to alter the TDB databases. It expects a complete TDB CLI
311# * command as a text blob, or one of a limited number of special commands
312# * ([a]dd or [r]emove listener, or [q]uit).
313# */
314sub process
315{
316 my $data = shift @_;
317 my $ip = shift @_;
318 my $tid = shift @_;
319 my $result = "#ERROR#";
320 # Synchronized debug log writing
321 if ($debug)
322 {
323 lock($debug_log);
324 $|++;
325 print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
326 $|--;
327 }
328 # process special commands first
329 if ($data =~ /^#([arq]):(.*)$/)
330 {
331 my $command = $1;
332 my $argument = $2;
333 # addlistener(<pid>)
334 if ($command eq "a")
335 {
336 lock(%listeners);
337 $listeners{$argument} = 1;
338 my $listener_count = scalar(keys(%listeners));
339 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
340 # //unlock(%listeners)
341 }
342 # removelistener(<pid>)
343 elsif ($command eq "r")
344 {
345 my $listener_count = 0;
346 {
347 lock(%listeners);
348 if (defined $listeners{$argument})
349 {
350 delete $listeners{$argument};
351 }
352 $listener_count = scalar(keys(%listeners));
353 # //unlock(%listeners)
354 }
355 lock($should_stop);
356 if ($should_stop == 1 && $listener_count == 0)
357 {
358 # server isn't shared, but the stop data member is!
359 $server->stop;
360 $result = "[SUCCESS] removed last listener, stopping";
361 }
362 else
363 {
364 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
365 }
366 # //unlock($should_stop)
367 }
368 # we may be asked to stop the server, but only by the process that created
369 # us. If there are no listeners registered, we stop straight away,
370 # otherwise we set a flag so that as soon as there are no listeners we
371 # stop.
372 elsif ($command eq "q")
373 {
374 if ($argument ne $parent_pid && $argument ne "*")
375 {
376 $result = "[IGNORED] can only be stopped by parent process";
377 }
378 else
379 {
380 my $listener_count = 0;
381 {
382 lock(%listeners);
383 $listener_count = scalar(keys(%listeners));
384 # //unlock(%listeners)
385 }
386 if ($listener_count == 0)
387 {
388 # server isn't shared, but the stop data member is!
389 $server->stop;
390 $result = "[SUCCESS] stopping";
391 }
392 else
393 {
394 lock($should_stop);
395 $should_stop = 1;
396 $result = "[PENDING] will stop when no more listeners";
397 # //unlock($should_stop)
398 }
399 }
400 }
401 }
402 # Everything thing else should be a TDB command
403 # form <database>:<key>:<value>
404 # where: database is [d]oc, [i]ndex, or [s]rc
405 elsif ($data =~ /^([dis]):(.+)$/s)
406 {
407 my $database = $1;
408 my $record = $2;
409 # Build path to database file
410 my $tdb_path = '';
411 if ($database eq 'd')
412 {
413 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
414 }
415 elsif ($database eq 's')
416 {
417 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
418 }
419 else
420 {
421 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
422 }
423 # Open harness to TDBCLI
424 my $the_count = 0;
425 {
426 lock($msg_counter);
427 $msg_counter++;
428 $the_count = $msg_counter + 0;
429 # //unlock($msg_counter);
430 }
431 my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
432 my $buffer_to_tdb = '';
433 my $buffer_from_tdb = '';
434 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
435 # Check the harness worked
436
437 if (!pumpable $tdb_harness)
438 {
439 die("Error! Harness to " . $tdbexe . " has gone away!");
440 }
441 # - write the data to the TDBCLI
442 $buffer_to_tdb = $record . "\n";
443 my $write_msg = '#' . $the_count . ' writing: |' . $record . '| => ';
444 while (length($buffer_to_tdb))
445 {
446 $write_msg .='.';
447 pump($tdb_harness);
448 }
449 print STDERR $write_msg . "\n";
450 # - read any response from TDBCLI
451 my $read_msg = '#' . $the_count . ' reading: ';
452 while ($buffer_from_tdb !~ /-{70}/)
453 {
454 $read_msg .= '.';
455 pump($tdb_harness);
456 }
457 print STDERR $read_msg . ' => |' . $buffer_from_tdb . "|\n";
458 # - not that this result doesn't include the [Server] prefix as it
459 # may be parsed for data by the client
460 $result = $buffer_from_tdb;
461 chomp($result);
462 # Finished with harness
463 finish($tdb_harness);
464 }
465 # Synchronized debug log writing
466 if ($debug)
467 {
468 lock($debug_log);
469 $|++;
470 print "[" . time() . "|" . $tid . "|SEND] " . $result . "\n\n";
471 $|--;
472 # //unlock($debug_log);
473 }
474 return $result;
475}
476
477sub printUsageAndExit
478{
479 my ($msg) = @_;
480 print "$msg\n\n";
481 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
482 exit(0);
483}
484
4851;
Note: See TracBrowser for help on using the repository browser.