source: gs2-extensions/tdb-edit/trunk/src/bin/script/TDBServer.pl@ 25414

Last change on this file since 25414 was 25414, checked in by jmt12, 12 years ago

Removing hardcoded Perl library path in favour of one that (hopefully) uses Perl version to determine appropriate path.

  • Property svn:executable set to *
File size: 13.7 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# Setup Environment
7BEGIN
8{
9 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
10 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
11
12 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
13 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
14 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
15 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
16 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
17
18 if (defined $ENV{'GSDLEXTS'})
19 {
20 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
21 foreach my $e (@extensions)
22 {
23 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
24 unshift (@INC, "$ext_prefix/perllib");
25 unshift (@INC, "$ext_prefix/perllib/cpan");
26 unshift (@INC, "$ext_prefix/perllib/plugins");
27 unshift (@INC, "$ext_prefix/perllib/classify");
28 }
29 }
30 if (defined $ENV{'GSDL3EXTS'})
31 {
32 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
33 foreach my $e (@extensions)
34 {
35 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
36 unshift (@INC, "$ext_prefix/perllib");
37 unshift (@INC, "$ext_prefix/perllib/cpan");
38 unshift (@INC, "$ext_prefix/perllib/plugins");
39 unshift (@INC, "$ext_prefix/perllib/classify");
40 }
41 }
42
43 # Manually installed CPAN package in GEXT*INSTALL
44 # - parse up version number
45 my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
46 # - get rid of leading zeros by making them integers
47 $major += 0;
48 $minor += 0;
49 $revision += 0;
50 # - and add to Perl's path
51 unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
52}
53
54use Cwd;
55
56# We need to do a little file locking
57use Fcntl qw(:flock); #import LOCK_* constants
58
59# Advanced child process control allowing bidirectional pipes
60use IPC::Run qw(harness start pump finish);
61
62# we need to run as a daemon
63use Proc::Daemon;
64
65# The server will need to accept requests from multiple threads, and
66# so will need threads in and of itself
67use threads;
68use threads::shared;
69
70# Greenstone utility functions (filename_cat)
71use util;
72
73# A simple server that listens on a socket and 'forks' off child threads to
74# handle each incoming request
75use SocketsSwimmingThreadPoolServer;
76
77# Globally available - but once set these are read-only - so locking isn't
78# an issue
79my $tdbexe = 'tdbcli';
80my $parent_pid = 0;
81my $collection = '';
82my $no_daemon = 0;
83my $debug = 0;
84my $server;
85my $server_host;
86my $server_port;
87my $server_threads;
88# - shared and, more importantly, lockable
89my %listeners :shared;
90my $should_stop :shared = 0;
91my $debug_log :shared = 0;
92
93print "===== TDB Server =====\n";
94print "Provides a server to allow multiple remote machines to simultaenously\n";
95print "edit one or more TDB databases on the local machine. This is to work\n";
96print "around NFS file locking issues when parallel processing on a cluster.\n";
97
98MAIN:
99{
100 # Check arguments
101 # - compulsory
102 if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
103 {
104 &printUsageAndExit('Error! Missing parent process ID or not a PID');
105 }
106 $parent_pid = $ARGV[0];
107 if (!defined $ARGV[1])
108 {
109 &printUsageAndExit('Error! Missing active Greenstone collection name');
110 }
111 $collection = $ARGV[1];
112 # - optional
113 my $i = 2;
114 while (defined $ARGV[$i])
115 {
116 if ($ARGV[$i] eq "-nodaemon")
117 {
118 $no_daemon = 1;
119 }
120 if ($ARGV[$i] eq "-debug")
121 {
122 $debug = 1;
123 }
124 $i++;
125 }
126
127 # Read in the collection specific configuration
128 my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
129 open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
130 my $line = '';
131 while (($line = <CFGIN>))
132 {
133 if ($line =~ /^(\w+)\s+(.*)$/)
134 {
135 my $key = $1;
136 my $value = $2;
137 if ($key eq "serverhost")
138 {
139 $server_host = $value;
140 }
141 if ($key eq "serverport")
142 {
143 $server_port = $value;
144 }
145 if ($key eq "threads")
146 {
147 $server_threads = $value;
148 }
149 }
150 }
151 close(CFGIN);
152
153 if ($debug)
154 {
155 print " - collection: " . $collection . "\n";
156 print " - parent pid: " . $parent_pid . "\n";
157 print " - no daemon? " . $no_daemon . "\n";
158 print " - debug? " . $debug . "\n";
159 print " - serverhost: " . $server_host . "\n";
160 print " - serverport: " . $server_port . "\n";
161 print " - threads: " . $server_threads . "\n";
162 print "\n";
163 }
164
165 # Information about any running TDBServer is stored in a lockfile in
166 # Greenstone's tmp directory (named after the active collection)
167 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
168 if (!-d $tmp_dir)
169 {
170 mkdir($tmp_dir, 0755);
171 }
172 my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');
173
174 # If already running, then exit
175 print " * Testing if TDBServer for this collection already running... ";
176 if (-e $server_lockfile_path)
177 {
178 print "Error! TDBServer already running!\n";
179 print "Lockfile found at: " . $server_lockfile_path . "\n";
180 exit(0);
181 }
182 print "All clear!\n";
183
184 # Ensure we can see tdb edit tools on the path
185 print " * Testing for tool: " . $tdbexe . "... ";
186 my $result = `$tdbexe 2>&1`;
187 if ($result !~ /usage:\s+$tdbexe/)
188 {
189 print "Error! " . $tdbexe . " not available - check path.\n";
190 exit(0);
191 }
192 print "Found!\n";
193
194 # Daemonize
195 my $pid = 0;
196 if (!$no_daemon)
197 {
198 print " * Spawning Daemon...\n" unless (!$debug);
199 my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
200 if (!-d $logs_dir)
201 {
202 mkdir($logs_dir, 0755);
203 }
204 my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
205 my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
206 $pid = Proc::Daemon::Init( { work_dir => getcwd(),
207 child_STDOUT => $daemon_out_path,
208 child_STDERR => $daemon_err_path,
209 } );
210 }
211
212 # Master process has pid > 0
213 if ($pid == 0)
214 {
215 # - create server object
216 print " * Creating pool of " . $server_threads . " threads listening on socket: " . $server_host . ":" . $server_port . "\n";
217 $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
218 port=>$server_port,
219 thread_count=>$server_threads,
220# main_cb => \&exitCheck,
221 processor_cb => \&process);
222
223 # - write a lockfile
224 print " * Creating lock file: " . $server_lockfile_path . "\n";
225 open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
226 flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
227 print SLFH $server_host . ':' . $server_port;
228 flock(SLFH, LOCK_UN);
229 close(SLFH);
230
231 # Perform main loop
232 # - loop is actually in Server code. start() only returns once server's stop
233 # command has been called
234 print " * Listening:\n";
235 $server->start;
236 print " * Stopping...\n";
237
238 # Perform deinitializes here
239 # - remove server lockfile
240 print " * Removing lock file...\n";
241 unlink($server_lockfile_path);
242 print "Done!\n";
243 }
244 # Forked child processes
245 else
246 {
247 print " * Waiting for lockfile to be created";
248 while (!-e $server_lockfile_path)
249 {
250 print '.';
251 sleep(1);
252 }
253 print "\n * TDBServer lockfile created.\n";
254 open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
255 flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
256 my $line = <SLFH>;
257 if ($line =~ /(^.+):(\d+)$/)
258 {
259 print " => Server now listening on " . $1 . ":" . $2 . "\n";
260 }
261 else
262 {
263 die ("Error! Failed to retrieve host and port information from lockfile!");
264 }
265 flock(SLFH, LOCK_UN);
266 close(SLFH);
267 }
268
269 print "===== Complete! =====\n";
270}
271exit(0);
272
273# @function exitCheck
274# A callback function, called every 5 seconds (default) by the socket server,
275# to see whether the parent process (by pid) is actually still running. This
276# will cover the case where the parent process (import.pl or build.pl) dies
277# without properly asking the server to shutdown.
278sub exitCheck
279{
280 my $counter = shift @_;
281 print "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
282 # Parent PID not available or we aren't allowed to talk to it (debugging)
283 if ($parent_pid == 0)
284 {
285 return;
286 }
287 # note: kill, when passed a first argument of 0, checks whether it's possible
288 # to send a signal to the pid given as the second argument, and returns true
289 # if it is. Thus it provides a means to determine if the parent process is
290 # still running (and hence can be signalled) In newer versions of Perl
291 # (5.8.9) it should even work cross-platform.
292 if (!kill(0, $parent_pid))
293 {
294 print " * Parent processs gone away... forcing server shutdown\n";
295 $server->stop;
296 if ($debug)
297 {
298 lock($debug_log);
299 $|++;
300 print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
301 $|--;
302 }
303 }
304}
305
306# /** @function process
307# * A horribly named function that is called back to process each of the
308# * requests to alter the TDB databases. It expects a complete TDB CLI
309# * command as a text blob, or one of a limited number of special commands
310# * ([a]dd or [r]emove listener, or [q]uit).
311# */
312sub process
313{
314 my $data = shift @_;
315 my $ip = shift @_;
316 my $tid = shift @_;
317 my $result = "#ERROR#";
318 # Synchronized debug log writing
319 if ($debug)
320 {
321 lock($debug_log);
322 $|++;
323 print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
324 $|--;
325 }
326 # process special commands first
327 if ($data =~ /^#([arq]):(.*)$/)
328 {
329 my $command = $1;
330 my $argument = $2;
331 # addlistener(<pid>)
332 if ($command eq "a")
333 {
334 lock(%listeners);
335 $listeners{$argument} = 1;
336 my $listener_count = scalar(keys(%listeners));
337 $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
338 # //unlock(%listeners)
339 }
340 # removelistener(<pid>)
341 elsif ($command eq "r")
342 {
343 my $listener_count = 0;
344 {
345 lock(%listeners);
346 if (defined $listeners{$argument})
347 {
348 delete $listeners{$argument};
349 }
350 $listener_count = scalar(keys(%listeners));
351 # //unlock(%listeners)
352 }
353 lock($should_stop);
354 if ($should_stop == 1 && $listener_count == 0)
355 {
356 # server isn't shared, but the stop data member is!
357 $server->stop;
358 $result = "[SUCCESS] removed last listener, stopping";
359 }
360 else
361 {
362 $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
363 }
364 # //unlock($should_stop)
365 }
366 # we may be asked to stop the server, but only by the process that created
367 # us. If there are no listeners registered, we stop straight away,
368 # otherwise we set a flag so that as soon as there are no listeners we
369 # stop.
370 elsif ($command eq "q")
371 {
372 if ($argument ne $parent_pid && $argument ne "*")
373 {
374 $result = "[IGNORED] can only be stopped by parent process";
375 }
376 else
377 {
378 my $listener_count = 0;
379 {
380 lock(%listeners);
381 $listener_count = scalar(keys(%listeners));
382 # //unlock(%listeners)
383 }
384 if ($listener_count == 0)
385 {
386 # server isn't shared, but the stop data member is!
387 $server->stop;
388 $result = "[SUCCESS] stopping";
389 }
390 else
391 {
392 lock($should_stop);
393 $should_stop = 1;
394 $result = "[PENDING] will stop when no more listeners";
395 # //unlock($should_stop)
396 }
397 }
398 }
399 }
400 # Everything thing else should be a TDB command
401 # form <database>:<key>:<value>
402 # where: database is [d]oc, [i]ndex, or [s]rc
403 elsif ($data =~ /^([dis]):(.+)$/s)
404 {
405 my $database = $1;
406 my $record = $2;
407 # Build path to database file
408 my $tdb_path = '';
409 if ($database eq 'd')
410 {
411 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
412 }
413 elsif ($database eq 's')
414 {
415 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
416 }
417 else
418 {
419 $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
420 }
421 # Open harness to TDBCLI
422 my @tdb_command = ($tdbexe, $tdb_path);
423 my $buffer_to_tdb = '';
424 my $buffer_from_tdb = '';
425 my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
426 # Check the harness worked
427 if (!pumpable $tdb_harness)
428 {
429 die("Error! Harness to " . $tdbexe . " has gone away!");
430 }
431 # - write the data to the TDBCLI
432 $buffer_to_tdb = $record . "\n";
433 pump($tdb_harness) while (length($buffer_to_tdb));
434 # - read any response from TDBCLI
435 pump($tdb_harness) until ($buffer_from_tdb =~ /-{70}/);
436 # - not that this result doesn't include the [Server] prefix as it
437 # may be parsed for data by the client
438 $result = $buffer_from_tdb;
439 chomp($result);
440 # Finished with harness
441 finish($tdb_harness);
442 }
443 # Synchronized debug log writing
444 if ($debug)
445 {
446 lock($debug_log);
447 $|++;
448 print "[" . time() . "|" . $tid . "|SEND] " . $result . "\n\n";
449 $|--;
450 # //unlock($debug_log);
451 }
452 return $result;
453}
454
455sub printUsageAndExit
456{
457 my ($msg) = @_;
458 print "$msg\n\n";
459 print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
460 exit(0);
461}
462
4631;
Note: See TracBrowser for help on using the repository browser.