source: gs2-extensions/parallel-building/trunk/src/perllib/dbutil/gdbmserver.pm@ 27179

Last change on this file since 27179 was 27179, checked in by davidb, 11 years ago

Mods to allow code to run with Greenstone3

File size: 11.5 KB
Line 
1###########################################################################
2#
3# dbutil::gdbmserver -- utility functions for writing to gdbm databases but
4# implemented as a server with a single, persistent
5# connection
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29package dbutil::gdbmserver;
30
31use strict;
32use warnings;
33
34# We're going to have to delve into locking (a little) to prevent multiple
35# threads trying to launch the server at once
36use Fcntl qw(:flock);
37
38use GDBMClient;
39use util;
40
41my $hyphen70 = '-' x 70;
42my $debug = 0;
43
44# We have a global reference to all of the GDBM Server lockfiles that this
45# instance has created (as we'll be responsible for closing them)
46my %created_server_lockfile_paths;
47# Keep track of the lockfiles for server we have added ourselves as listeners
48# to.
49my %listener_server_lockfile_paths;
50# We also have a global of all of the listeners we have assigned as we'll
51# be responsible for removing them.
52my %registered_listeners;
53
54sub _spawnClient
55{
56 my ($infodb_file_path) = @_;
57
58 my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'},'tmp');
59 if (! &util::dir_exists($tmp_dir)) {
60 &util::mk_dir($tmp_dir,1);
61 }
62
63 # 1. Check whether the server is already running by trying to locate the
64 # server 'lock' file.
65 my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)/i;
66 my $server_lockfile_path = &util::filename_cat($ENV{'GSDLHOME'},'tmp','gdbmserver-' . $infodb_file . '.lock');
67 #rint " * Searching for lockfile: " . $server_lockfile_path . "\n";
68 # We need to lock here to ensure only one thread enters the following code,
69 # sees a missing GDBMServer, and launches it
70 my $tmp_lockfile_path = &util::filename_cat($ENV{'GSDLHOME'},'tmp','dbutil-gdbmserver.lock');
71 open(TMPFH, '>', $tmp_lockfile_path) or die ("Error! Failed to open file for writing: " . $tmp_lockfile_path . "\nReason: " . $! . "\n");
72 flock(TMPFH, LOCK_EX) or die("Error! Cannot lock file exclusively: " . $tmp_lockfile_path . "\nReason: " . $! . "\n");
73 print TMPFH localtime();
74
75 # - If the file doesn't exist...
76 if (!-e $server_lockfile_path)
77 {
78 # ...start it!
79 my $launch_cmd = 'GDBMServer.pl "' . $$ . '" "' . $infodb_file_path . '"';
80 print "* Starting GDBMServer for: " . $infodb_file_path . "\n";
81 # @note I once had the below pipe ending with 2>&1 |, but that then blocks
82 # indefinitely when looping and reading <SERVERIN>.
83 open(SERVERIN, $launch_cmd . ' |') or die("Error! Failed to run launch command: " . $launch_cmd . "\nReason: " . $! . "\n");
84 # read all the output from the server
85 my $line = '';
86 my $server_lock_file_created = 0;
87 while ($line = <SERVERIN>)
88 {
89 # - watch for the line indicating a lock file has been created and
90 # populated with a sexy port number
91 if ($line =~ /Server now listening/)
92 {
93 $server_lock_file_created = 1;
94 }
95 # - we could also watch for errors here
96 if ($debug)
97 {
98 if ($line !~ /\n/)
99 {
100 $line .= "\n";
101 }
102 $|++; # autoflush
103 print "[gdbmserver] $line";
104 $|--; # disable autoflush
105 }
106 }
107 close(SERVERIN);
108 if (!$server_lock_file_created)
109 {
110 die("Error! GDBMServer failed to create lock file. Check server logs.");
111 }
112 # record this for later
113 $created_server_lockfile_paths{$server_lockfile_path} = 1;
114 }
115 flock(TMPFH, LOCK_UN);
116 close($tmp_lockfile_path);
117 unlink($tmp_lockfile_path);
118 # record this for later
119 $listener_server_lockfile_paths{$server_lockfile_path} = $infodb_file_path;
120 return GDBMClient->new($server_lockfile_path);
121}
122
123END
124{
125 # we ask the server to shutdown, but only the 'creator' thread will actually
126 # be able to, and only once all listeners have deregistered.
127 foreach my $server_lockfile_path (keys (%listener_server_lockfile_paths))
128 {
129 my $infodb_file_path = $listener_server_lockfile_paths{$server_lockfile_path};
130 my $gdbm_client_handle = GDBMClient->new($server_lockfile_path);
131 # Deregister all of our registered listeners
132 foreach my $listener_suffix (keys(%registered_listeners))
133 {
134 $gdbm_client_handle->removeListener($listener_suffix);
135 }
136 # ask the servers we created to shut down (all other threads will have
137 # this request ignored)
138 if (defined $created_server_lockfile_paths{$infodb_file_path})
139 {
140 print "* Attempting to stop GDBMServer for: " . $infodb_file_path . "\n";
141 }
142 $gdbm_client_handle->query('!stop:' . $$);
143 }
144 # we should now wait until all of our server_lockfiles have actually been
145 # removed (otherwise people could mistakenly run import/build again
146 # immediately and things *might* go pearshaped).
147 foreach my $server_lockfile_path (keys (%created_server_lockfile_paths))
148 {
149 # While the file exists, we should wait
150 print "* Waiting for GDBMServer [" . $server_lockfile_path . "] to exit...";
151 if (-e $server_lockfile_path)
152 {
153 while (-e $server_lockfile_path)
154 {
155 print ".";
156 sleep(1);
157 }
158 }
159 print " Done!\n";
160 }
161}
162
163# -----------------------------------------------------------------------------
164# GDBM SERVER IMPLEMENTATION
165# -----------------------------------------------------------------------------
166sub open_infodb_write_handle
167{
168 my $infodb_file_path = shift(@_);
169 my $opt_append = shift(@_);
170 if (defined $opt_append && $opt_append ne "append")
171 {
172 print "Warning! GDBM modes other than 'append' not supported for GDBMServer.\n";
173 }
174 my $gdbm_client_handle = &_spawnClient($infodb_file_path);
175 # Register this client on the server if necessary
176 $gdbm_client_handle->addListener('w');
177 $registered_listeners{'w'} = 1;
178 # and pass the handle to client around
179 return $gdbm_client_handle;
180}
181
182# /** Destructor or near enough.
183# /*
184sub close_infodb_write_handle
185{
186 my $gdbm_client_handle = shift(@_);
187}
188# /** close_infodb_write_handle($infodb_handle) **/
189
190# /** @function get_info_db_file_path
191# * Exactly the same as vanilla GDBM - as we are still using a GDBM database
192# * just accessing it via a persistant server
193# */
194sub get_infodb_file_path
195{
196 my $collection_name = shift(@_);
197 my $infodb_directory_path = shift(@_);
198 my $create_server = shift(@_);
199
200 my $infodb_file_extension = ".gdb";
201 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
202 my $infodb_file_path = &util::filename_cat($infodb_directory_path, $infodb_file_name);
203
204 # Special Case for GDBMServer
205 if (defined $create_server && $create_server == 1)
206 {
207 my $tmp_collect_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},'tmp');
208 if (! &util::dir_exists($tmp_collect_dir)) {
209 &util::mk_dir($tmp_collect_dir,1);
210 }
211
212 my $gdbm_client_handle = &_spawnClient($infodb_file_path);
213 # Register this client on the server if necessary
214 $gdbm_client_handle->addListener('i');
215 $registered_listeners{'i'} = 1;
216 }
217
218 # Resuming our regular programming
219 return $infodb_file_path;
220}
221
222sub read_infodb_file
223{
224 my $infodb_file_path = shift(@_);
225 my $infodb_map = shift(@_);
226 my $gdbm_client_handle = &_spawnClient($infodb_file_path);
227 $gdbm_client_handle->addListener('r');
228 $registered_listeners{'r'} = 1;
229 # retrieves all the keys in the form:
230 # [key1]\n[key2]\n[key3]\n...[keyn]
231 my $raw_infodb_keys = $gdbm_client_handle->query('[*]?');
232
233 my @infodb_keys = split(/\r?\n/, $raw_infodb_keys);
234 foreach my $infodb_key (@infodb_keys)
235 {
236 if ($infodb_key =~ /.+/ && $infodb_key !~ /-{70}/)
237 {
238 # lookup each key
239 my $infodb_value = $gdbm_client_handle->query('[' . $infodb_key . ']?');
240 # store it
241 $infodb_map->{$infodb_key} = $infodb_value;
242 }
243 }
244}
245
246sub read_infodb_keys
247{
248 my $infodb_file_path = shift(@_);
249 my $infodb_map = shift(@_);
250 # spawn client (creating server as necessary)
251 my $gdbm_client_handle = &_spawnClient($infodb_file_path);
252 # register ourself as listener
253 $gdbm_client_handle->addListener('k');
254 $registered_listeners{'k'} = 1;
255 # retrieves all the keys in the form:
256 # [key1]\n[key2]\n[key3]\n...[keyn]
257 my $raw_infodb_keys = $gdbm_client_handle->query('[*]?');
258 my @infodb_keys = split(/\r?\n/, $raw_infodb_keys);
259 foreach my $infodb_key (@infodb_keys)
260 {
261 if ($infodb_key =~ /.+/ && $infodb_key !~ /-{70}/)
262 {
263 $infodb_map->{$infodb_key} = 1;
264 }
265 }
266}
267
268sub write_infodb_entry
269{
270 my $gdbm_client_handle = shift(@_);
271 my $infodb_key = shift(@_);
272 my $infodb_map = shift(@_);
273 # - build up the gdbm command
274 my $gdbm_command = "[" . $infodb_key . "]+\n";
275 foreach my $infodb_value_key (keys(%$infodb_map))
276 {
277 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
278 {
279 if ($infodb_value =~ /-{70,}/)
280 {
281 # if value contains 70 or more hyphens in a row we need to escape them
282 # to prevent txt2db from treating them as a separator
283 $infodb_value =~ s/-/&\#045;/gi;
284 }
285 $gdbm_command .= "<" . $infodb_value_key . ">" . $infodb_value . "\n";
286 }
287 }
288 $gdbm_command .= $hyphen70 . "\n";
289 # - ask the client to transmit the command to the server
290 $gdbm_client_handle->query($gdbm_command);
291}
292
293sub write_infodb_rawentry
294{
295 my $gdbm_client_handle = shift(@_);
296 my $infodb_key = shift(@_);
297 my $infodb_val = shift(@_);
298 # - build up the gdbm command
299 my $gdbm_command = "[" . $infodb_key . "]\n";
300 $gdbm_command .= $infodb_val . "\n";
301 $gdbm_command .= $hyphen70 . "\n";
302 # - ask the client to transmit the command to the server
303 $gdbm_client_handle->query($gdbm_command);
304 return 1;
305}
306
307sub set_infodb_entry
308{
309 my $infodb_file_path = shift(@_);
310 my $infodb_key = shift(@_);
311 my $infodb_map = shift(@_);
312 # spawn client (creating server as necessary)
313 my $gdbm_client_handle = &_spawnClient($infodb_file_path);
314 $gdbm_client_handle->addListener('s');
315 $registered_listeners{'s'} = 1;
316 # Protect metadata values that go inside quotes for gdbmset
317 foreach my $k (keys %$infodb_map)
318 {
319 my @escaped_v = ();
320 foreach my $v (@{$infodb_map->{$k}})
321 {
322 if ($k eq "contains")
323 {
324 # protect quotes in ".2;".3 etc
325 $v =~ s/\"/\\\"/g;
326 push(@escaped_v, $v);
327 }
328 else
329 {
330 my $ev = &ghtml::unescape_html($v);
331 $ev =~ s/\"/\\\"/g;
332 push(@escaped_v, $ev);
333 }
334 }
335 $infodb_map->{$k} = \@escaped_v;
336 }
337 # Generate the record string (GDBM command)
338 my $gdbm_command = "[" . $infodb_key . "]\n";
339 $gdbm_command .= &dbutil::convert_infodb_hash_to_string($infodb_map) . "\n";
340 $gdbm_command .= $hyphen70 . "\n";
341 # Send command to server
342 $gdbm_client_handle->query($gdbm_command);
343}
344
345sub delete_infodb_entry
346{
347 my $gdbm_client_handle = shift(@_);
348 my $infodb_key = shift(@_);
349 # - create command
350 my $gdbm_command = "[" . $infodb_key . "]-\n";
351 # - and send
352 $gdbm_client_handle->query($gdbm_command);
353}
354
355
356
3571;
Note: See TracBrowser for help on using the repository browser.