source: gs2-extensions/parallel-building/trunk/src/perllib/dbutil/tdbcluster.pm@ 29258

Last change on this file since 29258 was 29258, checked in by jmt12, 10 years ago

Initial checkin of a new TDB infodb that allows each worker thread in a parallel import to write its own TDB database, and then merges all the files back together at the end of the import

File size: 8.1 KB
Line 
1###########################################################################
2#
3# dbutil::tdb -- utility functions for writing to tdb databases. Should be
4# hauntingly similar to GDBM utility functions.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2011
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package dbutil::tdbcluster;
29
30# Pragma
31use strict;
32
33# Libraries/Modules
34use Cwd;
35use dbutil::gdbmtxtgz;
36use dbutil::tdb;
37
38# -----------------------------------------------------------------------------
39# TDB IMPLEMENTATION
40# -----------------------------------------------------------------------------
41
42our %handle_pool;
43
44END {
45 # Close any handles left open in the pool
46 foreach my $pool_key (keys %handle_pool)
47 {
48 my $infodb_file_handle = $handle_pool{$pool_key};
49 close_infodb_write_handle($infodb_file_handle, 1);
50 }
51}
52
53# /**
54# */
55sub get_tdb_executable
56{
57 my $program = shift(@_);
58 my $program_exe = &util::filename_cat($program . &util::get_os_exe());
59 #if (!-x $program_exe)
60 #{
61 # die('Fatal Error! File doesn\'t exist or isn\'t executable: ' . $program_exe);
62 #}
63 return $program_exe;
64}
65# /** get_tdb_executable() **/
66
67
68
69# /**
70# */
71sub open_infodb_write_handle
72{
73 my $infodb_file_path = shift(@_);
74 my $opt_append = shift(@_);
75
76 my $txt2tdb_exe = &dbutil::tdb::get_tdb_executable('txt2tdb');
77
78 my $pool_key = $infodb_file_path;
79 #my $cmd = "taskset -c 5 \"$txt2tdb_exe\"";
80 my $cmd = "\"$txt2tdb_exe\"";
81 if ((defined $opt_append) && ($opt_append eq "append"))
82 {
83 $cmd .= " -append";
84 $pool_key .= '-append';
85 }
86 $cmd .= " \"$infodb_file_path\"";
87 #$cmd .= " -debug"; # Uncomment to enable debug timing
88
89 # we're going to pipe the key value pairs, in the appropriate format, from
90 # within the buildproc, so we create a piped handle here
91 my $infodb_file_handle = undef;
92 if (defined $handle_pool{$pool_key})
93 {
94 $infodb_file_handle = $handle_pool{$pool_key};
95 }
96 else
97 {
98 print STDERR "tdbcluster::open_infodb_write_handle(" . $infodb_file_path . ")\n";
99 if(!open($infodb_file_handle, "| $cmd"))
100 {
101 print STDERR "Error: Failed to open pipe to $cmd\n";
102 print STDERR " $!\n";
103 return undef;
104 }
105 binmode($infodb_file_handle,":utf8");
106 $handle_pool{$pool_key} = $infodb_file_handle;
107 }
108 return $infodb_file_handle;
109}
110# /** open_infodb_write_handle() **/
111
112# /**
113# */
114sub close_infodb_write_handle
115{
116 my ($infodb_handle, $empty_pool) = @_;
117 if (defined $empty_pool && $empty_pool == 1)
118 {
119 print STDERR "tdbcluster::close_infodb_write_handle()\n";
120 close($infodb_handle);
121 }
122}
123# /** close_infodb_write_handle() **/
124
125# /**
126# */
127sub get_infodb_file_path
128{
129 my ($collection_name, $infodb_directory_path, $perform_firsttime_initialization, $hostname) = @_;
130 if (!defined $perform_firsttime_initialization)
131 {
132 $perform_firsttime_initialization = 0;
133 }
134 if (!defined $hostname)
135 {
136 $hostname = `hostname -s`;
137 $hostname =~ s/^\s*|\s*$//gi;
138
139 # Special case: look up the head node (top of list) from mpi.conf and do
140 # not add suffix if a match
141 if ($hostname eq 'medusa')
142 {
143 $hostname = '';
144 }
145 }
146 ###rint STDERR 'tdbcluster::get_infodb_file_path("' . $collection_name . '","' . $infodb_directory_path . '","' . $hostname . '")' . "\n";
147 my $infodb_file_extension = '.tdb';
148 if ($hostname ne '')
149 {
150 $infodb_file_extension = '.' . $hostname . $infodb_file_extension;
151 }
152 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
153 my $path = &util::filename_cat($infodb_directory_path, $infodb_file_name);
154 ###rint STDERR ' => ' . $path . "\n";
155 return $path;
156}
157# /** get_infodb_file_path() **/
158
159# /**
160# */
161sub read_infodb_file
162{
163 my $infodb_file_path = shift(@_);
164 my $infodb_map = shift(@_);
165
166 my $tdb2txt_exe = &dbutil::tdb::get_tdb_executable('tdb2txt');
167
168 if (!open (PIPEIN, "\"$tdb2txt_exe\" \"$infodb_file_path\" |"))
169 {
170 print STDERR 'Error: Failed to open pipe to ' . $tdb2txt_exe . "\n";
171 print STDERR " $!\n";
172 return undef;
173 }
174
175 binmode(PIPEIN,":utf8");
176
177 my $infodb_line = "";
178 my $infodb_key = "";
179 my $infodb_value = "";
180 while (defined ($infodb_line = <PIPEIN>))
181 {
182 if ($infodb_line =~ /^\[([^\]]+)\]$/)
183 {
184 $infodb_key = $1;
185 }
186 elsif ($infodb_line =~ /^-{70}$/)
187 {
188 $infodb_map->{$infodb_key} = $infodb_value;
189 $infodb_key = "";
190 $infodb_value = "";
191 }
192 else
193 {
194 $infodb_value .= $infodb_line;
195 }
196 }
197
198 close (PIPEIN);
199}
200# /** read_infodb_file() **/
201
202# /**
203# */
204sub read_infodb_keys
205{
206 my $infodb_file_path = shift(@_);
207 my $infodb_map = shift(@_);
208
209 my $tdbkeys_exe = &dbutil::tdb::get_tdb_executable('tdbkeys');
210
211 if (!open (PIPEIN, "\"tdbkeys_exe\" \"$infodb_file_path\" |"))
212 {
213 die "couldn't open pipe from gdbmkeys \$infodb_file_path\"\n$!\n";
214 }
215
216 binmode(PIPEIN,":utf8");
217
218 my $infodb_line = "";
219 my $infodb_key = "";
220 my $infodb_value = "";
221 while (defined ($infodb_line = <PIPEIN>))
222 {
223 # remove end of line
224 chomp $infodb_line;
225
226 $infodb_map->{$infodb_line} = 1;
227 }
228
229 close (PIPEIN);
230}
231# /** read_infodb_keys() **/
232
233# /**
234# */
235sub write_infodb_entry
236{
237 # With infodb_handle already set up, works the same as gdbm and gdbm_txtgz
238 # versions
239 &dbutil::gdbmtxtgz::write_infodb_entry(@_);
240}
241# /** write_infodb_entry() **/
242
243# /**
244# */
245sub write_infodb_rawentry
246{
247 # With infodb_handle already set up, works the same as gdbm and gdbm_txtgz
248 # versions
249 &dbutil::gdbmtxtgz::write_infodb_rawentry(@_);
250}
251# /** write_infodb_rawentry() **/
252
253# /**
254# */
255sub set_infodb_entry
256{
257 my $infodb_file_path = shift(@_);
258 my $infodb_key = shift(@_);
259 my $infodb_map = shift(@_);
260
261 # Protect metadata values that go inside quotes for tdbset
262 foreach my $k (keys %$infodb_map)
263 {
264 my @escaped_v = ();
265 foreach my $v (@{$infodb_map->{$k}})
266 {
267 if ($k eq "contains")
268 {
269 # protect quotes in ".2;".3 etc
270 $v =~ s/\"/\\\"/g;
271 push(@escaped_v, $v);
272 }
273 else
274 {
275 my $ev = &ghtml::unescape_html($v);
276 $ev =~ s/\"/\\\"/g;
277 push(@escaped_v, $ev);
278 }
279 }
280 $infodb_map->{$k} = \@escaped_v;
281 }
282
283 # Generate the record string
284 my $serialized_infodb_map = &dbutil::convert_infodb_hash_to_string($infodb_map);
285 ###rint STDERR "**** ser dr\n$serialized_infodb_map\n\n\n";
286
287 # Store it into GDBM
288 my $tdbset_exe = &dbutil::tdb::get_tdb_executable('tdbset');
289 my $cmd = "\"tdbset_exe\" \"$infodb_file_path\" \"$infodb_key\" \"$serialized_infodb_map\"";
290 my $status = system($cmd);
291
292 return $status;
293}
294# /** set_infodb_entry() **/
295
296# /**
297# */
298sub delete_infodb_entry
299{
300 # With infodb_handle already set up, works the same as gdbm and gdbm_txtgz
301 # versions
302 &dbutil::gdbmtxtgz::delete_infodb_entry(@_);
303}
304# /** delete_infodb_entry() **/
305
306
307## @function
308sub merge_databases
309{
310 my $source_infodb_file_path = shift(@_);
311 my $target_infodb_file_path = shift(@_);
312 # path specific filenames
313 my $txt2tdb_exe = &dbutil::tdb::get_tdb_executable('txt2tdb');
314 my $tdb2txt_exe = &dbutil::tdb::get_tdb_executable('tdb2txt');
315 my $cmd = $tdb2txt_exe . ' "' . $source_infodb_file_path . '" | ' . $txt2tdb_exe . ' -append "' . $target_infodb_file_path . '"';
316 ###rint STDERR "[DEBUG] cmd: " . $cmd . "\n";
317 my $status = system($cmd);
318 ###rint STDERR "[DEBUG] status: $status\n";
319 if ($status == 0)
320 {
321 &FileUtils::removeFiles($source_infodb_file_path);
322 }
323 return $status;
324}
325##
326
327
3281;
Note: See TracBrowser for help on using the repository browser.