root/gs2-extensions/tdb/trunk/perllib/DBDrivers/TDB.pm @ 30338

Revision 30338, 11.1 KB (checked in by jmt12, 4 years ago)

First versions of these drivers, that should be further refined to move repeated code to a parent class

  • Property svn:executable set to *
Line 
1###############################################################################
2#
3# DBDrivers/TDB.pm -- utility functions for writing to tdb databases. Should be
4#                     hauntingly similar to GDBM utility functions.
5#
6# A component of the Greenstone digital library software from the New Zealand
7# Digital Library Project at the University of Waikato, New Zealand.
8#
9# Copyright (C) 2011-2015 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19# more details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package DBDrivers::TDB;
28
29# Pragma
30use strict;
31
32# Libraries
33use Cwd;
34use Devel::Peek;
35use ghtml;
36use Scalar::Util 'refaddr';
37use util;
38# - OO inheritence
39use parent 'DBDrivers::GDBM';
40
41sub BEGIN
42{
43    if (!defined $ENV{'GSDLHOME'} || !defined $ENV{'GSDLOS'}) {
44        die("Error! Environment not prepared. Have you sourced setup.bash?\n");
45    }
46    if (!defined $ENV{'GEXTTDBEDIT_INSTALLED'}) {
47        die("Error! Path to TDB binaries not found. Have you sourced setup.bash?\n");
48    }
49}
50
51sub new
52{
53    my $class = shift(@_);
54
55    my $self = DBDrivers::GDBM->new();
56
57    # Default TDB file extension
58    $self->{'default_file_extension'} = 'tdb';
59    # Should the TDB used a specific affinity?
60    $self->{'forced_affinity'} = -1; # zero upwards indicates the affinity
61    # Keep track of all opened file handles
62    $self->{'handle_pool'} = {};
63    # Ask TDB executables to display debugging information?
64    $self->{'tdb_debug'} = 0; # 1 to enable
65
66    bless($self, $class);
67    return $self;
68}
69
70
71## @function DESTROY
72#
73# Built-in destructor block that, unlike END, gets passed a reference to self.
74# Responsible for properly closing any open database handles.
75#
76sub DESTROY
77{
78    my $self = shift(@_);
79    # Close all remaining filehandles
80    foreach my $infodb_file_path (keys(%{$self->{'handle_pool'}})) {
81    my $infodb_handle = $self->{'handle_pool'}->{$infodb_file_path};
82    # By passing the filepath as the second argument we instruct the driver
83    # that we actually want to close the connection by passing a non-zero
84    # value, but we sneakily optimize things a little as the close method
85    # can now check to see if it's been provided a file_path rather than
86    # having to search the handle pool for it. The file_path is needed to
87    # remove the closed handle from the pool anyway.
88    $self->close_infodb_write_handle($infodb_handle, $infodb_file_path);
89    }
90}
91## DESTROY(void) => void ##
92
93
94# -----------------------------------------------------------------------------
95#   TDB IMPLEMENTATION
96# -----------------------------------------------------------------------------
97
98
99## @function _get_tdb_executable(string)
100#
101sub _get_tdb_executable
102{
103    my $self = shift(@_);
104    my $program = shift(@_);
105    if (!defined $ENV{GEXTTDBEDIT_INSTALLED} || !-d $ENV{GEXTTDBEDIT_INSTALLED})
106    {
107    die('Fatal Error! Path to TDB binaries not found. Have you sourced setup.bash?');
108    }
109    my $program_exe = &util::filename_cat($ENV{GEXTTDBEDIT_INSTALLED} . '/bin/' . $program . &util::get_os_exe());
110    if (!-x $program_exe)
111    {
112    die('Fatal Error! File doesn\'t exist or isn\'t executable: ' . $program_exe);
113    }
114    return $program_exe;
115}
116## _get_tdb_executable(string) => string ##
117
118
119# Handled by BaseDBDriver
120# sub get_infodb_file_path(string, string)
121
122# With infodb_handle already set up, these functions work the same as parent version
123# sub delete_infodb_entry {}
124# sub write_infodb_entry {}
125# sub write_infodb_rawentry {}
126
127
128## @function close_infodb_write_handle(filehandle)
129#
130#  Some slight-of-hand here due to the way Perl passes variables to methods.
131#  Most of the time (i.e. under all the existing calls in the Greenstone code)
132#  this does nothing, as TDB handles can be left open and reused by multiple
133#  writers/readers (the exception being complete file reads, but they are
134#  handled in their own function anyway).
135#
136#  However TDB's version of this function will look for an extra variable and,
137#  if true (non-zero), will actually close the handle. Several methods below
138#  call close but also pass the infodb_file_path as the second argument, which
139#  is enough to have the connections properly closed.
140#
141#  Note that when this class passes from scope all open handles will be
142#  properly closed by the DESTROY block.
143#
144sub close_infodb_write_handle {
145    my $self = shift(@_);
146    my $infodb_handle = shift(@_);
147    my $actually_close = shift(@_); # Undefined most of the time
148    if (defined($actually_close)) {
149    $self->_debugPrint('(<infodb_handle>,"' . $actually_close . '")');
150    # We'll need the file path so we can locate and remove the entry in the
151    # handle pool
152    my $infodb_file_path = undef;
153    # Sometimes we can cheat, as the actually_close variable will have the
154    # file_path in it thanks to the DESTROY block above. Doing a regex on
155    # actually_close will treat it like a string no matter what it was, and
156    # we can search for the appropriate file extension that should be there
157    # for valid paths.
158    my $pattern = '\.' . $self->{'default_file_extension'} . '$';
159    if ($actually_close =~ /$pattern/) {
160        $infodb_file_path = $actually_close;
161    }
162    # If we can't cheat then we are stuck finding which connection in the
163    # handle_pool we are about to close. Need to compare objects using
164    # refaddr()
165    else {
166        foreach my $possible_file_path (values(%{$self->{'handle_pool'}})) {
167        my $possible_handle = $self->{'handle_pool'}->{$possible_file_path};
168        if (ref($infodb_handle) && ref($possible_handle) && refaddr($infodb_handle) == refaddr($possible_handle)) {
169            $infodb_file_path = $possible_file_path;
170            last;
171        }
172        }
173    }
174    if (defined($infodb_file_path)) {
175        delete($self->{'handle_pool'}->{$infodb_file_path});
176    }
177    else {
178        print STDERR "Warning! About to close TDB database handle, but couldn't locate in open handle pool.\n";
179    }
180    # Call GDBM's close to do the heavy-lifting
181    $self->SUPER::close_infodb_write_handle($infodb_handle);
182    }
183}
184## close_infodb_write_handle(filehandle) => void ##
185
186
187## @function open_infodb_write_handle(string, string)
188#
189sub open_infodb_write_handle
190{
191    my $self = shift(@_);
192    my $infodb_file_path = shift(@_);
193    my $opt_append = shift(@_);
194
195    my $txt2tdb_exe = $self->_get_tdb_executable('txt2tdb');
196
197    my $cmd = '"' . $txt2tdb_exe . '"';
198    if ((defined $opt_append) && ($opt_append eq "append")) {
199        $cmd .= ' -append';
200    }
201    $cmd .= ' "' . $infodb_file_path . '"';
202    # Optional flags
203    if ($self->{'forced_affinity'} >= 0) {
204        $cmd = 'taskset -c 5 ' . $cmd;
205    }
206    if ($self->{'debug'}) {
207        $cmd .= ' -debug';
208    }
209
210    # we're going to pipe the key value pairs, in the appropriate format, from
211    # within the buildproc, so we create a piped handle here
212    my $infodb_file_handle = undef;
213    # if the connection is already open, simply return it.
214    if (defined $self->{'handle_pool'}->{$infodb_file_path}) {
215    $infodb_file_handle = $self->{'handle_pool'}->{$infodb_file_path};
216    }
217    else {
218    $self->_debugPrint('(' . $infodb_file_path . ')');
219    if(!open($infodb_file_handle, "| $cmd")) {
220        print STDERR "Error: Failed to open pipe to $cmd\n";
221        print STDERR "       $!\n";
222        return undef;
223    }
224    binmode($infodb_file_handle,":utf8");
225    # Remember to store the newly created connection in the pool so we can
226    # re-use for subsequent calls.
227    $self->{'handle_pool'}->{$infodb_file_path} = $infodb_file_handle;
228    }
229    return $infodb_file_handle;
230}
231## open_infodb_write_handle(string, string) => filehandle ##
232
233
234## @function read_infodb_file
235#
236sub read_infodb_file
237{
238    my $self = shift(@_);
239    my $infodb_file_path = shift(@_);
240    my $infodb_map = shift(@_);
241
242    $self->_debugPrint('(' . $infodb_file_path . ', <hashmap>)');
243
244    my $tdb2txt_exe = $self->_get_tdb_executable('tdb2txt');
245
246    if (!open (PIPEIN, '"' . $tdb2txt_exe . '" "' . $infodb_file_path . '" |')) {
247        print STDERR 'Error: Failed to open pipe to ' . $tdb2txt_exe . "\n";
248        print STDERR "       $!\n";
249        return undef;
250    }
251
252    binmode(PIPEIN,":utf8");
253
254    my $infodb_line = "";
255    my $infodb_key = "";
256    my $infodb_value = "";
257    while (defined ($infodb_line = <PIPEIN>)) {
258        if ($infodb_line =~ /^\[([^\]]+)\]$/) {
259            $infodb_key = $1;
260        }
261        elsif ($infodb_line =~ /^-{70}$/) {
262            $infodb_map->{$infodb_key} = $infodb_value;
263            $infodb_key = "";
264            $infodb_value = "";
265        }
266        else {
267            $infodb_value .= $infodb_line;
268        }
269    }
270    close (PIPEIN);
271}
272## read_infodb_file(string, hashmap) => void ##
273
274
275## @function read_infodb_keys(string, hashmap)
276#
277sub read_infodb_keys
278{
279    my $self = shift(@_);
280    my $infodb_file_path = shift(@_);
281    my $infodb_map = shift(@_);
282
283    $self->_debugPrint('(' . $infodb_file_path . ', <hashmap>)');
284
285    my $tdbkeys_exe = $self->_get_tdb_executable('tdbkeys');
286
287    if (!open (PIPEIN, '"' . $tdbkeys_exe . '" "' . $infodb_file_path . '" |')) {
288        die("Error! Couldn't open pipe from read_infodb_keys: $infodb_file_path\n$!\n");
289    }
290
291    binmode(PIPEIN,":utf8");
292
293    my $infodb_line = "";
294    my $infodb_key = "";
295    my $infodb_value = "";
296    while (defined ($infodb_line = <PIPEIN>)) {
297        # remove end of line
298        chomp $infodb_line;
299        $infodb_map->{$infodb_line} = 1;
300    }
301
302    close (PIPEIN);
303}
304## read_infodb_keys(string, hashmap) => void ##
305
306
307## @function set_infodb_entry(string, string, hashmap)
308#
309sub set_infodb_entry
310{
311    my $self = shift(@_);
312    my $infodb_file_path = shift(@_);
313    my $infodb_key = shift(@_);
314    my $infodb_map = shift(@_);
315
316    $self->_debugPrint('(' . $infodb_file_path . ', ' . $infodb_key . ', <hashmap>)');
317
318    # Protect metadata values that go inside quotes for tdbset
319    foreach my $k (keys %$infodb_map) {
320        my @escaped_v = ();
321        foreach my $v (@{$infodb_map->{$k}}) {
322            if ($k eq "contains") {
323                # protect quotes in ".2;".3 etc
324                $v =~ s/\"/\\\"/g;
325                push(@escaped_v, $v);
326            }
327            else {
328                my $ev = &ghtml::unescape_html($v);
329                $ev =~ s/\"/\\\"/g;
330                push(@escaped_v, $ev);
331            }
332        }
333        $infodb_map->{$k} = \@escaped_v;
334    }
335
336    # Generate the record string
337    my $serialized_infodb_map = $self->_convert_infodb_hash_to_string($infodb_map);
338
339    # Store it into GDBM
340    my $tdbset_exe = $self->_get_tdb_executable('tdbset');
341    my $cmd = '"' . $tdbset_exe . '" "' . $infodb_file_path . '" "' . $infodb_key . '" "' . $serialized_infodb_map . '"';
342    my $status = system($cmd);
343
344    return $status;
345}
346## set_infodb_entry(string, string, hashmap) => integer ##
347
3481;
Note: See TracBrowser for help on using the browser.