source: gs2-extensions/tdb/trunk/perllib/DBDrivers/TDB.pm@ 30338

Last change on this file since 30338 was 30338, checked in by jmt12, 8 years ago

First versions of these drivers, that should be further refined to move repeated code to a parent class

  • Property svn:executable set to *
File size: 11.1 KB
Line 
1###############################################################################
2#
3# DBDrivers/TDB.pm -- utility functions for writing to tdb databases. Should be
4# hauntingly similar to GDBM utility functions.
5#
6# A component of the Greenstone digital library software from the New Zealand
7# Digital Library Project at the University of Waikato, New Zealand.
8#
9# Copyright (C) 2011-2015 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19# more details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package DBDrivers::TDB;
28
29# Pragma
30use strict;
31
32# Libraries
33use Cwd;
34use Devel::Peek;
35use ghtml;
36use Scalar::Util 'refaddr';
37use util;
38# - OO inheritence
39use parent 'DBDrivers::GDBM';
40
41sub BEGIN
42{
43 if (!defined $ENV{'GSDLHOME'} || !defined $ENV{'GSDLOS'}) {
44 die("Error! Environment not prepared. Have you sourced setup.bash?\n");
45 }
46 if (!defined $ENV{'GEXTTDBEDIT_INSTALLED'}) {
47 die("Error! Path to TDB binaries not found. Have you sourced setup.bash?\n");
48 }
49}
50
51sub new
52{
53 my $class = shift(@_);
54
55 my $self = DBDrivers::GDBM->new();
56
57 # Default TDB file extension
58 $self->{'default_file_extension'} = 'tdb';
59 # Should the TDB used a specific affinity?
60 $self->{'forced_affinity'} = -1; # zero upwards indicates the affinity
61 # Keep track of all opened file handles
62 $self->{'handle_pool'} = {};
63 # Ask TDB executables to display debugging information?
64 $self->{'tdb_debug'} = 0; # 1 to enable
65
66 bless($self, $class);
67 return $self;
68}
69
70
71## @function DESTROY
72#
73# Built-in destructor block that, unlike END, gets passed a reference to self.
74# Responsible for properly closing any open database handles.
75#
76sub DESTROY
77{
78 my $self = shift(@_);
79 # Close all remaining filehandles
80 foreach my $infodb_file_path (keys(%{$self->{'handle_pool'}})) {
81 my $infodb_handle = $self->{'handle_pool'}->{$infodb_file_path};
82 # By passing the filepath as the second argument we instruct the driver
83 # that we actually want to close the connection by passing a non-zero
84 # value, but we sneakily optimize things a little as the close method
85 # can now check to see if it's been provided a file_path rather than
86 # having to search the handle pool for it. The file_path is needed to
87 # remove the closed handle from the pool anyway.
88 $self->close_infodb_write_handle($infodb_handle, $infodb_file_path);
89 }
90}
91## DESTROY(void) => void ##
92
93
94# -----------------------------------------------------------------------------
95# TDB IMPLEMENTATION
96# -----------------------------------------------------------------------------
97
98
99## @function _get_tdb_executable(string)
100#
101sub _get_tdb_executable
102{
103 my $self = shift(@_);
104 my $program = shift(@_);
105 if (!defined $ENV{GEXTTDBEDIT_INSTALLED} || !-d $ENV{GEXTTDBEDIT_INSTALLED})
106 {
107 die('Fatal Error! Path to TDB binaries not found. Have you sourced setup.bash?');
108 }
109 my $program_exe = &util::filename_cat($ENV{GEXTTDBEDIT_INSTALLED} . '/bin/' . $program . &util::get_os_exe());
110 if (!-x $program_exe)
111 {
112 die('Fatal Error! File doesn\'t exist or isn\'t executable: ' . $program_exe);
113 }
114 return $program_exe;
115}
116## _get_tdb_executable(string) => string ##
117
118
119# Handled by BaseDBDriver
120# sub get_infodb_file_path(string, string)
121
122# With infodb_handle already set up, these functions work the same as parent version
123# sub delete_infodb_entry {}
124# sub write_infodb_entry {}
125# sub write_infodb_rawentry {}
126
127
128## @function close_infodb_write_handle(filehandle)
129#
130# Some slight-of-hand here due to the way Perl passes variables to methods.
131# Most of the time (i.e. under all the existing calls in the Greenstone code)
132# this does nothing, as TDB handles can be left open and reused by multiple
133# writers/readers (the exception being complete file reads, but they are
134# handled in their own function anyway).
135#
136# However TDB's version of this function will look for an extra variable and,
137# if true (non-zero), will actually close the handle. Several methods below
138# call close but also pass the infodb_file_path as the second argument, which
139# is enough to have the connections properly closed.
140#
141# Note that when this class passes from scope all open handles will be
142# properly closed by the DESTROY block.
143#
144sub close_infodb_write_handle {
145 my $self = shift(@_);
146 my $infodb_handle = shift(@_);
147 my $actually_close = shift(@_); # Undefined most of the time
148 if (defined($actually_close)) {
149 $self->_debugPrint('(<infodb_handle>,"' . $actually_close . '")');
150 # We'll need the file path so we can locate and remove the entry in the
151 # handle pool
152 my $infodb_file_path = undef;
153 # Sometimes we can cheat, as the actually_close variable will have the
154 # file_path in it thanks to the DESTROY block above. Doing a regex on
155 # actually_close will treat it like a string no matter what it was, and
156 # we can search for the appropriate file extension that should be there
157 # for valid paths.
158 my $pattern = '\.' . $self->{'default_file_extension'} . '$';
159 if ($actually_close =~ /$pattern/) {
160 $infodb_file_path = $actually_close;
161 }
162 # If we can't cheat then we are stuck finding which connection in the
163 # handle_pool we are about to close. Need to compare objects using
164 # refaddr()
165 else {
166 foreach my $possible_file_path (values(%{$self->{'handle_pool'}})) {
167 my $possible_handle = $self->{'handle_pool'}->{$possible_file_path};
168 if (ref($infodb_handle) && ref($possible_handle) && refaddr($infodb_handle) == refaddr($possible_handle)) {
169 $infodb_file_path = $possible_file_path;
170 last;
171 }
172 }
173 }
174 if (defined($infodb_file_path)) {
175 delete($self->{'handle_pool'}->{$infodb_file_path});
176 }
177 else {
178 print STDERR "Warning! About to close TDB database handle, but couldn't locate in open handle pool.\n";
179 }
180 # Call GDBM's close to do the heavy-lifting
181 $self->SUPER::close_infodb_write_handle($infodb_handle);
182 }
183}
184## close_infodb_write_handle(filehandle) => void ##
185
186
187## @function open_infodb_write_handle(string, string)
188#
189sub open_infodb_write_handle
190{
191 my $self = shift(@_);
192 my $infodb_file_path = shift(@_);
193 my $opt_append = shift(@_);
194
195 my $txt2tdb_exe = $self->_get_tdb_executable('txt2tdb');
196
197 my $cmd = '"' . $txt2tdb_exe . '"';
198 if ((defined $opt_append) && ($opt_append eq "append")) {
199 $cmd .= ' -append';
200 }
201 $cmd .= ' "' . $infodb_file_path . '"';
202 # Optional flags
203 if ($self->{'forced_affinity'} >= 0) {
204 $cmd = 'taskset -c 5 ' . $cmd;
205 }
206 if ($self->{'debug'}) {
207 $cmd .= ' -debug';
208 }
209
210 # we're going to pipe the key value pairs, in the appropriate format, from
211 # within the buildproc, so we create a piped handle here
212 my $infodb_file_handle = undef;
213 # if the connection is already open, simply return it.
214 if (defined $self->{'handle_pool'}->{$infodb_file_path}) {
215 $infodb_file_handle = $self->{'handle_pool'}->{$infodb_file_path};
216 }
217 else {
218 $self->_debugPrint('(' . $infodb_file_path . ')');
219 if(!open($infodb_file_handle, "| $cmd")) {
220 print STDERR "Error: Failed to open pipe to $cmd\n";
221 print STDERR " $!\n";
222 return undef;
223 }
224 binmode($infodb_file_handle,":utf8");
225 # Remember to store the newly created connection in the pool so we can
226 # re-use for subsequent calls.
227 $self->{'handle_pool'}->{$infodb_file_path} = $infodb_file_handle;
228 }
229 return $infodb_file_handle;
230}
231## open_infodb_write_handle(string, string) => filehandle ##
232
233
234## @function read_infodb_file
235#
236sub read_infodb_file
237{
238 my $self = shift(@_);
239 my $infodb_file_path = shift(@_);
240 my $infodb_map = shift(@_);
241
242 $self->_debugPrint('(' . $infodb_file_path . ', <hashmap>)');
243
244 my $tdb2txt_exe = $self->_get_tdb_executable('tdb2txt');
245
246 if (!open (PIPEIN, '"' . $tdb2txt_exe . '" "' . $infodb_file_path . '" |')) {
247 print STDERR 'Error: Failed to open pipe to ' . $tdb2txt_exe . "\n";
248 print STDERR " $!\n";
249 return undef;
250 }
251
252 binmode(PIPEIN,":utf8");
253
254 my $infodb_line = "";
255 my $infodb_key = "";
256 my $infodb_value = "";
257 while (defined ($infodb_line = <PIPEIN>)) {
258 if ($infodb_line =~ /^\[([^\]]+)\]$/) {
259 $infodb_key = $1;
260 }
261 elsif ($infodb_line =~ /^-{70}$/) {
262 $infodb_map->{$infodb_key} = $infodb_value;
263 $infodb_key = "";
264 $infodb_value = "";
265 }
266 else {
267 $infodb_value .= $infodb_line;
268 }
269 }
270 close (PIPEIN);
271}
272## read_infodb_file(string, hashmap) => void ##
273
274
275## @function read_infodb_keys(string, hashmap)
276#
277sub read_infodb_keys
278{
279 my $self = shift(@_);
280 my $infodb_file_path = shift(@_);
281 my $infodb_map = shift(@_);
282
283 $self->_debugPrint('(' . $infodb_file_path . ', <hashmap>)');
284
285 my $tdbkeys_exe = $self->_get_tdb_executable('tdbkeys');
286
287 if (!open (PIPEIN, '"' . $tdbkeys_exe . '" "' . $infodb_file_path . '" |')) {
288 die("Error! Couldn't open pipe from read_infodb_keys: $infodb_file_path\n$!\n");
289 }
290
291 binmode(PIPEIN,":utf8");
292
293 my $infodb_line = "";
294 my $infodb_key = "";
295 my $infodb_value = "";
296 while (defined ($infodb_line = <PIPEIN>)) {
297 # remove end of line
298 chomp $infodb_line;
299 $infodb_map->{$infodb_line} = 1;
300 }
301
302 close (PIPEIN);
303}
304## read_infodb_keys(string, hashmap) => void ##
305
306
307## @function set_infodb_entry(string, string, hashmap)
308#
309sub set_infodb_entry
310{
311 my $self = shift(@_);
312 my $infodb_file_path = shift(@_);
313 my $infodb_key = shift(@_);
314 my $infodb_map = shift(@_);
315
316 $self->_debugPrint('(' . $infodb_file_path . ', ' . $infodb_key . ', <hashmap>)');
317
318 # Protect metadata values that go inside quotes for tdbset
319 foreach my $k (keys %$infodb_map) {
320 my @escaped_v = ();
321 foreach my $v (@{$infodb_map->{$k}}) {
322 if ($k eq "contains") {
323 # protect quotes in ".2;".3 etc
324 $v =~ s/\"/\\\"/g;
325 push(@escaped_v, $v);
326 }
327 else {
328 my $ev = &ghtml::unescape_html($v);
329 $ev =~ s/\"/\\\"/g;
330 push(@escaped_v, $ev);
331 }
332 }
333 $infodb_map->{$k} = \@escaped_v;
334 }
335
336 # Generate the record string
337 my $serialized_infodb_map = $self->_convert_infodb_hash_to_string($infodb_map);
338
339 # Store it into GDBM
340 my $tdbset_exe = $self->_get_tdb_executable('tdbset');
341 my $cmd = '"' . $tdbset_exe . '" "' . $infodb_file_path . '" "' . $infodb_key . '" "' . $serialized_infodb_map . '"';
342 my $status = system($cmd);
343
344 return $status;
345}
346## set_infodb_entry(string, string, hashmap) => integer ##
347
3481;
Note: See TracBrowser for help on using the repository browser.