root/gs2-extensions/tdb/trunk/perllib/DBDrivers/70HyphenFormat.pm @ 30347

Revision 30347, 12.8 KB (checked in by jmt12, 4 years ago)

Continuing to refactor driver code to move shared code up to parent classes. Have all the basic drivers done...

Line 
1###############################################################################
2#
3# 70HyphenFormat.pm -- The parent class of drivers that use the basic GS format
4#                      of a text obeying these rules:
5#
6#                      <line>      := <uniqueid> <metadata>+ <separator>
7#                      <uniqueid>  := \[[a-z][a-z0-9]*\]\n
8#                      <metadata>  := <[a-z][a-z0-9]*>(^-{70})+\n
9#                      <separator> := -{70}\n
10#
11#                      Contains some utility functions useful to any driver
12#                      that makes use of this format.
13#
14# A component of the Greenstone digital library software from the New Zealand
15# Digital Library Project at the University of Waikato, New Zealand.
16#
17# Copyright (C) 1999-2015 New Zealand Digital Library Project
18#
19# This program is free software; you can redistribute it and/or modify it under
20# the terms of the GNU General Public License as published by the Free Software
21# Foundation; either version 2 of the License, or (at your option) any later
22# version.
23#
24# This program is distributed in the hope that it will be useful, but WITHOUT
25# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
27# more details.
28#
29# You should have received a copy of the GNU General Public License along with
30# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
31# Ave, Cambridge, MA 02139, USA.
32#
33###############################################################################
34
35# Note: This driver may be a candidate for further splitting, maybe into a
36# PipedExecutableDriver and a 70HyphenFormatDriver... but for now all piped
37# drivers are 70 hyphen format ones, so, yeah.
38
39package DBDrivers::70HyphenFormat;
40
41# Pragma
42use strict;
43
44# Libraries
45use ghtml;
46use util;
47use FileUtils;
48use parent 'DBDrivers::BaseDBDriver';
49
50use constant {
51    RWMODE_READ  => '-|',
52    RWMODE_WRITE => '|-',
53};
54
55## @function constructor
56#
57sub new
58{
59    my $class = shift(@_);
60    my $self = DBDrivers::BaseDBDriver->new(@_);
61    $self->{'executable_path'} = 'error';
62    $self->{'keyread_executable'} = 'error';
63    $self->{'read_executable'} = 'error';
64    $self->{'write_executable'} = 'error';
65    $self->{'forced_affinity'} = -1; # Set to processor number for forced affinity
66    bless($self, $class);
67    return $self;
68}
69## new(void) => 70HyphenFormat ##
70
71
72################################## Protected ##################################
73
74
75## @function close_infodb_write_handle(filehandle)
76#
77sub close_infodb_write_handle
78{
79    my $self = shift(@_);
80    $self->debugPrintFunctionHeader(@_);
81    my $handle = shift(@_);
82    my $force_close = shift(@_); # Undefined most of the time
83    my $continue_close = $self->removeConnectionIfPersistent($handle, $force_close);
84    if ($continue_close) {
85    close($handle);
86    }
87    return;
88}
89## close_infodb_write_handle(filehandle) => void ##
90
91
92## @function convert_infodb_hash_to_string(hashmap) => string
93#
94sub convert_infodb_hash_to_string
95{
96    my $self = shift(@_);
97    my $infodb_map = shift(@_);
98    my $infodb_entry_value = "";
99    foreach my $infodb_value_key (keys(%$infodb_map)) {
100        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
101            $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
102        }
103    }
104    return $infodb_entry_value;
105}
106## convert_infodb_hash_to_string(hashmap) => string ##
107
108
109## @function convert_infodb_string_to_hash(string) => hashmap
110#
111sub convert_infodb_string_to_hash
112{
113    my $self = shift(@_);
114    my $infodb_entry_value = shift(@_);
115    my $infodb_map = ();
116
117    if (!defined $infodb_entry_value) {
118    print STDERR "Warning: No value to convert into a infodb hashtable\n";
119    }
120    else {
121        while ($infodb_entry_value =~ /^<(.*?)>(.*)$/mg) {
122            my $infodb_value_key = $1;
123            my $infodb_value = $2;
124
125            if (!defined($infodb_map->{$infodb_value_key})) {
126                $infodb_map->{$infodb_value_key} = [ $infodb_value ];
127            }
128            else {
129                push(@{$infodb_map->{$infodb_value_key}}, $infodb_value);
130            }
131    }
132    }
133
134    return $infodb_map;
135}
136## convert_infodb_string_to_hash(string) => hashmap ##
137
138
139## @function delete_infodb_entry(filehandle, string)
140#
141sub delete_infodb_entry
142{
143    my $self = shift(@_);
144    $self->debugPrintFunctionHeader(@_);
145    my $infodb_handle = shift(@_);
146    my $infodb_key = shift(@_);
147    # A minus at the end of a key (after the ]) signifies 'delete'
148    print $infodb_handle '[' . $infodb_key . ']-' . "\n";
149    # The 70 minus signs are also needed, to help make the parsing by db2txt simple
150    print $infodb_handle '-' x 70, "\n";
151}
152## delete_infodb_entry(filehandle, string) => void ##
153
154
155## @function open_infodb_write_handle(string, string)
156#
157sub open_infodb_write_handle
158{
159    my $self = shift(@_);
160    $self->debugPrintFunctionHeader(@_);
161    my $path = shift(@_);
162    my $append = shift(@_);
163    my $infodb_file_handle = $self->retrieveConnectionIfPersistent($path, $append);;
164    # No available existing connection
165    if (!defined $infodb_file_handle || !$infodb_file_handle) {
166        $infodb_file_handle = $self->openWriteHandle($path, $append, @_);
167    $self->registerConnectionIfPersistent($infodb_file_handle, $path, $append);
168    }
169    return $infodb_file_handle;
170}
171## open_infodb_write_handle(string, string) => filehandle ##
172
173
174## @function openPipedHandle(integer, string, string, string*) => filehandle
175#
176sub openPipedHandle
177{
178    my $self = shift(@_);
179    my $mode = shift(@_);
180    my $executable_and_default_args = shift(@_);
181    my $infodb_file_path = shift(@_);
182    my ($executable, $default_args) = $executable_and_default_args =~ /^([a-z0-9]+)\s*(.*)$/;
183    my $exe = &FileUtils::filenameConcatenate($self->{'executable_path'}, $executable . &util::get_os_exe());
184    if (!-e $exe) {
185    # Hope it's on path
186    $exe = $executable . &util::get_os_exe();
187    }
188    my $infodb_file_handle = undef;
189    my $cmd = '';
190    if ($self->{'forced_affinity'} >= 0)
191    {
192        $cmd = 'taskset -c ' . $self->{'forced_affinity'} . ' ';
193    }
194    $cmd .= '"' . $exe . '" ' . $default_args;
195    foreach my $open_arg (@_) {
196    # Special - append is typically missing a hyphen
197    if ($open_arg eq 'append') {
198        $open_arg = '-append';
199    }
200    $cmd .= ' ' . $open_arg;
201    }
202    $cmd .= ' "' . $infodb_file_path . '"';
203    $self->debugPrint("CMD: '" . $cmd . "'\n");
204    if(!open($infodb_file_handle, $mode . ':utf8', $cmd)) {
205        print STDERR "Error: Failed to open pipe to '$cmd'\n";
206        print STDERR "       $!\n";
207        return undef;
208    }
209    #binmode($infodb_file_handle,":utf8");
210    return $infodb_file_handle;
211}
212## openPipedHandle(integer, string, string, string*) => filehandle ##
213
214
215## @function openReadHandle(string, string) => filehandle
216#
217sub openReadHandle
218{
219    my $self = shift(@_);
220    return $self->openPipedHandle(RWMODE_READ, $self->{'read_executable'}, @_);
221}
222## openReadHandle(string, string) => filehandle
223
224
225## @function openWriteHandle(*) => filehandle
226#
227sub openWriteHandle
228{
229    my $self = shift(@_);
230    return $self->openPipedHandle(RWMODE_WRITE, $self->{'write_executable'}, @_);
231}
232## openWriteHandle(*) => filehandle ##
233
234
235## @function read_infodb_entry(string, string) => hashmap
236#
237sub read_infodb_entry
238{
239    my $self = shift(@_);
240    my $raw_string = $self->read_infodb_rawentry(@_);
241    my $infodb_rec = $self->convert_infodb_string_to_hash($raw_string);
242    return $infodb_rec;
243}
244## read_infodb_entry(string, string) => hashmap ##
245
246
247## @function read_infodb_file(string, hashmap) => void
248#
249sub read_infodb_file
250{
251    my $self = shift(@_);
252    my $infodb_file_path = shift(@_);
253    my $infodb_map = shift(@_);
254    $self->debugPrintFunctionHeader($infodb_file_path, $infodb_map);
255    my $infodb_file_handle = $self->openReadHandle($infodb_file_path);
256    my $infodb_line = "";
257    my $infodb_key = "";
258    my $infodb_value = "";
259    while (defined ($infodb_line = <$infodb_file_handle>)) {
260        $infodb_line =~ s/(\r\n)+$//; # more general than chomp
261        if ($infodb_line =~ /^\[([^\]]+)\]$/) {
262            $infodb_key = $1;
263        }
264        elsif ($infodb_line =~ /^-{70}$/) {
265            $infodb_map->{$infodb_key} = $infodb_value;
266            $infodb_key = "";
267            $infodb_value = "";
268        }
269        else {
270            $infodb_value .= $infodb_line;
271        }
272    }
273  $self->close_infodb_write_handle($infodb_file_handle);
274}
275## read_infodb_file(string, hashmap) => void ##
276
277
278## @function read_infodb_keys(string, hashmap) => void
279#
280sub read_infodb_keys
281{
282    my $self = shift(@_);
283    my $infodb_file_path = shift(@_);
284    my $infodb_map = shift(@_);
285    my $infodb_file_handle = $self->openPipedHandle(RWMODE_READ, $self->{'keyread_executable'}, $infodb_file_path);
286    if (!$infodb_file_handle) {
287    die("Couldn't open pipe from gdbmkeys: " . $infodb_file_path . "\n");
288    }
289    my $infodb_line = "";
290    my $infodb_key = "";
291    my $infodb_value = "";
292    # Simple case - dedicated keyread exe, so keys are strings
293    if ($self->{'keyread_executable'} ne $self->{'read_executable'}) {
294    while (defined ($infodb_line = <$infodb_file_handle>)) {
295        $infodb_line =~ s/[\r\n]+$//;
296        $infodb_map->{$infodb_line} = 1;
297    }
298    }
299    # Slightly more difficult - have to parse keys out of 70hyphen format
300    else {
301    while (defined ($infodb_line = <$infodb_file_handle>)) {
302        if ($infodb_line =~ /^\[([^\]]+)\](-)?[\r\n]*$/) {
303        my $key = $1;
304        my $delete_flag = $2;
305        if (defined $delete_flag) {
306            delete $infodb_map->{$key}
307        }
308        else {
309            $infodb_map->{$key} = 1;
310        }
311        }
312    }
313    }
314    $self->close_infodb_write_handle($infodb_file_handle);
315}
316## read_infodb_keys(string, hashmap) => void ##
317
318
319## @function read_infodb_rawentry(string, string) => string
320#
321# !! TEMPORARY: Slow and naive implementation that just reads the entire file
322# and picks out the one value. This should one day be replaced with database-
323# specific versions that will use dbget etc.
324#
325sub read_infodb_rawentry
326{
327    my $self = shift(@_);
328    my $infodb_file_path = shift(@_);
329    my $infodb_key = shift(@_);
330    # temporary hashmap... we're only interested in one entry
331    my $infodb_map = {};
332    $self->read_infodb_file($infodb_file_path, $infodb_map);
333    return $infodb_map->{$infodb_key};
334}
335## read_infodb_rawentry(string, string) => string ##
336
337
338## @function set_infodb_entry(string, string, hashmap)
339#
340sub set_infodb_entry
341{
342    my $self = shift(@_);
343    my $infodb_file_path = shift(@_);
344    my $infodb_key = shift(@_);
345    my $infodb_map = shift(@_);
346
347    # HTML escape anything that is not part of the "contains" metadata value
348    foreach my $k (keys %$infodb_map) {
349    my @escaped_v = ();
350    foreach my $v (@{$infodb_map->{$k}}) {
351        if ($k eq "contains") {
352        push(@escaped_v, $v);
353        }
354        else {
355        my $ev = &ghtml::unescape_html($v);
356        push(@escaped_v, $ev);
357        }
358    }
359    $infodb_map->{$k} = \@escaped_v;
360    }
361
362    # Generate the record string
363    my $serialized_infodb_map = $self->convert_infodb_hash_to_string($infodb_map);
364
365    # Store it into DB using '... -append' which despite its name actually
366    # replaces the record if it already exists
367    my $status = undef;
368    my $infodb_file_handle = $self->openWriteHandle($infodb_file_path, '-append');
369    if (!$infodb_file_handle) {
370    print STDERR "Error: set_infodb_entry() failed to open pipe to: " . $infodb_file_handle ."\n";
371    print STDERR "       $!\n";
372    $status = -1;
373    }
374    else {
375    print $infodb_file_handle "[$infodb_key]\n";
376    print $infodb_file_handle "$serialized_infodb_map\n";
377    $self->close_infodb_write_handle($infodb_file_handle);
378    $status = 0; # as in exit status of cmd OK
379    }
380    return $status;
381}
382## set_infodb_entry(string, string, hashmap) => integer ##
383
384
385## @function write_infodb_entry(filehandle, string, hashmap)
386#
387sub write_infodb_entry
388{
389    my $self = shift(@_);
390    my $infodb_handle = shift(@_);
391    my $infodb_key = shift(@_);
392    my $infodb_map = shift(@_);
393
394    print $infodb_handle "[$infodb_key]\n";
395    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
396        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
397            if ($infodb_value =~ /-{70,}/) {
398                # if value contains 70 or more hyphens in a row we need to escape them
399                # to prevent txt2db from treating them as a separator
400                $infodb_value =~ s/-/&\#045;/gi;
401            }
402            print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
403        }
404    }
405    print $infodb_handle '-' x 70, "\n";
406}
407## write_infodb_entry(filehandle, string, hashmap) => void ##
408
409
410## @function write_infodb_rawentry(filehandle, string, string)
411#
412sub write_infodb_rawentry
413{
414    my $self = shift(@_);
415    my $infodb_handle = shift(@_);
416    my $infodb_key = shift(@_);
417    my $infodb_val = shift(@_);
418
419    print $infodb_handle "[$infodb_key]\n";
420    print $infodb_handle "$infodb_val\n";
421    print $infodb_handle '-' x 70, "\n";
422}
423## write_infodb_rawentry(filehandle, string, string) ##
424
425
4261;
Note: See TracBrowser for help on using the browser.