root/gs2-extensions/tdb/trunk/perllib/DBDrivers/70HyphenFormat.pm @ 30341

Revision 30341, 11.7 KB (checked in by jmt12, 4 years ago)

Added the ability to detect delete entries (keys flagged for removal) in the 70-hyphen format when retrieving keys; needed for GDBMTXTGZ.

Line 
1###############################################################################
2#
3# 70HyphenFormat.pm -- The parent class of drivers that use the basic GS format
4#                      of a text obeying these rules:
5#
6#                      <line>      := <uniqueid> <metadata>+ <separator>
7#                      <uniqueid>  := \[[a-z][a-z0-9]*\]\n
8#                      <metadata>  := <[a-z][a-z0-9]*>(^-{70})+\n
9#                      <separator> := -{70}\n
10#
11#                      Contains some utility functions useful to any driver
12#                      that makes use of this format.
13#
14# A component of the Greenstone digital library software from the New Zealand
15# Digital Library Project at the University of Waikato, New Zealand.
16#
17# Copyright (C) 1999-2015 New Zealand Digital Library Project
18#
19# This program is free software; you can redistribute it and/or modify it under
20# the terms of the GNU General Public License as published by the Free Software
21# Foundation; either version 2 of the License, or (at your option) any later
22# version.
23#
24# This program is distributed in the hope that it will be useful, but WITHOUT
25# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
27# more details.
28#
29# You should have received a copy of the GNU General Public License along with
30# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
31# Ave, Cambridge, MA 02139, USA.
32#
33###############################################################################
34
35package DBDrivers::70HyphenFormat;
36
37# Pragma
38use strict;
39
40# Libraries
41use ghtml;
42use util;
43use FileUtils;
44use parent 'DBDrivers::BaseDBDriver';
45
# Mode strings handed to open() when talking to a helper executable.
# '-|' opens a pipe for reading the command's output.
use constant RWMODE_READ  => '-|';
# '|-' opens a pipe for writing to the command's input.
use constant RWMODE_WRITE => '|-';
50
## @function constructor
#
# Builds the object through the DBDrivers::BaseDBDriver constructor (handing on
# every argument after the class name), fills the four executable slots with
# the placeholder string 'error' and reblesses the object into the requested
# class.
#
# @param  class  name of the class the new driver is blessed into
# @return the driver object, blessed into that class
#
sub new
{
    my ($class, @constructor_args) = @_;
    my $self = DBDrivers::BaseDBDriver->new(@constructor_args);
    # Placeholders only - presumably concrete drivers replace these values
    foreach my $slot (qw(executable_path keyread_executable read_executable write_executable)) {
        $self->{$slot} = 'error';
    }
    return bless($self, $class);
}
64## new(void) => 70HyphenFormat ##
65
66
67################################## Protected ##################################
68
69
## @function close_infodb_handle(filehandle)
#
# Reports the call through debugPrintFunctionHeader() and then closes the
# given handle.
#
# @param  infodb_handle  the filehandle to close
# @return whatever close() returns for the handle
#
sub close_infodb_handle
{
    my ($self, $infodb_handle) = @_;
    $self->debugPrintFunctionHeader();
    return close($infodb_handle);
}
79## close_infodb_handle(filehandle) => void ##
80
81
## @function close_infodb_write_handle(filehandle)
#
# Write handles need no special treatment in this format, so this simply
# delegates to close_infodb_handle() with the same arguments.
#
# @param  infodb_handle  the write filehandle to close
# @return the result of close_infodb_handle()
#
sub close_infodb_write_handle
{
    my ($self, @handle_args) = @_;
    return $self->close_infodb_handle(@handle_args);
}
89## close_infodb_write_handle(filehandle) => void ##
90
91
## @function convert_infodb_hash_to_string(hashmap) => string
#
# Serializes a record into one "<key>value\n" line per value.
#
# The metadata keys are emitted in sorted order so that the same record always
# serializes to the same string (hash iteration order is not stable in Perl)
# and so that the output agrees with the ordering used by write_infodb_entry().
# Values belonging to one key keep the order they have in their array.
#
# @param  infodb_map  reference to a hash mapping a metadata key to a
#                     reference to an array of values
# @return the serialized record; the empty string for an empty map
#
sub convert_infodb_hash_to_string
{
    my $self = shift(@_);
    my $infodb_map = shift(@_);
    my $infodb_entry_value = "";
    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
            $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
        }
    }
    return $infodb_entry_value;
}
106## convert_infodb_hash_to_string(hashmap) => string ##
107
108
## @function convert_infodb_string_to_hash(string) => hashmap
#
# Parses a serialized record - one "<key>value" pair per line - into a hash
# that maps each key to a reference to an array of its values, in the order
# the lines appear.
#
# @param  infodb_entry_value  the serialized record
# @return a reference to the hash; undef when the argument is undefined (a
#         warning is printed to STDERR) or when no line has the "<key>value"
#         shape
#
sub convert_infodb_string_to_hash
{
    my ($self, $infodb_entry_value) = @_;
    # Deliberately left undefined: it only becomes a hash reference once the
    # first pair has been stored
    my $infodb_map;

    unless (defined $infodb_entry_value) {
        print STDERR "Warning: No value to convert into a infodb hashtable\n";
        return $infodb_map;
    }

    while ($infodb_entry_value =~ /^<(.*?)>(.*)$/mg) {
        my ($field_name, $field_value) = ($1, $2);
        # push creates the array for a key seen for the first time
        push(@{$infodb_map->{$field_name}}, $field_value);
    }

    return $infodb_map;
}
136## convert_infodb_string_to_hash(string) => hashmap ##
137
138
## @function delete_infodb_entry(filehandle, string)
#
# Writes a delete marker for a key: the key line with a trailing minus sign,
# followed by the usual record separator.
#
# @param  infodb_handle  the filehandle the marker is printed to
# @param  infodb_key     the key of the record to delete
#
sub delete_infodb_entry
{
    my ($self, $infodb_handle, $infodb_key) = @_;
    my $separator = '-' x 70;

    # The minus after the closing bracket is what signifies 'delete'
    print {$infodb_handle} "[$infodb_key]-\n";

    # The separator is still required so the consumer's parsing stays simple
    print {$infodb_handle} $separator, "\n";
}
153## delete_infodb_entry(filehandle, string) => void ##
154
155
## @function open_infodb_write_handle(string, string)
#
# Reports the call (with its arguments) through debugPrintFunctionHeader() and
# opens a write handle by passing the same arguments to openWriteHandle().
#
# @return the handle returned by openWriteHandle()
#
sub open_infodb_write_handle
{
    my ($self, @open_args) = @_;
    $self->debugPrintFunctionHeader(@open_args);
    # scalar() keeps the call in scalar context, as a plain assignment would
    return scalar($self->openWriteHandle(@open_args));
}
165## open_infodb_write_handle(string, string) => filehandle ##
166
167
## @function openPipedHandle(string, string, string, string*) => filehandle
#
# Opens a pipe to (or from) a helper executable operating on a database file.
#
# The command is assembled as: "<exe>" <default args> <extra args> "<file>".
# The executable is first looked for below $self->{'executable_path'}; when no
# file exists there the bare name is used in the hope it is on the PATH.
#
# NOTE: the command is given to open() as one string, and neither the extra
# arguments nor the file path are escaped beyond the surrounding double quotes.
#
# @param  mode                         the open() pipe mode, i.e. RWMODE_READ
#                                      or RWMODE_WRITE
# @param  executable_and_default_args  the executable name (lowercase letters
#                                      and digits) optionally followed by
#                                      default arguments
# @param  infodb_file_path             path of the database file, appended as
#                                      the last argument
# @param  ...                          any further arguments are inserted
#                                      before the file path
# @return the opened filehandle, or undef (after printing an error to STDERR)
#         when the executable cannot be determined or the pipe cannot be opened
#
sub openPipedHandle
{
    my $self = shift(@_);
    my $mode = shift(@_);
    my $executable_and_default_args = shift(@_);
    my $infodb_file_path = shift(@_);

    my ($executable, $default_args);
    if (defined $executable_and_default_args) {
        ($executable, $default_args) = $executable_and_default_args =~ /^([a-z0-9]+)\s*(.*)$/;
    }
    # Without a recognisable executable name there is no sensible command to
    # run, so report the problem instead of building a broken command line
    if (!defined $executable) {
        my $shown_spec = defined $executable_and_default_args ? $executable_and_default_args : '';
        print STDERR "Error: Failed to determine executable from '$shown_spec'\n";
        return undef;
    }

    my $exe = &FileUtils::filenameConcatenate($self->{'executable_path'}, $executable . &util::get_os_exe());
    if (!-e $exe) {
        # Hope it's on path
        $exe = $executable . &util::get_os_exe();
    }

    my $cmd = '"' . $exe . '" ' . $default_args;
    foreach my $open_arg (@_) {
        $cmd .= ' ' . $open_arg;
    }
    $cmd .= ' "' . $infodb_file_path . '"';
    $self->debugPrint("CMD: '" . $cmd . "'\n");

    my $infodb_file_handle = undef;
    if (!open($infodb_file_handle, $mode . ':utf8', $cmd)) {
        print STDERR "Error: Failed to open pipe to '$cmd'\n";
        print STDERR "       $!\n";
        return undef;
    }
    return $infodb_file_handle;
}
197## openPipedHandle(integer, string, string, string*) => filehandle ##
198
199
## @function openReadHandle(string, string) => filehandle
#
# Opens a pipe for reading from the executable configured in
# $self->{'read_executable'}. All arguments are handed on to openPipedHandle()
# after the mode and the executable.
#
# @return whatever openPipedHandle() returns
#
sub openReadHandle
{
    my ($self, @open_args) = @_;
    my $reader = $self->{'read_executable'};
    return $self->openPipedHandle(RWMODE_READ, $reader, @open_args);
}
206## openReadHandle(string, string) => filehandle
207
208
## @function openWriteHandle(string, string) => filehandle
#
# Opens a pipe for writing to the executable configured in
# $self->{'write_executable'}. All arguments are handed on to openPipedHandle()
# after the mode and the executable.
#
# @return whatever openPipedHandle() returns
#
sub openWriteHandle
{
    my ($self, @open_args) = @_;
    my $writer = $self->{'write_executable'};
    return $self->openPipedHandle(RWMODE_WRITE, $writer, @open_args);
}
214
## @function read_infodb_entry(string, string) => hashmap
#
# Fetches one record in its serialized form via read_infodb_rawentry() (which
# receives this function's arguments unchanged) and parses it with
# convert_infodb_string_to_hash().
#
# @return the result of convert_infodb_string_to_hash() for the record
#
sub read_infodb_entry
{
    my ($self, @lookup_args) = @_;
    my $serialized_record = $self->read_infodb_rawentry(@lookup_args);
    my $parsed_record = $self->convert_infodb_string_to_hash($serialized_record);
    return $parsed_record;
}
224## read_infodb_entry(string, string) => hashmap ##
225
226
## @function read_infodb_file(string, hashmap) => void
#
# Reads every record of a database (through the read executable) into the
# given hash, mapping each key to its serialized value: the record's metadata
# lines, each terminated by a single "\n".
#
# Line endings are normalised, so input using "\r\n" yields the same values as
# input using "\n". A record whose key line carries the delete marker (a minus
# after the closing bracket, as written by delete_infodb_entry()) removes that
# key from the hash instead of storing a value, matching read_infodb_keys().
#
# Dies when the read handle cannot be opened.
#
# @param  infodb_file_path  path of the database file to read
# @param  infodb_map        reference to the hash that receives the records
#
sub read_infodb_file
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);
    my $infodb_file_handle = $self->openReadHandle($infodb_file_path);
    if (!$infodb_file_handle) {
        die("Couldn't open pipe to read: " . $infodb_file_path . "\n");
    }
    my $infodb_key = "";
    my $infodb_value = "";
    my $is_delete_entry = 0;
    while (defined (my $infodb_line = <$infodb_file_handle>)) {
        # Strip any mix of trailing CR / LF; the newline is put back below
        $infodb_line =~ s/[\r\n]+$//;
        if ($infodb_line =~ /^\[([^\]]+)\](-)?$/) {
            $infodb_key = $1;
            $is_delete_entry = defined($2) ? 1 : 0;
        }
        elsif ($infodb_line =~ /^-{70}$/) {
            if ($is_delete_entry) {
                delete $infodb_map->{$infodb_key};
            }
            else {
                $infodb_map->{$infodb_key} = $infodb_value;
            }
            $infodb_key = "";
            $infodb_value = "";
            $is_delete_entry = 0;
        }
        else {
            # Keep one line per metadata value so the record can be parsed
            # line by line later on
            $infodb_value .= $infodb_line . "\n";
        }
    }
    $self->close_infodb_handle($infodb_file_handle);
}
254## read_infodb_file(string, hashmap) => void ##
255
256
## @function read_infodb_keys(string, hashmap) => void
#
# Collects the keys of a database into the given hash (each key maps to 1),
# reading from the executable configured in $self->{'keyread_executable'}.
#
# When that executable differs from the read executable every line of output
# is taken to be one key. Otherwise the output is in the 70 hyphen format and
# the keys are picked out of the "[key]" lines; a key line carrying the delete
# marker (a trailing minus) removes the key from the hash again.
#
# Dies when the pipe cannot be opened.
#
# @param  infodb_file_path  path of the database file
# @param  infodb_map        reference to the hash that receives the keys
#
sub read_infodb_keys
{
    my ($self, $infodb_file_path, $infodb_map) = @_;
    my $infodb_file_handle = $self->openPipedHandle(RWMODE_READ, $self->{'keyread_executable'}, $infodb_file_path);
    if (!$infodb_file_handle) {
        die("Couldn't open pipe from gdbmkeys: " . $infodb_file_path . "\n");
    }

    my $has_dedicated_key_reader = ($self->{'keyread_executable'} ne $self->{'read_executable'});
    if ($has_dedicated_key_reader) {
        # Simple case - every line is a key
        while (defined (my $key_line = <$infodb_file_handle>)) {
            $key_line =~ s/[\r\n]+$//;
            $infodb_map->{$key_line} = 1;
        }
    }
    else {
        # Slightly more difficult - keys must be parsed out of 70hyphen format
        while (defined (my $record_line = <$infodb_file_handle>)) {
            next unless ($record_line =~ /^\[([^\]]+)\](-)?[\r\n]*$/);
            my ($key, $delete_flag) = ($1, $2);
            if (defined $delete_flag) {
                delete $infodb_map->{$key};
            }
            else {
                $infodb_map->{$key} = 1;
            }
        }
    }
    $self->close_infodb_handle($infodb_file_handle);
}
295## read_infodb_keys(string, hashmap) => void ##
296
297
## @function read_infodb_rawentry(string, string) => string
#
# Returns the serialized value stored for one key.
#
# !! TEMPORARY: slow and naive - the whole database is loaded through
# read_infodb_file() just to pick out a single value. This should one day be
# replaced with database-specific versions that use dbget etc.
#
# @param  infodb_file_path  path of the database file
# @param  infodb_key        the key to look up
# @return the value loaded for the key, or undef when the key was not loaded
#
sub read_infodb_rawentry
{
    my ($self, $infodb_file_path, $infodb_key) = @_;
    # Throwaway store for all records; only one of them is of interest
    my %all_records;
    $self->read_infodb_file($infodb_file_path, \%all_records);
    return $all_records{$infodb_key};
}
314## read_infodb_rawentry(string, string) => string ##
315
316
## @function set_infodb_entry(string, string, hashmap)
#
# Stores a single record into the database by piping it to the write
# executable with the '-append' argument.
#
# Before serialization every value - except those of the "contains" metadata -
# is passed through ghtml::unescape_html(). Note that the arrays inside the
# caller's hash are replaced by the processed ones.
#
# @param  infodb_file_path  path of the database file
# @param  infodb_key        the key of the record
# @param  infodb_map        reference to a hash mapping a metadata key to a
#                           reference to an array of values
# @return 0 when the record was written, -1 when the pipe could not be opened
#         (an error naming the database file is printed to STDERR)
#
sub set_infodb_entry
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    # Undo HTML escaping on anything that is not part of the "contains"
    # metadata value
    foreach my $k (keys %$infodb_map) {
        my @unescaped_v = ();
        foreach my $v (@{$infodb_map->{$k}}) {
            if ($k eq "contains") {
                push(@unescaped_v, $v);
            }
            else {
                my $ev = &ghtml::unescape_html($v);
                push(@unescaped_v, $ev);
            }
        }
        $infodb_map->{$k} = \@unescaped_v;
    }

    # Generate the record string
    my $serialized_infodb_map = $self->convert_infodb_hash_to_string($infodb_map);

    # Store it into DB using '... -append' which despite its name actually
    # replaces the record if it already exists
    my $status = undef;
    my $infodb_file_handle = $self->openWriteHandle($infodb_file_path, '-append');
    if (!$infodb_file_handle) {
        # Name the database file: the handle is undefined on this path and
        # would tell the reader nothing
        print STDERR "Error: set_infodb_entry() failed to open pipe to: " . $infodb_file_path . "\n";
        print STDERR "       $!\n";
        $status = -1;
    }
    else {
        print $infodb_file_handle "[$infodb_key]\n";
        print $infodb_file_handle "$serialized_infodb_map\n";
        $self->close_infodb_handle($infodb_file_handle);
        $status = 0; # as in exit status of cmd OK
    }
    return $status;
}
361## set_infodb_entry(string, string, hashmap) => integer ##
362
363
## @function write_infodb_entry(filehandle, string, hashmap)
#
# Prints one complete record: the "[key]" line, one "<key>value" line per
# metadata value (metadata keys in sorted order) and the 70 hyphen separator.
#
# A value containing 70 or more consecutive hyphens has all of its hyphens
# written as the entity &#045; so it cannot be mistaken for the separator.
# The escaping is applied to a copy, so the caller's data is left untouched.
#
# @param  infodb_handle  the filehandle the record is printed to
# @param  infodb_key     the key of the record
# @param  infodb_map     reference to a hash mapping a metadata key to a
#                        reference to an array of values
#
sub write_infodb_entry
{
    my $self = shift(@_);
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
            # The loop variable aliases the element of the caller's array, so
            # any escaping has to happen on a private copy
            my $output_value = $infodb_value;
            if ($output_value =~ /-{70,}/) {
                # if value contains 70 or more hyphens in a row we need to
                # escape them to prevent txt2db from treating them as a
                # separator
                $output_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $output_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
386## write_infodb_entry(filehandle, string, hashmap) => void ##
387
388
## @function write_infodb_rawentry(filehandle, string, string)
#
# Prints one record whose value is already serialized: the "[key]" line, the
# value followed by a newline, and the 70 hyphen separator. The value is
# written exactly as given; nothing in it is escaped.
#
# @param  infodb_handle  the filehandle the record is printed to
# @param  infodb_key     the key of the record
# @param  infodb_val     the serialized value of the record
#
sub write_infodb_rawentry
{
    my ($self, $infodb_handle, $infodb_key, $infodb_val) = @_;
    my $separator = '-' x 70;

    print {$infodb_handle} "[$infodb_key]\n";
    print {$infodb_handle} "$infodb_val\n";
    print {$infodb_handle} $separator, "\n";
}
402## write_infodb_rawentry(filehandle, string, string) ##
403
404
4051;
Note: See TracBrowser for help on using the browser.