source: main/trunk/greenstone2/perllib/DBDrivers/70HyphenFormat.pm@ 30370

Last change on this file since 30370 was 30370, checked in by jmt12, 8 years ago

Reverting these back to the older but better supported ISA approach to declaring inheritance. Not even sure why I used the new parent mechanism (aside from possibly copying from tutorial)... can I still blame baby brain after 10 months?

File size: 11.6 KB
Line 
1###############################################################################
2#
3# 70HyphenFormat.pm -- The parent class of drivers that use the basic GS format
4# of a text obeying these rules:
5#
6# <line> := <uniqueid> <metadata>+ <separator>
7# <uniqueid> := \[[a-z][a-z0-9]*\]\n
8# <metadata> := <[a-z][a-z0-9]*>(^-{70})+\n
9# <separator> := -{70}\n
10#
11# Contains some utility functions useful to any driver
12# that makes use of this format.
13#
14# A component of the Greenstone digital library software from the New Zealand
15# Digital Library Project at the University of Waikato, New Zealand.
16#
17# Copyright (C) 1999-2015 New Zealand Digital Library Project
18#
19# This program is free software; you can redistribute it and/or modify it under
20# the terms of the GNU General Public License as published by the Free Software
21# Foundation; either version 2 of the License, or (at your option) any later
22# version.
23#
24# This program is distributed in the hope that it will be useful, but WITHOUT
25# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
27# more details.
28#
29# You should have received a copy of the GNU General Public License along with
30# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
31# Ave, Cambridge, MA 02139, USA.
32#
33###############################################################################
34
35# Note: This driver may be a candidate for further splitting, maybe into a
36# PipedExecutableDriver and a 70HyphenFormatDriver... but for now all piped
37# drivers are 70 hyphen format ones, so, yeah.
38
39package DBDrivers::70HyphenFormat;
40
41# Pragma
42use strict;
43
44# Libraries
45use ghtml;
46use util;
47use FileUtils;
48
49use DBDrivers::BaseDBDriver;
50
# Inheritance is declared by assigning to @ISA inside a BEGIN block, so the
# parent class is in place while the rest of this file is being compiled.
BEGIN {
    @DBDrivers::70HyphenFormat::ISA = ('DBDrivers::BaseDBDriver');
}

# Mode strings handed to open() by openPipedHandle():
#   RWMODE_READ  - read the output of the piped command
#   RWMODE_WRITE - write to the input of the piped command
use constant RWMODE_READ  => '-|';
use constant RWMODE_WRITE => '|-';
59
## @function constructor
#
# Builds the object with DBDrivers::BaseDBDriver's constructor (forwarding all
# remaining arguments), fills in this class' default settings and reblesses
# the object into the requested class.
#
# The four executable settings start out as the placeholder 'error';
# presumably each concrete driver replaces them - confirm against subclasses.
#
sub new
{
    my ($class, @base_args) = @_;
    my $self = DBDrivers::BaseDBDriver->new(@base_args);
    foreach my $setting (qw(executable_path keyread_executable read_executable write_executable)) {
        $self->{$setting} = 'error';
    }
    # Set to processor number for forced affinity; openPipedHandle() only
    # applies it when the value is zero or greater
    $self->{'forced_affinity'} = -1;
    return bless($self, $class);
}
## new(void) => 70HyphenFormat ##
76
77
78################################## Protected ##################################
79
80
## @function close_infodb_write_handle(filehandle)
#
# Closes a handle opened by this driver, unless
# removeConnectionIfPersistent() reports that it should stay open.
#
# @param handle       the filehandle to close
# @param force_close  optional flag handed on to
#                     removeConnectionIfPersistent(); undefined most of the
#                     time
#
# A failed close() is reported on STDERR but is not fatal. For a piped handle
# close() waits for the command and also fails when the command ended with a
# non-zero status, which is otherwise the only sign that it went wrong.
#
sub close_infodb_write_handle
{
    my $self = shift(@_);
    $self->debugPrintFunctionHeader(@_);
    my $handle = shift(@_);
    my $force_close = shift(@_); # Undefined most of the time
    my $continue_close = $self->removeConnectionIfPersistent($handle, $force_close);
    # Nothing to close if the open failed in the first place
    if ($continue_close && defined $handle) {
        if (!close($handle)) {
            if ($!) {
                print STDERR "Warning: Failed to close database handle: $!\n";
            }
            else {
                # $! is unset when the only problem was the command's status
                print STDERR "Warning: Piped database command ended with wait status $?\n";
            }
        }
    }
    return;
}
## close_infodb_write_handle(filehandle) => void ##
96
97
## @function delete_infodb_entry(filehandle, string)
#
# Writes a 'delete this key' record to an open write handle.
#
# @param infodb_handle  the handle to print to
# @param infodb_key     the key of the entry to remove
#
sub delete_infodb_entry
{
    my $self = shift(@_);
    $self->debugPrintFunctionHeader(@_);
    my ($infodb_handle, $infodb_key) = @_;
    # The key is followed (after the ]) by a minus, which signifies 'delete'
    print {$infodb_handle} "[$infodb_key]-\n";
    # The record still ends with the 70 minus signs, to help make the parsing
    # by db2txt simple
    print {$infodb_handle} '-' x 70, "\n";
}
## delete_infodb_entry(filehandle, string) => void ##
112
113
## @function open_infodb_write_handle(string, string)
#
# Returns a write handle for the given database file, reusing the connection
# offered by retrieveConnectionIfPersistent() when there is one and opening a
# new one through openWriteHandle() otherwise.
#
# @param path    path of the database file
# @param append  append flag; passed to the connection bookkeeping and on to
#                openWriteHandle()
# @param ...     any further arguments are handed to openWriteHandle()
# @return the filehandle, or undef when a new handle could not be opened
#
sub open_infodb_write_handle
{
    my $self = shift(@_);
    $self->debugPrintFunctionHeader(@_);
    my $path = shift(@_);
    my $append = shift(@_);
    my $infodb_file_handle = $self->retrieveConnectionIfPersistent($path, $append);
    # No available existing connection
    if (!$infodb_file_handle) {
        $infodb_file_handle = $self->openWriteHandle($path, $append, @_);
        # Only a handle that really opened may be registered; a failed open
        # must not be stored as if it were a usable connection
        if ($infodb_file_handle) {
            $self->registerConnectionIfPersistent($infodb_file_handle, $path, $append);
        }
    }
    return $infodb_file_handle;
}
## open_infodb_write_handle(string, string) => filehandle ##
131
132
## @function openPipedHandle(string, string, string, string*) => filehandle
#
# Builds the command line for one of the helper executables and opens a pipe
# to or from it.
#
# @param mode   RWMODE_READ or RWMODE_WRITE; ':utf8' is appended before the
#               mode is given to open()
# @param executable_and_default_args
#               executable name (lowercase letters and digits only),
#               optionally followed by default arguments
# @param infodb_file_path
#               database file path; always the last argument of the command
# @param ...    extra arguments placed between the default arguments and the
#               path; a bare 'append' is sent as '-append'
# @return the filehandle, or undef (with a message on STDERR) on failure
#
# NOTE(review): the command is passed to open() as one string with the path
# merely wrapped in double quotes, so Perl may run it through the shell.
# Callers must not pass untrusted paths or arguments.
#
sub openPipedHandle
{
    my $self = shift(@_);
    my $mode = shift(@_);
    my $executable_and_default_args = shift(@_);
    my $infodb_file_path = shift(@_);
    # Work on a copy: the elements of @_ are aliases of the caller's
    # variables, so rewriting 'append' in place would change the caller's data
    # (and die outright if the caller passed a literal)
    my @open_args = @_;

    my ($executable, $default_args);
    if (defined $executable_and_default_args) {
        ($executable, $default_args) = $executable_and_default_args =~ /^([a-z0-9]+)\s*(.*)$/;
    }
    if (!defined $executable) {
        my $shown = defined $executable_and_default_args ? $executable_and_default_args : '';
        print STDERR "Error: Unrecognised executable specification '$shown'\n";
        # undef rather than an empty list, as for a failed open below
        return undef;
    }

    my $exe_name = $executable . &util::get_os_exe();
    my $exe = &FileUtils::filenameConcatenate($self->{'executable_path'}, $exe_name);
    if (!-e $exe) {
        # Hope it's on path
        $exe = $exe_name;
    }

    my $cmd = '';
    if ($self->{'forced_affinity'} >= 0) {
        $cmd = 'taskset -c ' . $self->{'forced_affinity'} . ' ';
    }
    $cmd .= '"' . $exe . '" ' . $default_args;
    foreach my $open_arg (@open_args) {
        # An undefined argument still contributes its separating space
        my $arg = defined $open_arg ? $open_arg : '';
        # Special - append is typically missing a hyphen
        if ($arg eq 'append') {
            $arg = '-append';
        }
        $cmd .= ' ' . $arg;
    }
    $cmd .= ' "' . $infodb_file_path . '"';
    $self->debugPrint("CMD: '" . $cmd . "'\n");

    my $infodb_file_handle = undef;
    if (!open($infodb_file_handle, $mode . ':utf8', $cmd)) {
        print STDERR "Error: Failed to open pipe to '$cmd'\n";
        print STDERR " $!\n";
        return undef;
    }
    return $infodb_file_handle;
}
## openPipedHandle(string, string, string, string*) => filehandle ##
172
173
## @function openReadHandle(string, string) => filehandle
#
# Opens a pipe for reading from this driver's 'read_executable'. All arguments
# are forwarded unchanged to openPipedHandle(), which expects the database
# file path first.
#
sub openReadHandle
{
    my $self = shift(@_);
    my $read_command = $self->{'read_executable'};
    # @_ itself is passed on (not a copy) so argument aliasing stays as it was
    return $self->openPipedHandle(RWMODE_READ, $read_command, @_);
}
## openReadHandle(string, string) => filehandle ##
182
183
## @function openWriteHandle(*) => filehandle
#
# Opens a pipe for writing to this driver's 'write_executable'. All arguments
# are forwarded unchanged to openPipedHandle(), which expects the database
# file path first.
#
sub openWriteHandle
{
    my $self = shift(@_);
    my $write_command = $self->{'write_executable'};
    # @_ itself is passed on (not a copy) so argument aliasing stays as it was
    return $self->openPipedHandle(RWMODE_WRITE, $write_command, @_);
}
## openWriteHandle(*) => filehandle ##
192
193
## @function read_infodb_entry(string, string) => hashmap
#
# Fetches the raw text of one entry with read_infodb_rawentry() and returns
# what convert_infodb_string_to_hash() makes of it.
#
# Arguments are those of read_infodb_rawentry(): database file path, then key.
#
sub read_infodb_entry
{
    my ($self, @lookup_args) = @_;
    # Both calls are made in scalar context
    my $raw_string = scalar($self->read_infodb_rawentry(@lookup_args));
    return scalar($self->convert_infodb_string_to_hash($raw_string));
}
## read_infodb_entry(string, string) => hashmap ##
204
205
## @function read_infodb_file(string, hashmap) => void
#
# Reads every record from the handle returned by openReadHandle() and stores
# it in the supplied hashmap as key => raw value text.
#
# @param infodb_file_path  path of the database file
# @param infodb_map        hash reference that receives the entries
#
# A '[key]' line starts a record, a line of exactly 70 hyphens ends it, and
# every line in between is appended to the value. Value lines keep a single
# "\n" terminator whether the input used LF or CRLF line endings.
#
# If the read handle cannot be opened an error is printed and the map is left
# untouched.
#
sub read_infodb_file
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);
    $self->debugPrintFunctionHeader($infodb_file_path, $infodb_map);
    my $infodb_file_handle = $self->openReadHandle($infodb_file_path);
    if (!$infodb_file_handle) {
        print STDERR "Error: read_infodb_file() failed to open a read handle for: " . $infodb_file_path . "\n";
        return;
    }
    my $infodb_key = "";
    my $infodb_value = "";
    while (defined (my $infodb_line = <$infodb_file_handle>)) {
        # Turn a CRLF ending into LF. Removing it altogether (as was done
        # before) made consecutive value lines run together, while LF-only
        # input kept its newline.
        $infodb_line =~ s/(?:\r\n)+$/\n/;
        # In both patterns $ also matches just before the trailing newline
        if ($infodb_line =~ /^\[([^\]]+)\]$/) {
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/) {
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else {
            $infodb_value .= $infodb_line;
        }
    }
    $self->close_infodb_write_handle($infodb_file_handle);
    return;
}
## read_infodb_file(string, hashmap) => void ##
235
236
## @function read_infodb_keys(string, hashmap) => void
#
# Reads the keys of a database through the 'keyread_executable' and records
# each one in the supplied hashmap with the value 1.
#
# @param infodb_file_path  path of the database file
# @param infodb_map        hash reference that receives the keys
#
# Dies if the pipe cannot be opened.
#
sub read_infodb_keys
{
    my ($self, $infodb_file_path, $infodb_map) = @_;
    my $key_command = $self->{'keyread_executable'};
    my $infodb_file_handle = $self->openPipedHandle(RWMODE_READ, $key_command, $infodb_file_path);
    die("Couldn't open pipe from gdbmkeys: " . $infodb_file_path . "\n") unless $infodb_file_handle;

    # A dedicated keyread exe prints one plain key per line; when it is the
    # same program as the read exe the keys have to be parsed out of the 70
    # hyphen format instead
    my $plain_keys = ($key_command ne $self->{'read_executable'});
    while (defined (my $infodb_line = <$infodb_file_handle>)) {
        if ($plain_keys) {
            $infodb_line =~ s/[\r\n]+$//;
            $infodb_map->{$infodb_line} = 1;
            next;
        }
        # Only '[key]' lines matter; an optional minus after the ] marks a
        # deleted entry
        next unless ($infodb_line =~ /^\[([^\]]+)\](-)?[\r\n]*$/);
        my ($key, $delete_flag) = ($1, $2);
        if (defined $delete_flag) {
            delete $infodb_map->{$key};
        }
        else {
            $infodb_map->{$key} = 1;
        }
    }
    $self->close_infodb_write_handle($infodb_file_handle);
}
## read_infodb_keys(string, hashmap) => void ##
276
277
## @function read_infodb_rawentry(string, string) => string
#
# Returns the raw value text stored under one key, or undef when the key is
# not present.
#
# @param infodb_file_path  path of the database file
# @param infodb_key        key of the wanted entry
#
# !! TEMPORARY: Slow and naive implementation that loads the whole file with
# read_infodb_file() and then picks out the single value. This should one day
# be replaced with database-specific versions that will use dbget etc.
#
sub read_infodb_rawentry
{
    my ($self, $infodb_file_path, $infodb_key) = @_;
    # Throwaway map... only one of its entries is of interest
    my %all_entries = ();
    $self->read_infodb_file($infodb_file_path, \%all_entries);
    return $all_entries{$infodb_key};
}
## read_infodb_rawentry(string, string) => string ##
295
296
## @function set_infodb_entry(string, string, hashmap)
#
# Stores one record in the database by piping it to the write executable
# opened with '-append'.
#
# @param infodb_file_path  path of the database file
# @param infodb_key        key of the record
# @param infodb_map        hash reference of metadata name => array reference
#                          of values
# @return 0 when the record was written, -1 when the pipe could not be opened
#
# Note that the value arrays inside infodb_map are replaced by the processed
# copies, so the caller's map is changed by this call.
#
sub set_infodb_entry
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    # Run ghtml::unescape_html() over every value that is not part of the
    # "contains" metadata, which is copied as is
    foreach my $k (keys %$infodb_map) {
        my @processed_v = ();
        foreach my $v (@{$infodb_map->{$k}}) {
            if ($k eq "contains") {
                push(@processed_v, $v);
            }
            else {
                my $ev = &ghtml::unescape_html($v);
                push(@processed_v, $ev);
            }
        }
        $infodb_map->{$k} = \@processed_v;
    }

    # Generate the record string
    my $serialized_infodb_map = $self->convert_infodb_hash_to_string($infodb_map);

    # Store it into DB using '... -append' which despite its name actually
    # replaces the record if it already exists
    my $infodb_file_handle = $self->openWriteHandle($infodb_file_path, '-append');
    if (!$infodb_file_handle) {
        # Report the path; the handle is undefined at this point
        print STDERR "Error: set_infodb_entry() failed to open pipe to: " . $infodb_file_path . "\n";
        print STDERR " $!\n";
        return -1;
    }

    print $infodb_file_handle "[$infodb_key]\n";
    print $infodb_file_handle "$serialized_infodb_map\n";
    # End the record with the 70 hyphen separator, as the format requires and
    # as write_infodb_entry() and write_infodb_rawentry() do
    print $infodb_file_handle '-' x 70, "\n";
    $self->close_infodb_write_handle($infodb_file_handle);
    return 0; # as in exit status of cmd OK
}
## set_infodb_entry(string, string, hashmap) => integer ##
342
343
## @function write_infodb_entry(filehandle, string, hashmap)
#
# Prints one complete record to an open write handle: the '[key]' line, one
# '<name>value' line per value (names in sorted order) and the closing line of
# 70 hyphens.
#
# @param infodb_handle  the handle to print to
# @param infodb_key     key of the record
# @param infodb_map     hash reference of metadata name => array reference of
#                       values; it is only read, never modified
#
sub write_infodb_entry
{
    my $self = shift(@_);
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
        foreach my $stored_value (@{$infodb_map->{$infodb_value_key}}) {
            # Escape a copy: the loop variable is an alias of the element in
            # the caller's array, so substituting on it directly would rewrite
            # the caller's data
            my $infodb_value = $stored_value;
            if ($infodb_value =~ /-{70,}/) {
                # if value contains 70 or more hyphens in a row we need to escape them
                # to prevent txt2db from treating them as a separator
                $infodb_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
## write_infodb_entry(filehandle, string, hashmap) => void ##
367
368
## @function write_infodb_rawentry(filehandle, string, string)
#
# Prints one record whose value is already serialised: the '[key]' line, the
# value followed by a newline, and the closing line of 70 hyphens.
#
# @param infodb_handle  the handle to print to
# @param infodb_key     key of the record
# @param infodb_val     the value text, written exactly as given
#
sub write_infodb_rawentry
{
    my ($self, $infodb_handle, $infodb_key, $infodb_val) = @_;

    print {$infodb_handle} '[' . $infodb_key . ']' . "\n";
    print {$infodb_handle} $infodb_val . "\n";
    print {$infodb_handle} '-' x 70, "\n";
}
## write_infodb_rawentry(filehandle, string, string) ##
383
384
3851;
Note: See TracBrowser for help on using the repository browser.