source: main/trunk/greenstone2/perllib/DBDrivers/70HyphenFormat.pm@ 30355

Last change on this file since 30355 was 30355, checked in by jmt12, 8 years ago

Initial checkin of OO drivers for new dbutils system

File size: 11.5 KB
Line 
1###############################################################################
2#
3# 70HyphenFormat.pm -- The parent class of drivers that use the basic GS format
4# of a text obeying these rules:
5#
6# <line> := <uniqueid> <metadata>+ <separator>
7# <uniqueid> := \[[a-z][a-z0-9]*\]\n
8# <metadata> := <[a-z][a-z0-9]*>(^-{70})+\n
9# <separator> := -{70}\n
10#
11# Contains some utility functions useful to any driver
12# that makes use of this format.
13#
14# A component of the Greenstone digital library software from the New Zealand
15# Digital Library Project at the University of Waikato, New Zealand.
16#
17# Copyright (C) 1999-2015 New Zealand Digital Library Project
18#
19# This program is free software; you can redistribute it and/or modify it under
20# the terms of the GNU General Public License as published by the Free Software
21# Foundation; either version 2 of the License, or (at your option) any later
22# version.
23#
24# This program is distributed in the hope that it will be useful, but WITHOUT
25# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
27# more details.
28#
29# You should have received a copy of the GNU General Public License along with
30# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
31# Ave, Cambridge, MA 02139, USA.
32#
33###############################################################################
34
# Note: This driver may be a candidate for further splitting, perhaps into a
# PipedExecutableDriver and a 70HyphenFormatDriver. For now, however, every
# piped driver also uses the 70 hyphen format, so the two remain combined.
38
39package DBDrivers::70HyphenFormat;
40
41# Pragma
42use strict;
43
44# Libraries
45use ghtml;
46use util;
47use FileUtils;
48use parent 'DBDrivers::BaseDBDriver';
49
# Pipe modes handed to open() by openPipedHandle(): '-|' reads the output of
# the spawned command, '|-' writes to its input.
use constant RWMODE_READ  => '-|';
use constant RWMODE_WRITE => '|-';
54
## @function constructor
#
# Builds the object through DBDrivers::BaseDBDriver->new(), passing along all
# constructor arguments, then fills in this class' settings and reblesses the
# object into the requested class.
#
# The executable settings all start out as the placeholder string 'error'
# (presumably concrete drivers overwrite them - verify against subclasses).
# 'forced_affinity' starts at -1; openPipedHandle() only prefixes commands with
# 'taskset -c <n>' when it is zero or greater.
#
# @return the new driver object
#
sub new
{
  my ($class, @constructor_args) = @_;
  my $self = DBDrivers::BaseDBDriver->new(@constructor_args);
  foreach my $setting ('executable_path', 'keyread_executable', 'read_executable', 'write_executable') {
    $self->{$setting} = 'error';
  }
  # Set to processor number for forced affinity
  $self->{'forced_affinity'} = -1;
  return bless($self, $class);
}
## new(void) => 70HyphenFormat ##
71
72
73################################## Protected ##################################
74
75
## @function close_infodb_write_handle(filehandle, boolean)
#
# Closes the given handle, unless removeConnectionIfPersistent() answers false
# for it, in which case the handle is left open.
#
# @param handle       the filehandle to close
# @param force_close  optional flag passed through untouched to
#                     removeConnectionIfPersistent(); undefined most of the time
#
sub close_infodb_write_handle
{
  my $self = shift(@_);
  $self->debugPrintFunctionHeader(@_);
  my ($handle, $force_close) = @_;
  # Only really close when the connection bookkeeping says we should
  close($handle) if $self->removeConnectionIfPersistent($handle, $force_close);
  return;
}
## close_infodb_write_handle(filehandle, boolean) => void ##
91
92
## @function delete_infodb_entry(filehandle, string)
#
# Writes a 'delete' instruction for one key to the given handle: the key in
# square brackets followed by a minus sign, then the 70 hyphen separator line.
#
# @param infodb_handle  handle the instruction is printed to
# @param infodb_key     key of the record to delete
#
sub delete_infodb_entry
{
  my ($self, @arguments) = @_;
  $self->debugPrintFunctionHeader(@arguments);
  my ($infodb_handle, $infodb_key) = @arguments;
  # A minus at the end of a key (after the ]) signifies 'delete'
  print {$infodb_handle} "[${infodb_key}]-\n";
  # The 70 minus signs are also needed, to help make the parsing by db2txt simple
  print {$infodb_handle} '-' x 70, "\n";
}
## delete_infodb_entry(filehandle, string) => void ##
107
108
## @function open_infodb_write_handle(string, string, string*)
#
# Returns a write handle for the given database path. A handle returned by
# retrieveConnectionIfPersistent() is reused when it is true; otherwise a new
# one is opened with openWriteHandle() and then passed to
# registerConnectionIfPersistent().
#
# @param path    path of the database file
# @param append  append flag, forwarded to the connection helpers and to
#                openWriteHandle()
# @param ...     any further arguments are forwarded to openWriteHandle()
# @return the write filehandle
#
sub open_infodb_write_handle
{
  my $self = shift(@_);
  $self->debugPrintFunctionHeader(@_);
  my $path = shift(@_);
  my $append = shift(@_);
  my $handle = $self->retrieveConnectionIfPersistent($path, $append);
  # An existing connection is available - nothing more to do
  return $handle if $handle;
  $handle = $self->openWriteHandle($path, $append, @_);
  $self->registerConnectionIfPersistent($handle, $path, $append);
  return $handle;
}
## open_infodb_write_handle(string, string, string*) => filehandle ##
126
127
## @function openPipedHandle(string, string, string, string*) => filehandle
#
# Opens a pipe to one of the helper executables and returns the handle.
#
# The command line is assembled as:
#   [taskset -c <forced_affinity>] "<exe>" <default args> <extra args> "<path>"
# where <exe> is looked for in 'executable_path' first and otherwise left for
# the shell to find on the PATH.
#
# @param mode  the pipe mode for open(): RWMODE_READ ('-|') or RWMODE_WRITE
#              ('|-'); ':utf8' is added to it
# @param executable_and_default_args  executable name (lowercase letters and
#              digits only) optionally followed by its default arguments
# @param infodb_file_path  database path, added double-quoted as last argument
# @param ...   extra arguments placed before the path; a bare 'append' is sent
#              to the executable as '-append'
# @return the opened filehandle, or undef if the executable name cannot be
#         parsed or the pipe cannot be opened
#
sub openPipedHandle
{
  my $self = shift(@_);
  my $mode = shift(@_);
  my $executable_and_default_args = shift(@_);
  my $infodb_file_path = shift(@_);
  my ($executable, $default_args) = $executable_and_default_args =~ /^([a-z0-9]+)\s*(.*)$/;
  # Without a recognisable executable name the command would be garbage
  if (!defined $executable) {
    print STDERR "Error: Unable to determine executable from '$executable_and_default_args'\n";
    return undef;
  }
  my $exe = &FileUtils::filenameConcatenate($self->{'executable_path'}, $executable . &util::get_os_exe());
  if (!-e $exe) {
    # Hope it's on path
    $exe = $executable . &util::get_os_exe();
  }
  my $infodb_file_handle = undef;
  my $cmd = '';
  if ($self->{'forced_affinity'} >= 0)
  {
    $cmd = 'taskset -c ' . $self->{'forced_affinity'} . ' ';
  }
  $cmd .= '"' . $exe . '" ' . $default_args;
  foreach my $open_arg (@_) {
    # Special - append is typically missing a hyphen. Work on a copy: the loop
    # variable aliases the caller's own argument, so assigning to it would
    # change the caller's variable (or die outright for a string literal).
    my $command_arg = ($open_arg eq 'append') ? '-append' : $open_arg;
    $cmd .= ' ' . $command_arg;
  }
  $cmd .= ' "' . $infodb_file_path . '"';
  $self->debugPrint("CMD: '" . $cmd . "'\n");
  if (!open($infodb_file_handle, $mode . ':utf8', $cmd)) {
    print STDERR "Error: Failed to open pipe to '$cmd'\n";
    print STDERR " $!\n";
    return undef;
  }
  return $infodb_file_handle;
}
## openPipedHandle(string, string, string, string*) => filehandle ##
167
168
## @function openReadHandle(string, string*) => filehandle
#
# Opens a read pipe (RWMODE_READ) from the command stored in 'read_executable'.
# Every argument is forwarded as-is to openPipedHandle(), which expects the
# database path first.
#
# @return whatever openPipedHandle() returns
#
sub openReadHandle
{
  my $self = shift(@_);
  my $read_command = $self->{'read_executable'};
  return $self->openPipedHandle(RWMODE_READ, $read_command, @_);
}
## openReadHandle(string, string*) => filehandle ##
177
178
## @function openWriteHandle(*) => filehandle
#
# Opens a write pipe (RWMODE_WRITE) to the command stored in
# 'write_executable'. Every argument is forwarded as-is to openPipedHandle(),
# which expects the database path first.
#
# @return whatever openPipedHandle() returns
#
sub openWriteHandle
{
  my $self = shift(@_);
  my $write_command = $self->{'write_executable'};
  return $self->openPipedHandle(RWMODE_WRITE, $write_command, @_);
}
## openWriteHandle(*) => filehandle ##
187
188
## @function read_infodb_entry(string, string) => hashmap
#
# Fetches the raw text of one record with read_infodb_rawentry() (all
# arguments are forwarded to it) and returns the result of passing that text
# through convert_infodb_string_to_hash().
#
sub read_infodb_entry
{
  my $self = shift(@_);
  my $record = $self->convert_infodb_string_to_hash(scalar($self->read_infodb_rawentry(@_)));
  return $record;
}
## read_infodb_entry(string, string) => hashmap ##
199
200
## @function read_infodb_file(string, hashmap) => void
#
# Reads every record of a database through openReadHandle() and stores it in
# the given map as key => raw record text.
#
# A '[key]' line starts a record, a line of exactly 70 hyphens ends it and
# every line in between is appended to the record's text. Lines read after the
# last separator are dropped.
#
# If the read handle cannot be opened an error is printed to STDERR and the
# map is left untouched.
#
# @param infodb_file_path  path of the database file
# @param infodb_map        hash reference that receives the records
#
sub read_infodb_file
{
  my $self = shift(@_);
  my $infodb_file_path = shift(@_);
  my $infodb_map = shift(@_);
  $self->debugPrintFunctionHeader($infodb_file_path, $infodb_map);
  my $infodb_file_handle = $self->openReadHandle($infodb_file_path);
  # Reading from (and then closing) an undefined handle makes no sense
  if (!$infodb_file_handle) {
    print STDERR "Error: read_infodb_file() failed to open a read handle for '" . $infodb_file_path . "'\n";
    return;
  }
  my $infodb_line = "";
  my $infodb_key = "";
  my $infodb_value = "";
  while (defined ($infodb_line = <$infodb_file_handle>)) {
    # Only trailing CRLF pairs are removed here; a bare "\n" stays on the line
    # and therefore ends up in the stored value. The patterns below still
    # match such lines, as '$' also matches just before a final newline.
    $infodb_line =~ s/(\r\n)+$//;
    if ($infodb_line =~ /^\[([^\]]+)\]$/) {
      $infodb_key = $1;
    }
    elsif ($infodb_line =~ /^-{70}$/) {
      $infodb_map->{$infodb_key} = $infodb_value;
      $infodb_key = "";
      $infodb_value = "";
    }
    else {
      $infodb_value .= $infodb_line;
    }
  }
  $self->close_infodb_write_handle($infodb_file_handle);
}
## read_infodb_file(string, hashmap) => void ##
230
231
## @function read_infodb_keys(string, hashmap) => void
#
# Collects the keys of a database into the given map (key => 1) by reading
# the output of the command stored in 'keyread_executable'.
#
# When that command differs from 'read_executable' each output line is taken
# to be one key. Otherwise the output is in 70 hyphen format and keys are
# parsed from the '[key]' lines; a '[key]-' line removes the key from the map.
#
# Dies if the pipe cannot be opened.
#
# @param infodb_file_path  path of the database file
# @param infodb_map        hash reference that receives the keys
#
sub read_infodb_keys
{
  my ($self, $infodb_file_path, $infodb_map) = @_;
  my $key_pipe = $self->openPipedHandle(RWMODE_READ, $self->{'keyread_executable'}, $infodb_file_path);
  die("Couldn't open pipe from gdbmkeys: " . $infodb_file_path . "\n") unless $key_pipe;
  my $has_dedicated_keyreader = ($self->{'keyread_executable'} ne $self->{'read_executable'});
  while (defined(my $line = <$key_pipe>)) {
    if ($has_dedicated_keyreader) {
      # Simple case - dedicated keyread exe, so keys are strings
      $line =~ s/[\r\n]+$//;
      $infodb_map->{$line} = 1;
    }
    elsif ($line =~ /^\[([^\]]+)\](-)?[\r\n]*$/) {
      # Slightly more difficult - have to parse keys out of 70hyphen format
      my ($key, $delete_flag) = ($1, $2);
      if (defined $delete_flag) {
        delete $infodb_map->{$key};
      }
      else {
        $infodb_map->{$key} = 1;
      }
    }
  }
  $self->close_infodb_write_handle($key_pipe);
}
## read_infodb_keys(string, hashmap) => void ##
271
272
## @function read_infodb_rawentry(string, string) => string
#
# Returns the raw text stored under one key, or undef when read_infodb_file()
# did not produce an entry for that key.
#
# !! TEMPORARY: Slow and naive implementation that loads the whole database
# with read_infodb_file() just to pick out a single value. This should one day
# be replaced with database-specific versions that will use dbget etc.
#
# @param infodb_file_path  path of the database file
# @param infodb_key        key of the wanted record
#
sub read_infodb_rawentry
{
  my ($self, $infodb_file_path, $infodb_key) = @_;
  # Throwaway map... only a single entry of it is of interest
  my %all_entries = ();
  $self->read_infodb_file($infodb_file_path, \%all_entries);
  return $all_entries{$infodb_key};
}
## read_infodb_rawentry(string, string) => string ##
290
291
## @function set_infodb_entry(string, string, hashmap)
#
# Stores a single record in the database by piping it to the write executable
# opened with '-append'.
#
# Note that the given map is modified: every value list is replaced by a new
# list in which all values, except those of the "contains" metadata, have been
# passed through ghtml::unescape_html().
#
# @param infodb_file_path  path of the database file
# @param infodb_key        key of the record
# @param infodb_map        hash reference of metadata name => list of values
# @return 0 when the record was written, -1 when the pipe could not be opened
#
sub set_infodb_entry
{
  my $self = shift(@_);
  my $infodb_file_path = shift(@_);
  my $infodb_key = shift(@_);
  my $infodb_map = shift(@_);

  # Undo HTML escaping for anything that is not part of the "contains"
  # metadata value
  foreach my $k (keys %$infodb_map) {
    my @unescaped_v = ();
    foreach my $v (@{$infodb_map->{$k}}) {
      if ($k eq "contains") {
        push(@unescaped_v, $v);
      }
      else {
        my $uv = &ghtml::unescape_html($v);
        push(@unescaped_v, $uv);
      }
    }
    $infodb_map->{$k} = \@unescaped_v;
  }

  # Generate the record string
  my $serialized_infodb_map = $self->convert_infodb_hash_to_string($infodb_map);

  # Store it into DB using '... -append' which despite its name actually
  # replaces the record if it already exists
  my $status = undef;
  my $infodb_file_handle = $self->openWriteHandle($infodb_file_path, '-append');
  if (!$infodb_file_handle) {
    # Report the database path: the handle itself is undefined at this point
    print STDERR "Error: set_infodb_entry() failed to open pipe to: " . $infodb_file_path . "\n";
    print STDERR " $!\n";
    $status = -1;
  }
  else {
    print $infodb_file_handle "[$infodb_key]\n";
    print $infodb_file_handle "$serialized_infodb_map\n";
    $self->close_infodb_write_handle($infodb_file_handle);
    $status = 0; # as in exit status of cmd OK
  }
  return $status;
}
## set_infodb_entry(string, string, hashmap) => integer ##
337
338
## @function write_infodb_entry(filehandle, string, hashmap)
#
# Writes one record in 70 hyphen format: the '[key]' line, one
# '<name>value' line per metadata value (names in sorted order) and finally
# the separator line of 70 hyphens.
#
# A value containing a run of 70 or more hyphens is written with all of its
# hyphens replaced by '&#045;'. The caller's map is never modified.
#
# @param infodb_handle  handle the record is printed to
# @param infodb_key     key of the record
# @param infodb_map     hash reference of metadata name => list of values
#
sub write_infodb_entry
{
  my $self = shift(@_);
  my $infodb_handle = shift(@_);
  my $infodb_key = shift(@_);
  my $infodb_map = shift(@_);

  print $infodb_handle "[$infodb_key]\n";
  foreach my $infodb_value_key (sort keys(%$infodb_map)) {
    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
      # Escape a copy: the loop variable is an alias of the element stored in
      # the caller's map, which must stay as it is
      my $output_value = $infodb_value;
      if ($output_value =~ /-{70,}/) {
        # if value contains 70 or more hyphens in a row we need to escape them
        # to prevent txt2db from treating them as a separator
        $output_value =~ s/-/&\#045;/g;
      }
      print $infodb_handle "<$infodb_value_key>" . $output_value . "\n";
    }
  }
  print $infodb_handle '-' x 70, "\n";
}
## write_infodb_entry(filehandle, string, hashmap) => void ##
362
363
## @function write_infodb_rawentry(filehandle, string, string)
#
# Writes one record whose body is already serialized: the '[key]' line, the
# given text followed by a newline, then the separator line of 70 hyphens.
# The text is written exactly as given, without any escaping.
#
# @param infodb_handle  handle the record is printed to
# @param infodb_key     key of the record
# @param infodb_val     the serialized record text
#
sub write_infodb_rawentry
{
  my ($self, $infodb_handle, $infodb_key, $infodb_val) = @_;

  print {$infodb_handle} '[' . $infodb_key . "]\n";
  print {$infodb_handle} $infodb_val . "\n";
  print {$infodb_handle} '-' x 70, "\n";
}
## write_infodb_rawentry(filehandle, string, string) => void ##
378
379
3801;
Note: See TracBrowser for help on using the repository browser.