source: gs2-extensions/tdb/trunk/perllib/DBDrivers/70HyphenFormat.pm@ 30332

Last change on this file since 30332 was 30332, checked in by jmt12, 8 years ago

The parent class of all database drivers using the '70 hyphen separator' format. This includes GDBM and TDB.

File size: 11.5 KB
Line 
1###############################################################################
2#
3# 70HyphenFormat.pm -- The parent class of drivers that use the basic GS format
4# of a text obeying these rules:
5#
6# <line> := <uniqueid> <metadata>+ <separator>
7# <uniqueid> := \[[a-z][a-z0-9]*\]\n
8# <metadata> := <[a-z][a-z0-9]*>(^-{70})+\n
9# <separator> := -{70}\n
10#
11# Contains some utility functions useful to any driver
12# that makes use of this format.
13#
14# A component of the Greenstone digital library software from the New Zealand
15# Digital Library Project at the University of Waikato, New Zealand.
16#
17# Copyright (C) 1999-2015 New Zealand Digital Library Project
18#
19# This program is free software; you can redistribute it and/or modify it under
20# the terms of the GNU General Public License as published by the Free Software
21# Foundation; either version 2 of the License, or (at your option) any later
22# version.
23#
24# This program is distributed in the hope that it will be useful, but WITHOUT
25# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
27# more details.
28#
29# You should have received a copy of the GNU General Public License along with
30# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
31# Ave, Cambridge, MA 02139, USA.
32#
33###############################################################################
34
35package DBDrivers::70HyphenFormat;
36
37# Pragma
38use strict;
39
40# Libraries
41use ghtml;
42use util;
43use FileUtils;
44use parent 'DBDrivers::BaseDBDriver';
45
# Pipe modes handed to open() by openPipedHandle():
#   RWMODE_READ  - the handle reads from the command's standard output
#   RWMODE_WRITE - the handle writes to the command's standard input
use constant RWMODE_READ  => '-|';
use constant RWMODE_WRITE => '|-';
50
## @function constructor
#
# Builds the object through DBDrivers::BaseDBDriver->new(), handing on every
# argument after the class name, then re-blesses it into the requested class.
# The four executable settings start out as the placeholder string 'error';
# presumably concrete drivers overwrite them (not visible in this file).
#
sub new
{
    my ($class, @constructor_args) = @_;
    my $self = DBDrivers::BaseDBDriver->new(@constructor_args);
    # Placeholder values for the driver-specific executables
    foreach my $setting (qw(executable_path keyread_executable read_executable write_executable)) {
        $self->{$setting} = 'error';
    }
    return bless($self, $class);
}
64## new(void) => 70HyphenFormat ##
65
66
67################################## Protected ##################################
68
69
## @function close_infodb_handle(filehandle)
#
# Emits the debug function header, then closes the given handle.
#
# @param $infodb_handle the filehandle to close
# @return the result of close()
#
sub close_infodb_handle
{
    my $self = shift(@_);
    my $handle_to_close = shift(@_);
    $self->debugPrintFunctionHeader();
    return close($handle_to_close);
}
79## close_infodb_handle(filehandle) => void ##
80
81
## @function close_infodb_write_handle(filehandle)
#
# Write handles need no special treatment in this format, so this simply
# forwards its arguments to close_infodb_handle().
#
sub close_infodb_write_handle
{
    my ($self, @handle_args) = @_;
    return $self->close_infodb_handle(@handle_args);
}
89## close_infodb_write_handle(filehandle) => void ##
90
91
## @function convert_infodb_hash_to_string(hashmap) => string
#
# Serialises a metadata map into the textual record body: one
# "<key>value\n" line per value.
#
# @param $infodb_map reference to a hash of metadata name => array reference
#                    of values
# @return the concatenated lines; an empty string for an empty map
#
sub convert_infodb_hash_to_string
{
    my $self = shift(@_);
    my $infodb_map = shift(@_);
    my $infodb_entry_value = "";
    # Keys are sorted (as write_infodb_entry() does) so that the same map
    # always serialises to the same string; values keep their array order
    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
            $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
        }
    }
    return $infodb_entry_value;
}
106## convert_infodb_hash_to_string(hashmap) => string ##
107
108
## @function convert_infodb_string_to_hash(string) => hashmap
#
# Parses a textual record body back into a metadata map. Every line of the
# form "<key>value" adds value to the array stored under key; lines that do
# not start with "<key>" are ignored.
#
# @param $infodb_entry_value the record body, or undef
# @return reference to a hash of key => array reference of values. A defined
#         string without any metadata lines yields an empty hash. An
#         undefined string prints a warning to STDERR and yields undef.
#
sub convert_infodb_string_to_hash
{
    my $self = shift(@_);
    my $infodb_entry_value = shift(@_);

    if (!defined $infodb_entry_value) {
        print STDERR "Warning: No value to convert into a infodb hashtable\n";
        return undef;
    }

    # Start from a real (empty) hash so callers always receive a hashmap for
    # a defined record, even when it holds no metadata lines
    my $infodb_map = {};
    while ($infodb_entry_value =~ /^<(.*?)>(.*)$/mg) {
        my $infodb_value_key = $1;
        my $infodb_value = $2;
        # push autovivifies the array for a key seen for the first time
        push(@{$infodb_map->{$infodb_value_key}}, $infodb_value);
    }

    return $infodb_map;
}
136## convert_infodb_string_to_hash(string) => hashmap ##
137
138
## @function delete_infodb_entry(filehandle, string)
#
# Writes a 'delete' instruction for the given key to an open write handle.
#
# @param $infodb_handle open write handle
# @param $infodb_key    key of the record to delete
#
sub delete_infodb_entry
{
    my ($self, $infodb_handle, $infodb_key) = @_;
    my $separator = '-' x 70;

    # A trailing minus directly after the closing bracket means 'delete'
    print {$infodb_handle} "[$infodb_key]-\n";

    # The separator line is still required so db2txt-style parsing stays simple
    print {$infodb_handle} $separator, "\n";
}
153## delete_infodb_entry(filehandle, string) => void ##
154
155
## @function open_infodb_write_handle(string, string)
#
# Emits the debug function header (with the arguments received) and opens a
# write handle by forwarding those arguments to openWriteHandle().
#
# @return whatever openWriteHandle() returns, evaluated in scalar context
#
sub open_infodb_write_handle
{
    my $self = shift(@_);
    $self->debugPrintFunctionHeader(@_);
    return scalar($self->openWriteHandle(@_));
}
165## open_infodb_write_handle(string, string) => filehandle ##
166
167
## @function openPipedHandle(integer, string, string, string*) => filehandle
#
# Opens a pipe to one of the driver's helper executables.
#
# @param $mode             pipe mode given to open() (RWMODE_READ or
#                          RWMODE_WRITE)
# @param $executable       base name of the executable; the suffix returned by
#                          util::get_os_exe() is appended
# @param $infodb_file_path database file, appended (double-quoted) as the last
#                          command argument
# @param @_                any further arguments are inserted, unquoted,
#                          between the executable and the database path
# @return the opened filehandle (opened with the :utf8 layer), or undef when
#         the executable cannot be found or the pipe cannot be opened
#
# The executable is looked for in $self->{'executable_path'}, then relative
# to the current directory, then in each directory of the PATH.
#
# NOTE(review): the command is assembled as a single string, so arguments or
# paths containing double quotes or shell metacharacters are not protected.
#
sub openPipedHandle
{
    my $self = shift(@_);
    my $mode = shift(@_);
    my $executable = shift(@_);
    my $infodb_file_path = shift(@_);
    my $exe_name = $executable . &util::get_os_exe();
    my $exe = &FileUtils::filenameConcatenate($self->{'executable_path'}, $exe_name);
    if (!-e $exe) {
        # Not in the configured directory - accept a copy relative to the
        # current directory, otherwise see if it's on the PATH
        $exe = $exe_name;
        if (!-e $exe) {
            require File::Spec;
            my $exe_on_path = undef;
            foreach my $path_dir (File::Spec->path()) {
                my $candidate = File::Spec->catfile($path_dir, $exe_name);
                if (-f $candidate) {
                    $exe_on_path = $candidate;
                    last;
                }
            }
            if (!defined $exe_on_path) {
                print STDERR "Error: Unable to find " . $exe . "\n";
                return undef;
            }
            $exe = $exe_on_path;
        }
    }
    my $infodb_file_handle = undef;
    my $cmd = '"' . $exe . '"';
    foreach my $open_arg (@_) {
        $cmd .= ' ' . $open_arg;
    }
    $cmd .= ' "' . $infodb_file_path . '"';
    $self->debugPrint("CMD: '" . $cmd . "'\n");
    # The :utf8 layer is requested as part of the open mode
    if (!open($infodb_file_handle, $mode . ':utf8', $cmd)) {
        print STDERR "Error: Failed to open pipe to '$cmd'\n";
        print STDERR " $!\n";
        return undef;
    }
    return $infodb_file_handle;
}
200## openPipedHandle(integer, string, string, string*) => filehandle ##
201
202
## @function openReadHandle(string, string) => filehandle
#
# Opens a read pipe using the driver's configured 'read_executable'. All
# arguments are handed on to openPipedHandle() after the mode and executable.
#
sub openReadHandle
{
    my ($self, @pipe_args) = @_;
    my $reader = $self->{'read_executable'};
    return $self->openPipedHandle(RWMODE_READ, $reader, @pipe_args);
}
209## openReadHandle(string, string) => filehandle
210
211
## @function openWriteHandle(string, string*) => filehandle
#
# Opens a write pipe using the driver's configured 'write_executable'. All
# arguments are handed on to openPipedHandle() after the mode and executable.
#
sub openWriteHandle
{
    my ($self, @pipe_args) = @_;
    my $writer = $self->{'write_executable'};
    return $self->openPipedHandle(RWMODE_WRITE, $writer, @pipe_args);
}
217
## @function read_infodb_entry(string, string) => hashmap
#
# Fetches the raw text of one record via read_infodb_rawentry() and converts
# it with convert_infodb_string_to_hash().
#
# @return the value returned by convert_infodb_string_to_hash()
#
sub read_infodb_entry
{
    my ($self, @lookup_args) = @_;
    my $record_text = $self->read_infodb_rawentry(@lookup_args);
    return scalar($self->convert_infodb_string_to_hash($record_text));
}
227## read_infodb_entry(string, string) => hashmap ##
228
229
## @function read_infodb_file(string, hashmap) => void
#
# Reads every record of a database (through the read pipe) into a map of
# key => raw record text.
#
# @param $infodb_file_path path of the database to read
# @param $infodb_map       hash reference that receives one entry per record;
#                          existing entries with the same key are overwritten
#
# Dies when the read pipe cannot be opened.
#
sub read_infodb_file
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);
    my $infodb_file_handle = $self->openReadHandle($infodb_file_path);
    # Fail with a clear message (as read_infodb_keys() does) rather than
    # stumbling over an undefined handle further down
    if (!$infodb_file_handle) {
        die("Couldn't open read pipe for: " . $infodb_file_path . "\n");
    }
    my $infodb_line = "";
    my $infodb_key = "";
    my $infodb_value = "";
    while (defined ($infodb_line = <$infodb_file_handle>)) {
        # Only trailing CRLF pairs are removed. A bare "\n" stays on the line,
        # which keeps successive metadata lines separated in the record text;
        # the patterns below still match because $ matches before a final "\n"
        $infodb_line =~ s/(\r\n)+$//;
        if ($infodb_line =~ /^\[([^\]]+)\]$/) {
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/) {
            # Separator: the record is complete, store it and start afresh
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else {
            $infodb_value .= $infodb_line;
        }
    }
    $self->close_infodb_handle($infodb_file_handle);
}
257## read_infodb_file(string, hashmap) => void ##
258
259
## @function read_infodb_keys(string, hashmap) => void
#
# Collects the keys of a database into a map (each key is stored with the
# value 1).
#
# @param $infodb_file_path path of the database to read
# @param $infodb_map       hash reference that receives the keys
#
# Dies when the pipe from the key-reading executable cannot be opened.
#
sub read_infodb_keys
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);
    my $infodb_file_handle = $self->openPipedHandle(RWMODE_READ, $self->{'keyread_executable'}, $infodb_file_path);
    if (!$infodb_file_handle) {
        # Name the executable actually configured for this driver
        die("Couldn't open pipe from " . $self->{'keyread_executable'} . ": " . $infodb_file_path . "\n");
    }
    my $infodb_line = "";
    # Simple case - dedicated keyread exe, so keys are strings
    if ($self->{'keyread_executable'} ne $self->{'read_executable'}) {
        while (defined ($infodb_line = <$infodb_file_handle>)) {
            $infodb_line =~ s/[\r\n]+$//;
            $infodb_map->{$infodb_line} = 1;
        }
    }
    # Slightly more difficult - have to parse keys out of 70hyphen format
    else {
        while (defined ($infodb_line = <$infodb_file_handle>)) {
            if ($infodb_line =~ /^\[([^\]]+)\][\r\n]*$/) {
                $infodb_map->{$1} = 1;
            }
        }
    }
    $self->close_infodb_handle($infodb_file_handle);
}
291## read_infodb_keys(string, hashmap) => void ##
292
293
## @function read_infodb_rawentry(string, string) => string
#
# Returns the raw text stored under one key.
#
# !! TEMPORARY: Slow and naive implementation that loads the whole database
# through read_infodb_file() and then picks out the single requested value.
# This should one day be replaced with database-specific versions that will
# use dbget etc.
#
# @param $infodb_file_path path of the database to read
# @param $infodb_key       key of the wanted record
# @return the record text, or undef when the key was not read
#
sub read_infodb_rawentry
{
    my ($self, $infodb_file_path, $infodb_key) = @_;
    # Throw-away map: only one of its entries is of interest
    my %all_entries = ();
    $self->read_infodb_file($infodb_file_path, \%all_entries);
    return $all_entries{$infodb_key};
}
310## read_infodb_rawentry(string, string) => string ##
311
312
## @function set_infodb_entry(string, string, hashmap)
#
# Stores a single record in the database through a write pipe opened with
# the '-append' argument.
#
# @param $infodb_file_path path of the database to update
# @param $infodb_key       key of the record
# @param $infodb_map       hash reference of metadata name => array reference
#                          of values. Note that the arrays in this map are
#                          replaced by their HTML-unescaped versions.
# @return 0 when the record was written, -1 when the pipe could not be opened
#
sub set_infodb_entry
{
    my $self = shift(@_);
    my $infodb_file_path = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    # HTML unescape every value, except those of the "contains" metadata
    # which are stored untouched
    foreach my $k (keys %$infodb_map) {
        my @unescaped_v = ();
        foreach my $v (@{$infodb_map->{$k}}) {
            if ($k eq "contains") {
                push(@unescaped_v, $v);
            }
            else {
                my $uv = &ghtml::unescape_html($v);
                push(@unescaped_v, $uv);
            }
        }
        $infodb_map->{$k} = \@unescaped_v;
    }

    # Generate the record string
    my $serialized_infodb_map = $self->convert_infodb_hash_to_string($infodb_map);

    # Store it into DB using '... -append' which despite its name actually
    # replaces the record if it already exists
    my $status = undef;
    my $infodb_file_handle = $self->openWriteHandle($infodb_file_path, '-append');
    if (!$infodb_file_handle) {
        # Report the database path: the handle itself is undefined here
        print STDERR "Error: set_infodb_entry() failed to open pipe to: " . $infodb_file_path . "\n";
        print STDERR " $!\n";
        $status = -1;
    }
    else {
        print $infodb_file_handle "[$infodb_key]\n";
        print $infodb_file_handle "$serialized_infodb_map\n";
        $self->close_infodb_handle($infodb_file_handle);
        $status = 0; # as in exit status of cmd OK
    }
    return $status;
}
357## set_infodb_entry(string, string, hashmap) => integer ##
358
359
## @function write_infodb_entry(filehandle, string, hashmap)
#
# Writes one complete record to an open write handle: the "[key]" line, one
# "<name>value" line per metadata value (names in sorted order) and the
# closing line of 70 hyphens.
#
# @param $infodb_handle open write handle
# @param $infodb_key    key of the record
# @param $infodb_map    hash reference of metadata name => array reference of
#                       values; it is not modified
#
sub write_infodb_entry
{
    my $self = shift(@_);
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (sort keys(%$infodb_map)) {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
            # Work on a copy: the loop variable aliases the caller's array
            # element, so escaping it in place would alter the caller's data
            my $output_value = $infodb_value;
            if ($output_value =~ /-{70,}/) {
                # if value contains 70 or more hyphens in a row we need to
                # escape them to prevent txt2db from treating them as a
                # separator (every hyphen in the value is escaped)
                $output_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $output_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
382## write_infodb_entry(filehandle, string, hashmap) => void ##
383
384
## @function write_infodb_rawentry(filehandle, string, string)
#
# Writes one record whose body is already serialised: the "[key]" line, the
# given text followed by a newline, and the closing line of 70 hyphens.
#
# @param $infodb_handle open write handle
# @param $infodb_key    key of the record
# @param $infodb_val    record body, written as given
#
sub write_infodb_rawentry
{
    my ($self, $infodb_handle, $infodb_key, $infodb_val) = @_;
    my $separator = '-' x 70;

    print {$infodb_handle} "[$infodb_key]\n";
    print {$infodb_handle} "$infodb_val\n";
    print {$infodb_handle} $separator, "\n";
}
398## write_infodb_rawentry(filehandle, string, string) ##
399
400
4011;
Note: See TracBrowser for help on using the repository browser.