source: main/trunk/greenstone2/perllib/dbutil/jdbm.pm@ 31208

Last change on this file since 31208 was 31208, checked in by ak19, 7 years ago

Kathy found that the lowercased dbutil modules are not used (jdbm.pm, for example) and have been wholly replaced by the uppercased versions in DBDrivers folder. (I thought we had both). So I've moved the recently added code in jdbm.pm to JDBM.pm and fixed it up. Also tiedied up oaiinfo.pm of some unwanted commented out code.

File size: 8.8 KB
RevLine 
[21411]1###########################################################################
2#
3# dbutil::jdbm -- utility functions for writing to jdbm databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::jdbm;
28
29use strict;
30
31
32# -----------------------------------------------------------------------------
33# JDBM IMPLEMENTATION
34# -----------------------------------------------------------------------------
35
36# When DBUtil::* is properly structured with inheritance, then
37# much of this code (along with GDBM and GDBM-TXT-GZ) can be grouped into
38# a shared base class. Really it is only the command that needs to
39# be constructed that changes between much of the code that is used.
40
41
# Open a pipe to the Java Txt2Jdb tool so that records can be streamed into
# a JDBM database.
#
# Arguments:
#   $infodb_file_path - path of the database file; a trailing ".jdb" is
#                       removed before the path is handed to Txt2Jdb
#   $opt_append       - optional; the string "append" adds the "-append"
#                       flag to the Txt2Jdb command line
#
# Returns the write handle (with the :utf8 layer applied), or undef if the
# pipe could not be opened (an error is printed to STDERR in that case).
sub open_infodb_write_handle
{
    my ($infodb_file_path, $opt_append) = @_;

    my $on_cygwin = ($^O eq "cygwin");

    my $wrapper_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
    my $java_classpath = &util::pathname_cat($wrapper_jar,$jdbm_jar);

    if ($on_cygwin) {
        # A way to run a java program, using a binary that is native to
        # Windows, so need Windows directory and path separators.
        $java_classpath = `cygpath -wp "$java_classpath"`;
        chomp($java_classpath);
        # Double every backslash (presumably so they survive shell quoting).
        $java_classpath =~ s%\\%\\\\%g;
    }

    # The command line is collected piece by piece and joined with single
    # spaces once the database path is final.
    my @cmd_parts = ("java -cp \"$java_classpath\" Txt2Jdb");

    my $appending = (defined $opt_append) && ($opt_append eq "append");
    if ($appending) {
        push(@cmd_parts, "-append");
        print STDERR "Append operation to $infodb_file_path\n";
    }
    else {
        print STDERR "Create database $infodb_file_path\n";
    }

    # Lop off file extension, as JDBM does not expect this to be present
    $infodb_file_path =~ s/\.jdb$//;

    if ($on_cygwin) {
        $infodb_file_path = `cygpath -w "$infodb_file_path"`;
        chomp($infodb_file_path);
        $infodb_file_path =~ s%\\%\\\\%g;
    }

    push(@cmd_parts, "\"$infodb_file_path\"");
    my $txt2jdb_cmd = join(" ", @cmd_parts);

    my $pipe_handle = undef;
    unless (open($pipe_handle, "| $txt2jdb_cmd")) {
        print STDERR "Error: Failed to open pipe to $txt2jdb_cmd";
        print STDERR " $!\n";
        return undef;
    }

    binmode($pipe_handle,":utf8");
    return $pipe_handle;
}
93
94
95
# Close a handle previously obtained from open_infodb_write_handle().
#
# Arguments:
#   $infodb_handle - the handle to close
#
# Returns whatever close() returns for that handle.
sub close_infodb_write_handle
{
    my ($infodb_handle) = @_;

    return close($infodb_handle);
}
102
103
# Build the path of the JDBM database file for a collection.
#
# Arguments:
#   $collection_name       - collection name; only the part returned by
#                            util::get_dirsep_tail() is used for the file name
#   $infodb_directory_path - directory that holds the database file
#
# Returns the result of joining the directory and "<tail>.jdb" with
# util::filename_cat().
sub get_infodb_file_path
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $collection_tail = &util::get_dirsep_tail($collection_name);
    my $jdb_file_name = $collection_tail . ".jdb";

    return &util::filename_cat($infodb_directory_path, $jdb_file_name);
}
113
114
# Read every record of a JDBM database into a hash by piping the output of
# the Java Jdb2Txt tool.
#
# Arguments:
#   $infodb_file_path - path of the database file passed to Jdb2Txt
#   $infodb_map       - hash reference that receives key => value-text pairs
#
# The text stream is parsed as follows:
#   * a line of the form "[key]" starts a record and supplies its key
#   * a line of exactly 70 hyphens ends the record and stores it
#   * any other line is appended to the value of the current record
#
# Dies if the pipe to Jdb2Txt cannot be opened.
sub read_infodb_file
{
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);

    my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

    my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

    if ($^O eq "cygwin") {
        # A way to run a java program, using a binary that is native to
        # Windows, so need Windows directory and path separators.

        $classpath = `cygpath -wp "$classpath"`;
        chomp($classpath);
        $classpath =~ s%\\%\\\\%g;

        $infodb_file_path = `cygpath -w "$infodb_file_path"`;
        chomp($infodb_file_path);
        $infodb_file_path =~ s%\\%\\\\%g;
    }

    my $jdb2txt_cmd = "java -cp \"$classpath\" Jdb2Txt";

    # A lexical handle is used so this reader cannot clash with any other
    # code that opens the package-global PIPEIN handle.
    my $pipe_in;
    open ($pipe_in, "$jdb2txt_cmd \"$infodb_file_path\" |")
        || die "couldn't open pipe from db2txt \"$infodb_file_path\"\n";
    binmode($pipe_in,":utf8");

    my $infodb_line = "";
    my $infodb_key = "";
    my $infodb_value = "";
    while (defined ($infodb_line = <$pipe_in>))
    {
        # Strips trailing CRLF pairs only; a bare LF is left on the line, so
        # on LF platforms multi-line values keep their line separators.
        # NOTE(review): on CRLF platforms value lines are therefore joined
        # without any separator - confirm this is what callers expect.
        $infodb_line =~ s/(\r\n)+$//;

        if ($infodb_line =~ /^\[([^\]]+)\]$/)
        {
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/)
        {
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else
        {
            $infodb_value .= $infodb_line;
        }
    }

    close ($pipe_in);
}
167
# Read the keys of a JDBM database by piping the output of the Java JdbKeys
# tool; every output line is treated as one key.
#
# Arguments:
#   $infodb_file_path - path of the database file passed to JdbKeys
#   $infodb_map       - hash reference; each key read is stored with value 1
#
# Dies if the pipe to JdbKeys cannot be opened.
sub read_infodb_keys
{
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);

    my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

    my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

    if ($^O eq "cygwin") {
        # A way to run a java program, using a binary that is native to
        # Windows, so need Windows directory and path separators. This is
        # the same conversion the other functions in this module apply.

        $classpath = `cygpath -wp "$classpath"`;
        chomp($classpath);
        $classpath =~ s%\\%\\\\%g;

        $infodb_file_path = `cygpath -w "$infodb_file_path"`;
        chomp($infodb_file_path);
        $infodb_file_path =~ s%\\%\\\\%g;
    }

    my $jdbkeys_cmd = "java -cp \"$classpath\" JdbKeys";

    # A lexical handle is used so this reader cannot clash with any other
    # code that opens the package-global PIPEIN handle.
    my $pipe_in;
    open ($pipe_in, "$jdbkeys_cmd \"$infodb_file_path\" |")
        || die "couldn't open pipe from jdbmkeys \"$infodb_file_path\"\n";
    binmode($pipe_in,":utf8");

    while (defined (my $infodb_line = <$pipe_in>))
    {
        # Remove the end of line, whether it is LF or CRLF. Stripping only
        # "\r\n" pairs would leave a trailing "\n" on every key on LF
        # platforms, so lookups with the plain key would fail.
        $infodb_line =~ s/[\r\n]+\z//;

        $infodb_map->{$infodb_line} = 1;
    }

    close ($pipe_in);
}
195
196
197
# Write one record to an open infodb write handle in the text format that
# Txt2Jdb reads.
#
# Arguments:
#   $infodb_handle - write handle (e.g. from open_infodb_write_handle())
#   $infodb_key    - record key, written as "[key]"
#   $infodb_map    - hash reference mapping each metadata name to an array
#                    reference of values; each value is written as
#                    "<name>value" on its own line
#
# The record is terminated by a line of 70 hyphens. The caller's map is not
# modified.
sub write_infodb_entry
{
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (keys(%$infodb_map))
    {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
        {
            # Work on a copy: the loop variable is an alias for the element
            # in the caller's array, so substituting on it directly would
            # rewrite the caller's metadata.
            my $output_value = $infodb_value;
            if ($output_value =~ /-{70,}/)
            {
                # if value contains 70 or more hyphens in a row we need to escape them
                # to prevent txt2db from treating them as a separator
                $output_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $output_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
221
222
# Write one record whose value text has already been serialised.
#
# Arguments:
#   $infodb_handle - write handle (e.g. from open_infodb_write_handle())
#   $infodb_key    - record key, written as "[key]"
#   $infodb_val    - value text, written verbatim followed by a newline
#
# The record is terminated by a line of 70 hyphens.
sub write_infodb_rawentry
{
    my ($infodb_handle, $infodb_key, $infodb_val) = @_;

    print {$infodb_handle} "[$infodb_key]\n";
    print {$infodb_handle} "$infodb_val\n";
    print {$infodb_handle} '-' x 70, "\n";
}
234
# Store (or replace) a single record in a JDBM database by piping it to
# 'Txt2Jdb -append'.
#
# Arguments:
#   $infodb_file_path - path of the database file; a trailing ".jdb" is
#                       removed before the path is handed to Txt2Jdb
#   $infodb_key       - key of the record to write
#   $infodb_map       - hash reference mapping each metadata name to an array
#                       reference of values. NOTE: this map is modified in
#                       place - every value list except "contains" is
#                       replaced by its ghtml::unescape_html() form.
#
# Returns 0 on success, or -1 if the pipe could not be opened or if closing
# it reported a failure (write error or non-zero exit from Txt2Jdb).
sub set_infodb_entry
{
    my $infodb_file_path = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    # Pass every value except the "contains" metadata through
    # ghtml::unescape_html()
    foreach my $k (keys %$infodb_map) {
        my @unescaped_v = ();
        foreach my $v (@{$infodb_map->{$k}}) {
            if ($k eq "contains") {
                push(@unescaped_v, $v);
            }
            else {
                my $uv = &ghtml::unescape_html($v);
                push(@unescaped_v, $uv);
            }
        }
        $infodb_map->{$k} = \@unescaped_v;
    }

    # Generate the record string
    my $serialized_infodb_map = &dbutil::convert_infodb_hash_to_string($infodb_map);

    # Store it into JDBM using 'Txt2Jdb .... -append' which despite its name
    # actually replaces the record if it already exists

    my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

    my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

    # Lop off file extension, as JDBM does not expect this to be present
    $infodb_file_path =~ s/\.jdb$//;

    if ($^O eq "cygwin") {
        # A way to run a java program, using a binary that is native to
        # Windows, so need Windows directory and path separators.

        $classpath = `cygpath -wp "$classpath"`;
        chomp($classpath);
        $classpath =~ s%\\%\\\\%g;

        $infodb_file_path = `cygpath -w "$infodb_file_path"`;
        chomp($infodb_file_path);
        $infodb_file_path =~ s%\\%\\\\%g;
    }

    my $cmd = "java -cp \"$classpath\" Txt2Jdb -append \"$infodb_file_path\"";

    my $status = undef;
    my $pipe_out;
    if(!open($pipe_out, "| $cmd"))
    {
        print STDERR "Error: jdbm::set_infodb_entry() failed to open pipe to: $cmd\n";
        print STDERR " $!\n";
        $status = -1;
    }
    else {
        binmode($pipe_out,":utf8");

        print $pipe_out "[$infodb_key]\n";
        print $pipe_out "$serialized_infodb_map\n";

        # Closing a piped handle waits for the command to finish. close()
        # returns false on an I/O error ($! is set) or when the command
        # exits with a non-zero status ($! is 0 and $? holds the status),
        # so a failed write is no longer reported as success.
        if (close($pipe_out)) {
            $status = 0; # as in exit status of cmd OK
        }
        else {
            my $reason = $! ? "$!" : "exit status " . ($? >> 8);
            print STDERR "Error: jdbm::set_infodb_entry() command failed: $cmd\n";
            print STDERR " $reason\n";
            $status = -1;
        }
    }

    return $status;
}
305
306
[28108]307
# Write a 'delete' instruction for one record to an open infodb write handle.
#
# Arguments:
#   $infodb_handle - write handle (e.g. from open_infodb_write_handle())
#   $infodb_key    - key of the record to delete
sub delete_infodb_entry
{
    my ($infodb_handle, $infodb_key) = @_;

    # A minus directly after the closing ']' of the key signifies 'delete'
    print {$infodb_handle} "[$infodb_key]-\n";

    # The record separator (70 minus signs) is still written, to help keep
    # the parsing by db2txt simple
    print {$infodb_handle} '-' x 70, "\n";
}
319
320
3211;
Note: See TracBrowser for help on using the repository browser.