source: gs2-extensions/tdb/trunk/perllib/DBDrivers/JDBM.pm@ 30318

Last change on this file since 30318 was 30318, checked in by jmt12, 8 years ago

Initial checkin of object-oriented rewrite of the dbutils stuff to bring it more into line with plugins and classifiers.

  • Property svn:executable set to *
File size: 9.1 KB
Line 
1###############################################################################
2#
3# DBDrivers/JDBM.pm -- utility functions for writing to jdbm databases
4#
5# A component of the Greenstone digital library software from the New Zealand
6# Digital Library Project at the University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2015 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify it under
11# the terms of the GNU General Public License as published by the Free Software
12# Foundation; either version 2 of the License, or (at your option) any later
13# version.
14#
15# This program is distributed in the hope that it will be useful, but WITHOUT
16# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18# more details.
19#
20# You should have received a copy of the GNU General Public License along with
21# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
22# Ave, Cambridge, MA 02139, USA.
23#
24###############################################################################
25
26package DBDrivers::JDBM;
27
28# Pragma
29use strict;
30
31# Libraries
32use util;
33use FileUtils;
34use DBDrivers::BaseDBDriver;
35
36sub BEGIN
37{
38 @DBDrivers::JDBM::ISA = ( 'DBDrivers::BaseDBDriver' );
39}
40
41sub new
42{
43 my $class = shift(@_);
44 return bless ($self, $class);
45}
46
47# -----------------------------------------------------------------------------
48# JDBM IMPLEMENTATION
49# -----------------------------------------------------------------------------
50
51# When DBUtil::* is properly structured with inheritence, then
52# much of this code (along with GDBM and GDBM-TXT-GZ) can be grouped into
53# a shared base class. Really it is only the the command that needs to
54# be constructed that changes between much of the code that is used
55
56sub open_infodb_write_handle
57{
58 my $infodb_file_path = shift(@_);
59 my $opt_append = shift(@_);
60
61 my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
62 my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
63
64 my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
65
66 if ($^O eq "cygwin") {
67 # Away to run a java program, using a binary that is native to Windows, so need
68 # Windows directory and path separators
69
70 $classpath = `cygpath -wp "$classpath"`;
71 chomp($classpath);
72 $classpath =~ s%\\%\\\\%g;
73 }
74
75 my $infodb_file_handle = undef;
76 my $txt2jdb_cmd = "java -cp \"$classpath\" Txt2Jdb";
77
78 if ((defined $opt_append) && ($opt_append eq "append")) {
79 $txt2jdb_cmd .= " -append";
80 print STDERR "Append operation to $infodb_file_path\n";
81 }
82 else {
83 print STDERR "Create database $infodb_file_path\n";
84 }
85
86 # Lop off file extension, as JDBM does not expect this to be present
87 $infodb_file_path =~ s/\.jdb$//;
88
89 if ($^O eq "cygwin") {
90 $infodb_file_path = `cygpath -w "$infodb_file_path"`;
91 chomp($infodb_file_path);
92 $infodb_file_path =~ s%\\%\\\\%g;
93 }
94
95 $txt2jdb_cmd .= " \"$infodb_file_path\"";
96
97 if (!open($infodb_file_handle, "| $txt2jdb_cmd"))
98 {
99 print STDERR "Error: Failed to open pipe to $txt2jdb_cmd";
100 print STDERR " $!\n";
101 return undef;
102 }
103
104 binmode($infodb_file_handle,":utf8");
105 return $infodb_file_handle;
106}
107
108
109
110sub close_infodb_write_handle
111{
112 my $infodb_handle = shift(@_);
113
114 close($infodb_handle);
115}
116
117
118sub get_infodb_file_path
119{
120 my $collection_name = shift(@_);
121 my $infodb_directory_path = shift(@_);
122
123 my $infodb_file_extension = ".jdb";
124 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
125 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
126}
127
128
129sub read_infodb_file
130{
131 my $infodb_file_path = shift(@_);
132 my $infodb_map = shift(@_);
133
134 my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
135 my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
136
137 my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
138
139 if ($^O eq "cygwin") {
140 # Away to run a java program, using a binary that is native to Windows, so need
141 # Windows directory and path separators
142
143 $classpath = `cygpath -wp "$classpath"`;
144 chomp($classpath);
145 $classpath =~ s%\\%\\\\%g;
146
147 $infodb_file_path = `cygpath -w "$infodb_file_path"`;
148 chomp($infodb_file_path);
149 $infodb_file_path =~ s%\\%\\\\%g;
150 }
151
152 my $jdb2txt_cmd = "java -cp \"$classpath\" Jdb2Txt";
153
154 open (PIPEIN, "$jdb2txt_cmd \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
155 binmode(PIPEIN,":utf8");
156 my $infodb_line = "";
157 my $infodb_key = "";
158 my $infodb_value = "";
159 while (defined ($infodb_line = <PIPEIN>))
160 {
161 $infodb_line =~ s/(\r\n)+$//; # more general than chomp
162
163 if ($infodb_line =~ /^\[([^\]]+)\]$/)
164 {
165 $infodb_key = $1;
166 }
167 elsif ($infodb_line =~ /^-{70}$/)
168 {
169 $infodb_map->{$infodb_key} = $infodb_value;
170 $infodb_key = "";
171 $infodb_value = "";
172 }
173 else
174 {
175 $infodb_value .= $infodb_line;
176 }
177 }
178
179 close (PIPEIN);
180}
181
182sub read_infodb_keys
183{
184 my $infodb_file_path = shift(@_);
185 my $infodb_map = shift(@_);
186
187 my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
188 my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
189
190 my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
191
192 my $jdbkeys_cmd = "java -cp \"$classpath\" JdbKeys";
193
194 open (PIPEIN, "$jdbkeys_cmd \"$infodb_file_path\" |") || die "couldn't open pipe from jdbmkeys \$infodb_file_path\"\n";
195 binmode(PIPEIN,":utf8");
196 my $infodb_line = "";
197 my $infodb_key = "";
198 my $infodb_value = "";
199 while (defined ($infodb_line = <PIPEIN>))
200 {
201 # chomp $infodb_line; # remove end of line
202 $infodb_line =~ s/(\r\n)+$//; # more general than chomp
203
204 $infodb_map->{$infodb_line} = 1;
205 }
206
207 close (PIPEIN);
208}
209
210
211
212sub write_infodb_entry
213{
214
215 my $infodb_handle = shift(@_);
216 my $infodb_key = shift(@_);
217 my $infodb_map = shift(@_);
218
219 print $infodb_handle "[$infodb_key]\n";
220 foreach my $infodb_value_key (keys(%$infodb_map))
221 {
222 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
223 {
224 if ($infodb_value =~ /-{70,}/)
225 {
226 # if value contains 70 or more hyphens in a row we need to escape them
227 # to prevent txt2db from treating them as a separator
228 $infodb_value =~ s/-/&\#045;/gi;
229 }
230 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
231 }
232 }
233 print $infodb_handle '-' x 70, "\n";
234}
235
236
237sub write_infodb_rawentry
238{
239
240 my $infodb_handle = shift(@_);
241 my $infodb_key = shift(@_);
242 my $infodb_val = shift(@_);
243
244 print $infodb_handle "[$infodb_key]\n";
245 print $infodb_handle "$infodb_val\n";
246 print $infodb_handle '-' x 70, "\n";
247}
248
249sub set_infodb_entry
250{
251 my $infodb_file_path = shift(@_);
252 my $infodb_key = shift(@_);
253 my $infodb_map = shift(@_);
254
255 # HTML escape anything that is not part of the "contains" metadata value
256 foreach my $k (keys %$infodb_map) {
257 my @escaped_v = ();
258 foreach my $v (@{$infodb_map->{$k}}) {
259 if ($k eq "contains") {
260 push(@escaped_v, $v);
261 }
262 else {
263 my $ev = &ghtml::unescape_html($v);
264 push(@escaped_v, $ev);
265 }
266 }
267 $infodb_map->{$k} = \@escaped_v;
268 }
269
270 # Generate the record string
271 my $serialized_infodb_map = &dbutil::convert_infodb_hash_to_string($infodb_map);
272### print STDERR "**** ser dr\n$serialized_infodb_map\n\n\n";
273
274 # Store it into JDBM using 'Txt2Jdb .... -append' which despite its name
275 # actually replaces the record if it already exists
276
277 my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
278 my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
279
280 my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
281
282 # Lop off file extension, as JDBM does not expect this to be present
283 $infodb_file_path =~ s/\.jdb$//;
284
285 if ($^O eq "cygwin") {
286 # Away to run a java program, using a binary that is native to Windows, so need
287 # Windows directory and path separators
288
289 $classpath = `cygpath -wp "$classpath"`;
290 chomp($classpath);
291 $classpath =~ s%\\%\\\\%g;
292
293 $infodb_file_path = `cygpath -w "$infodb_file_path"`;
294 chomp($infodb_file_path);
295 $infodb_file_path =~ s%\\%\\\\%g;
296 }
297
298 my $cmd = "java -cp \"$classpath\" Txt2Jdb -append \"$infodb_file_path\"";
299
300 my $status = undef;
301 if(!open(GOUT, "| $cmd"))
302 {
303 print STDERR "Error: jdbm::set_infodb_entry() failed to open pipe to: $cmd\n";
304 print STDERR " $!\n";
305 $status = -1;
306 }
307 else {
308 binmode(GOUT,":utf8");
309
310 print GOUT "[$infodb_key]\n";
311 print GOUT "$serialized_infodb_map\n";
312
313 close(GOUT);
314 $status = 0; # as in exit status of cmd OK
315 }
316
317 return $status;
318}
319
320
321
322
323sub delete_infodb_entry
324{
325 my $infodb_handle = shift(@_);
326 my $infodb_key = shift(@_);
327
328 # A minus at the end of a key (after the ]) signifies 'delete'
329 print $infodb_handle "[$infodb_key]-\n";
330
331 # The 70 minus signs are also needed, to help make the parsing by db2txt simple
332 print $infodb_handle '-' x 70, "\n";
333}
334
3351;
Note: See TracBrowser for help on using the repository browser.