root/main/trunk/greenstone2/perllib/dbutil/jdbm.pm @ 31208

Revision 31208, 8.8 KB (checked in by ak19, 3 years ago)

Kathy found that the lowercased dbutil modules (jdbm.pm, for example) are not used and have been wholly replaced by the uppercased versions in the DBDrivers folder. (I thought we had both.) So I've moved the recently added code in jdbm.pm to JDBM.pm and fixed it up. Also tidied up oaiinfo.pm by removing some unwanted commented-out code.

Line 
1###########################################################################
2#
3# dbutil::jdbm -- utility functions for writing to jdbm databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::jdbm;
28
29use strict;
30
31
32# -----------------------------------------------------------------------------
33#   JDBM IMPLEMENTATION
34# -----------------------------------------------------------------------------
35
# When DBUtil::* is properly structured with inheritance, then
# much of this code (along with GDBM and GDBM-TXT-GZ) can be grouped into
# a shared base class.  Really it is only the command that needs to
# be constructed that changes between these implementations.
40
41
# Opens a pipe to the Java Txt2Jdb tool so that records can be streamed
# into a JDBM database.
#
# Parameters:
#   $infodb_file_path - path of the database file; a trailing ".jdb" is
#                       removed before the path is passed to Txt2Jdb
#   $opt_append       - optional; when it is the string "append", Txt2Jdb is
#                       run with the -append flag
#
# Returns the write handle (with the :utf8 layer applied), or undef if the
# pipe could not be opened.
sub open_infodb_write_handle
{
  my $infodb_file_path = shift(@_);
  my $opt_append = shift(@_);

  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

  if ($^O eq "cygwin") {
      # A way to run a java program, using a binary that is native to Windows, so need
      # Windows directory and path separators

      $classpath = `cygpath -wp "$classpath"`;
      chomp($classpath);
      # Double each backslash so it survives the double-quoted command string
      $classpath =~ s%\\%\\\\%g;
  }

  my $infodb_file_handle = undef;
  my $txt2jdb_cmd = "java -cp \"$classpath\" Txt2Jdb";

  if ((defined $opt_append) && ($opt_append eq "append")) {
      $txt2jdb_cmd .= " -append";
      print STDERR "Append operation to $infodb_file_path\n";
  }
  else {
      print STDERR "Create database $infodb_file_path\n";
  }

  # Lop off file extension, as JDBM does not expect this to be present
  $infodb_file_path =~ s/\.jdb$//;

  if ($^O eq "cygwin") {
      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
      chomp($infodb_file_path);
      $infodb_file_path =~ s%\\%\\\\%g;
  }

  $txt2jdb_cmd .= " \"$infodb_file_path\"";

  if (!open($infodb_file_handle, "| $txt2jdb_cmd"))
  {
      # Terminate the first line so the system error appears on its own line
      print STDERR "Error: Failed to open pipe to $txt2jdb_cmd\n";
      print STDERR "       $!\n";
      return undef;
  }

  binmode($infodb_file_handle,":utf8");
  return $infodb_file_handle;
}
93
94
95
# Closes a write handle previously handed out for an info database and
# passes back whatever close() reports.
sub close_infodb_write_handle
{
  my ($write_handle) = @_;

  return close($write_handle);
}
102
103
# Builds the path of a collection's JDBM database file.
#
# Parameters:
#   $collection_name       - collection name; it is first passed through
#                            util::get_dirsep_tail (presumably to keep only
#                            the last path component - confirm in util.pm)
#   $infodb_directory_path - directory that holds the database
#
# Returns the result of util::filename_cat on the directory and the
# "<name>.jdb" file name.
sub get_infodb_file_path
{
  my ($collection_name, $infodb_directory_path) = @_;

  my $collection_tail = &util::get_dirsep_tail($collection_name);
  my $jdb_file_name = $collection_tail . ".jdb";

  return &util::filename_cat($infodb_directory_path, $jdb_file_name);
}
113
114
# Reads every record of a JDBM database by piping it through the Java
# Jdb2Txt tool and stores the records in the supplied hash.
#
# Parameters:
#   $infodb_file_path - path of the database, passed to Jdb2Txt
#   $infodb_map       - hash reference that is filled with key => raw value
#
# The tool's output is parsed as "[key]" lines, value lines, and a line of
# exactly 70 hyphens that ends each record.
# Dies if the pipe from Jdb2Txt cannot be opened.
sub read_infodb_file
{
  my $infodb_file_path = shift(@_);
  my $infodb_map = shift(@_);

  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

  if ($^O eq "cygwin") {
      # A way to run a java program, using a binary that is native to Windows, so need
      # Windows directory and path separators

      $classpath = `cygpath -wp "$classpath"`;
      chomp($classpath);
      $classpath =~ s%\\%\\\\%g;

      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
      chomp($infodb_file_path);
      $infodb_file_path =~ s%\\%\\\\%g;
  }

  my $jdb2txt_cmd = "java -cp \"$classpath\" Jdb2Txt";

  # A lexical handle keeps the pipe private to this call instead of sharing
  # a package-global bareword handle
  my $pipe_in;
  open ($pipe_in, "$jdb2txt_cmd \"$infodb_file_path\" |")
    || die "couldn't open pipe from Jdb2Txt \"$infodb_file_path\"\n";
  binmode($pipe_in,":utf8");
  my $infodb_line = "";
  my $infodb_key = "";
  my $infodb_value = "";
  while (defined ($infodb_line = <$pipe_in>))
  {
    # Strips trailing CRLF pairs only; a bare "\n" is left in place, so
    # newline-terminated value lines keep their line breaks when appended
    $infodb_line =~ s/(\r\n)+$//;

    # '$' also matches just before a final newline, so both marker patterns
    # still match lines that end in "\n"
    if ($infodb_line =~ /^\[([^\]]+)\]$/)
    {
      $infodb_key = $1;
    }
    elsif ($infodb_line =~ /^-{70}$/)
    {
      # End of record: store it and reset for the next one
      $infodb_map->{$infodb_key} = $infodb_value;
      $infodb_key = "";
      $infodb_value = "";
    }
    else
    {
      $infodb_value .= $infodb_line;
    }
  }

  close ($pipe_in);
}
167
# Lists the keys of a JDBM database by piping it through the Java JdbKeys
# tool; every output line becomes a key in the supplied hash.
#
# Parameters:
#   $infodb_file_path - path of the database, passed to JdbKeys
#   $infodb_map       - hash reference; each key found is set to 1
#
# Dies if the pipe from JdbKeys cannot be opened.
sub read_infodb_keys
{
  my $infodb_file_path = shift(@_);
  my $infodb_map = shift(@_);

  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

  if ($^O eq "cygwin") {
      # Same conversion as read_infodb_file: the java binary is native to
      # Windows, so it needs Windows directory and path separators

      $classpath = `cygpath -wp "$classpath"`;
      chomp($classpath);
      $classpath =~ s%\\%\\\\%g;

      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
      chomp($infodb_file_path);
      $infodb_file_path =~ s%\\%\\\\%g;
  }

  my $jdbkeys_cmd = "java -cp \"$classpath\" JdbKeys";

  # A lexical handle keeps the pipe private to this call
  my $pipe_in;
  open ($pipe_in, "$jdbkeys_cmd \"$infodb_file_path\" |")
    || die "couldn't open pipe from JdbKeys \"$infodb_file_path\"\n";
  binmode($pipe_in,":utf8");
  while (defined (my $infodb_line = <$pipe_in>))
  {
      # Remove any mix of trailing CR/LF so that the hash key never carries
      # a line terminator, whichever line ending the tool emits
      $infodb_line =~ s/[\r\n]+$//;

      $infodb_map->{$infodb_line} = 1;
  }

  close ($pipe_in);
}
195
196
197   
# Writes one record to an open info database handle in the text format
# "[key]", then one "<field>value" line per value, then a line of 70
# hyphens.
#
# Parameters:
#   $infodb_handle - handle to print to
#   $infodb_key    - record key
#   $infodb_map    - hash reference mapping field name => array ref of values
#
# The caller's map is left untouched; hyphen escaping is applied to a copy
# of each value.
sub write_infodb_entry
{

  my $infodb_handle = shift(@_);
  my $infodb_key = shift(@_);
  my $infodb_map = shift(@_);

  print $infodb_handle "[$infodb_key]\n";
  foreach my $infodb_value_key (keys(%$infodb_map))
  {
    foreach my $stored_value (@{$infodb_map->{$infodb_value_key}})
    {
      # The loop variable aliases the caller's array element, so escape a
      # private copy rather than rewriting the caller's data
      my $infodb_value = $stored_value;
      if ($infodb_value =~ /-{70,}/)
      {
        # if value contains 70 or more hyphens in a row we need to escape them
        # to prevent txt2db from treating them as a separator
        # (every hyphen in such a value is escaped, not just the long run)
        $infodb_value =~ s/-/&\#045;/g;
      }
      print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
    }
  }
  print $infodb_handle '-' x 70, "\n";
}
221
222
# Writes a record whose value is already serialised: the "[key]" line, the
# raw value followed by a newline, then the 70-hyphen record separator.
#
# Parameters (positional): output handle, record key, raw value string.
sub write_infodb_rawentry
{
  my ($out, $record_key, $raw_value) = @_;

  my $separator = '-' x 70;

  print {$out} "[$record_key]\n";
  print {$out} "$raw_value\n";
  return print {$out} $separator, "\n";
}
234
# Stores (or replaces) a single record in a JDBM database by piping it to
# the Java Txt2Jdb tool run with -append.
#
# Parameters:
#   $infodb_file_path - path of the database; a trailing ".jdb" is removed
#   $infodb_key       - record key
#   $infodb_map       - hash reference mapping field name => array ref of
#                       values.  NOTE: it is updated in place; every field
#                       except "contains" has its values replaced by the
#                       result of ghtml::unescape_html.
#
# Returns 0 when the command was run and closed cleanly, -1 when the pipe
# could not be opened or the command reported failure on close.
sub set_infodb_entry
{
    my $infodb_file_path = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    # HTML-unescape anything that is not part of the "contains" metadata value
    foreach my $k (keys %$infodb_map) {
      my @unescaped_v = ();
      foreach my $v (@{$infodb_map->{$k}}) {
        if ($k eq "contains") {
          push(@unescaped_v, $v);
        }
        else {
          my $ev = &ghtml::unescape_html($v);
          push(@unescaped_v, $ev);
        }
      }
      $infodb_map->{$k} = \@unescaped_v;
    }

    # Generate the record string
    my $serialized_infodb_map = &dbutil::convert_infodb_hash_to_string($infodb_map);

    # Store it into JDBM using 'Txt2Jdb .... -append' which despite its name
    # actually replaces the record if it already exists

    my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");

    my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);

    # Lop off file extension, as JDBM does not expect this to be present
    $infodb_file_path =~ s/\.jdb$//;

    if ($^O eq "cygwin") {
      # A way to run a java program, using a binary that is native to Windows, so need
      # Windows directory and path separators

      $classpath = `cygpath -wp "$classpath"`;
      chomp($classpath);
      $classpath =~ s%\\%\\\\%g;

      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
      chomp($infodb_file_path);
      $infodb_file_path =~ s%\\%\\\\%g;
    }

    my $cmd = "java -cp \"$classpath\" Txt2Jdb -append \"$infodb_file_path\"";

    my $status = undef;
    # A lexical handle keeps the pipe private to this call
    my $pipe_out;
    if (!open($pipe_out, "| $cmd"))
    {
      print STDERR "Error: jdbm::set_infodb_entry() failed to open pipe to: $cmd\n";
      print STDERR "       $!\n";
      $status = -1;
    }
    else {
      binmode($pipe_out,":utf8");

      print $pipe_out "[$infodb_key]\n";
      print $pipe_out "$serialized_infodb_map\n";

      # close() on a piped handle waits for the command and returns false
      # if it exited with a non-zero status, so its result is the only
      # place a failure of Txt2Jdb becomes visible
      if (close($pipe_out)) {
        $status = 0; # as in exit status of cmd OK
      }
      else {
        my $reason = $! ? "$!" : "exit status " . ($? >> 8);
        print STDERR "Error: jdbm::set_infodb_entry() command failed: $cmd\n";
        print STDERR "       $reason\n";
        $status = -1;
      }
    }

    return $status;
}
305
306
307
# Emits a delete instruction for one record on an open info database
# handle: the key line with a trailing minus, then the 70-hyphen separator.
#
# Parameters (positional): output handle, key of the record to delete.
sub delete_infodb_entry
{
  my ($out, $doomed_key) = @_;

  my $separator = '-' x 70;

  # The minus immediately after the closing bracket is the 'delete' marker
  print {$out} "[$doomed_key]-\n";

  # The separator line is still written so the reader's parsing stays simple
  return print {$out} $separator, "\n";
}
319
320
3211;
Note: See TracBrowser for help on using the browser.