root/gs2-extensions/tdb/trunk/perllib/DBDrivers/JDBM.pm @ 30318

Revision 30318, 9.1 KB (checked in by jmt12, 5 years ago)

Initial checkin of object-oriented rewrite of the dbutils stuff to bring it more into line with plugins and classifiers.

  • Property svn:executable set to *
Line 
1###############################################################################
2#
3# DBDrivers/JDBM.pm -- utility functions for writing to jdbm databases
4#
5# A component of the Greenstone digital library software from the New Zealand
6# Digital Library Project at the University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2015 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify it under
11# the terms of the GNU General Public License as published by the Free Software
12# Foundation; either version 2 of the License, or (at your option) any later
13# version.
14#
15# This program is distributed in the hope that it will be useful, but WITHOUT
16# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18# more details.
19#
20# You should have received a copy of the GNU General Public License along with
21# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
22# Ave, Cambridge, MA 02139, USA.
23#
24###############################################################################
25
26package DBDrivers::JDBM;
27
28# Pragma
29use strict;
30
31# Libraries
32use util;
33use FileUtils;
34use DBDrivers::BaseDBDriver;
35
36sub BEGIN
37{
38    @DBDrivers::JDBM::ISA = ( 'DBDrivers::BaseDBDriver' );
39}
40
41sub new
42{
43    my $class = shift(@_);
44    return bless ($self, $class);
45}
46
47# -----------------------------------------------------------------------------
48#   JDBM IMPLEMENTATION
49# -----------------------------------------------------------------------------
50
51# When DBUtil::* is properly structured with inheritence, then
52# much of this code (along with GDBM and GDBM-TXT-GZ) can be grouped into
53# a shared base class.  Really it is only the the command that needs to
54# be constructed that changes between much of the code that is used
55
56sub open_infodb_write_handle
57{
58  my $infodb_file_path = shift(@_);
59  my $opt_append = shift(@_);
60
61  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
62  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
63
64  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
65
66  if ($^O eq "cygwin") {
67      # Away to run a java program, using a binary that is native to Windows, so need
68      # Windows directory and path separators
69
70      $classpath = `cygpath -wp "$classpath"`;
71      chomp($classpath);
72      $classpath =~ s%\\%\\\\%g;
73  }
74
75  my $infodb_file_handle = undef;
76  my $txt2jdb_cmd = "java -cp \"$classpath\" Txt2Jdb";
77
78  if ((defined $opt_append) && ($opt_append eq "append")) {
79      $txt2jdb_cmd .= " -append";
80      print STDERR "Append operation to $infodb_file_path\n";
81  }
82  else {
83      print STDERR "Create database $infodb_file_path\n";
84  }
85 
86  # Lop off file extension, as JDBM does not expect this to be present
87  $infodb_file_path =~ s/\.jdb$//;
88
89  if ($^O eq "cygwin") {
90      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
91      chomp($infodb_file_path);
92      $infodb_file_path =~ s%\\%\\\\%g;
93  }
94
95  $txt2jdb_cmd .= " \"$infodb_file_path\"";
96
97  if (!open($infodb_file_handle, "| $txt2jdb_cmd"))
98  {
99      print STDERR "Error: Failed to open pipe to $txt2jdb_cmd";
100      print STDERR "       $!\n";
101      return undef;
102  }
103 
104  binmode($infodb_file_handle,":utf8");
105  return $infodb_file_handle;
106}
107
108
109
110sub close_infodb_write_handle
111{
112  my $infodb_handle = shift(@_);
113
114  close($infodb_handle);
115}
116
117
118sub get_infodb_file_path
119{
120  my $collection_name = shift(@_);
121  my $infodb_directory_path = shift(@_);
122
123  my $infodb_file_extension = ".jdb";
124  my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
125  return &util::filename_cat($infodb_directory_path, $infodb_file_name);
126}
127
128
129sub read_infodb_file
130{
131  my $infodb_file_path = shift(@_);
132  my $infodb_map = shift(@_);
133
134  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
135  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
136
137  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
138
139  if ($^O eq "cygwin") {
140      # Away to run a java program, using a binary that is native to Windows, so need
141      # Windows directory and path separators
142     
143      $classpath = `cygpath -wp "$classpath"`;
144      chomp($classpath);
145      $classpath =~ s%\\%\\\\%g;
146
147      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
148      chomp($infodb_file_path);
149      $infodb_file_path =~ s%\\%\\\\%g;
150  }
151
152  my $jdb2txt_cmd = "java -cp \"$classpath\" Jdb2Txt";
153
154  open (PIPEIN, "$jdb2txt_cmd \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
155  binmode(PIPEIN,":utf8");
156  my $infodb_line = "";
157  my $infodb_key = "";
158  my $infodb_value = "";
159  while (defined ($infodb_line = <PIPEIN>))
160  {
161    $infodb_line =~ s/(\r\n)+$//; # more general than chomp
162
163    if ($infodb_line =~ /^\[([^\]]+)\]$/)
164    {
165      $infodb_key = $1;
166    }
167    elsif ($infodb_line =~ /^-{70}$/)
168    {
169      $infodb_map->{$infodb_key} = $infodb_value;
170      $infodb_key = "";
171      $infodb_value = "";
172    }
173    else
174    {
175      $infodb_value .= $infodb_line;
176    }
177  }
178
179  close (PIPEIN);
180}
181
182sub read_infodb_keys
183{
184  my $infodb_file_path = shift(@_);
185  my $infodb_map = shift(@_);
186
187  my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
188  my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
189
190  my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
191
192  my $jdbkeys_cmd = "java -cp \"$classpath\" JdbKeys";
193
194  open (PIPEIN, "$jdbkeys_cmd \"$infodb_file_path\" |") || die "couldn't open pipe from jdbmkeys \$infodb_file_path\"\n";
195  binmode(PIPEIN,":utf8");
196  my $infodb_line = "";
197  my $infodb_key = "";
198  my $infodb_value = "";
199  while (defined ($infodb_line = <PIPEIN>))
200  {
201      # chomp $infodb_line; # remove end of line
202      $infodb_line =~ s/(\r\n)+$//; # more general than chomp
203
204      $infodb_map->{$infodb_line} = 1;
205  }
206
207  close (PIPEIN);
208}
209
210
211   
212sub write_infodb_entry
213{
214
215  my $infodb_handle = shift(@_);
216  my $infodb_key = shift(@_);
217  my $infodb_map = shift(@_);
218
219  print $infodb_handle "[$infodb_key]\n";
220  foreach my $infodb_value_key (keys(%$infodb_map))
221  {
222    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
223    {
224      if ($infodb_value =~ /-{70,}/)
225      {
226        # if value contains 70 or more hyphens in a row we need to escape them
227        # to prevent txt2db from treating them as a separator
228        $infodb_value =~ s/-/&\#045;/gi;
229      }
230      print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
231    }
232  }
233  print $infodb_handle '-' x 70, "\n";
234}
235
236
237sub write_infodb_rawentry
238{
239
240  my $infodb_handle = shift(@_);
241  my $infodb_key = shift(@_);
242  my $infodb_val = shift(@_);
243 
244  print $infodb_handle "[$infodb_key]\n";
245  print $infodb_handle "$infodb_val\n";
246  print $infodb_handle '-' x 70, "\n";
247}
248
249sub set_infodb_entry
250{
251    my $infodb_file_path = shift(@_);
252    my $infodb_key = shift(@_);
253    my $infodb_map = shift(@_);
254 
255    # HTML escape anything that is not part of the "contains" metadata value
256    foreach my $k (keys %$infodb_map) {
257      my @escaped_v = ();
258      foreach my $v (@{$infodb_map->{$k}}) {
259        if ($k eq "contains") {
260          push(@escaped_v, $v);
261        }
262        else {
263          my $ev = &ghtml::unescape_html($v);
264          push(@escaped_v, $ev);
265        }
266      }
267      $infodb_map->{$k} = \@escaped_v;
268    }
269   
270    # Generate the record string
271    my $serialized_infodb_map = &dbutil::convert_infodb_hash_to_string($infodb_map);
272###    print STDERR "**** ser dr\n$serialized_infodb_map\n\n\n";
273
274    # Store it into JDBM using 'Txt2Jdb .... -append' which despite its name
275    # actually replaces the record if it already exists
276
277    my $jdbmwrap_jar = &util::filename_cat($ENV{'GSDLHOME'},"bin","java", "JDBMWrapper.jar");
278    my $jdbm_jar = &util::filename_cat($ENV{'GSDLHOME'},"lib","java", "jdbm.jar");
279   
280    my $classpath = &util::pathname_cat($jdbmwrap_jar,$jdbm_jar);
281
282    # Lop off file extension, as JDBM does not expect this to be present
283    $infodb_file_path =~ s/\.jdb$//;
284
285    if ($^O eq "cygwin") {
286    # Away to run a java program, using a binary that is native to Windows, so need
287    # Windows directory and path separators
288   
289      $classpath = `cygpath -wp "$classpath"`;
290      chomp($classpath);
291      $classpath =~ s%\\%\\\\%g;
292
293      $infodb_file_path = `cygpath -w "$infodb_file_path"`;
294      chomp($infodb_file_path);
295      $infodb_file_path =~ s%\\%\\\\%g;
296    }
297
298    my $cmd = "java -cp \"$classpath\" Txt2Jdb -append \"$infodb_file_path\"";
299
300    my $status = undef;
301    if(!open(GOUT, "| $cmd"))
302    {
303    print STDERR "Error: jdbm::set_infodb_entry() failed to open pipe to: $cmd\n";
304    print STDERR "       $!\n";
305    $status = -1;
306    }
307    else {
308    binmode(GOUT,":utf8");
309   
310    print GOUT "[$infodb_key]\n";
311    print GOUT "$serialized_infodb_map\n";
312
313    close(GOUT);
314    $status = 0; # as in exit status of cmd OK
315    }
316
317    return $status; 
318}
319
320
321
322
323sub delete_infodb_entry
324{
325  my $infodb_handle = shift(@_);
326  my $infodb_key = shift(@_);
327 
328  # A minus at the end of a key (after the ]) signifies 'delete'
329  print $infodb_handle "[$infodb_key]-\n";
330
331  # The 70 minus signs are also needed, to help make the parsing by db2txt simple
332  print $infodb_handle '-' x 70, "\n";
333}
334
3351;
Note: See TracBrowser for help on using the browser.