source: main/trunk/greenstone2/perllib/dbutil/gdbmtxtgz.pm@ 27602

Last change on this file since 27602 was 27602, checked in by ak19, 11 years ago

Adding sorting on keys. Particularly necessary for diffcol.pl (automated testing), since otherwise the OS orders the keys coming out of the database and this ordering can differ.

File size: 5.2 KB
Line 
1###########################################################################
2#
3# dbutil::gdbmtxtgz -- utility functions for writing to gdbm-txtgz databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::gdbmtxtgz;
28
29use strict;
30
31
32
33# -----------------------------------------------------------------------------
34# GDBM TXT-GZ IMPLEMENTATION
35# -----------------------------------------------------------------------------
36
37sub open_infodb_write_handle
38{
39 # Keep infodb in GDBM neutral form => save data as compressed text file,
40 # read for txt2db to be run on it later (i.e. by the runtime system,
41 # first time the collection is ever accessed). This makes it easier
42 # distribute pre-built collections to various architectures.
43 #
44 # NB: even if two architectures are little endian (e.g. Intel and
45 # ARM procesors) GDBM does *not* guarantee that the database generated on
46 # one will work on the other
47
48 my $infodb_file_path = shift(@_);
49
50 # Greenstone ships with gzip for windows, on $PATH
51
52 my $infodb_file_handle = undef;
53 if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\""))
54 {
55 print STDERR "Error: Failed to open pipe to gzip - > \"$infodb_file_path\"\n";
56 print STDERR " $!\n";
57 return undef;
58 }
59 binmode($infodb_file_handle,":utf8");
60 return $infodb_file_handle;
61}
62
63
64sub close_infodb_write_handle
65{
66 my $infodb_handle = shift(@_);
67
68 close($infodb_handle);
69}
70
71
72sub get_infodb_file_path
73{
74 my $collection_name = shift(@_);
75 my $infodb_directory_path = shift(@_);
76
77 my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz";
78 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
79}
80
81
82sub read_infodb_file
83{
84 my $infodb_file_path = shift(@_);
85 my $infodb_map = shift(@_);
86
87 my $cmd = "gzip --decompress --to-stdout \"$infodb_file_path\"";
88
89 open (PIPEIN, "$cmd |")
90 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n";
91
92 binmode(PIPEIN,":utf8");
93 my $infodb_line = "";
94 my $infodb_key = "";
95 my $infodb_value = "";
96 while (defined ($infodb_line = <PIPEIN>))
97 {
98 if ($infodb_line =~ /^\[([^\]]+)\]$/)
99 {
100 $infodb_key = $1;
101 }
102 elsif ($infodb_line =~ /^-{70}$/)
103 {
104 $infodb_map->{$infodb_key} = $infodb_value;
105 $infodb_key = "";
106 $infodb_value = "";
107 }
108 else
109 {
110 $infodb_value .= $infodb_line;
111 }
112 }
113
114 close (PIPEIN);
115}
116
117
118sub read_infodb_keys
119{
120 my $infodb_file_path = shift(@_);
121 my $infodb_map = shift(@_);
122
123 my $cmd = "gzip --decompress --to-stdout \"$infodb_file_path\"";
124
125 open (PIPEIN, "$cmd |")
126 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n";
127
128 binmode(PIPEIN,":utf8");
129 my $infodb_line = "";
130 my $infodb_key = "";
131 while (defined ($infodb_line = <PIPEIN>))
132 {
133 if ($infodb_line =~ /^\[([^\]]+)\]$/)
134 {
135 $infodb_key = $1;
136 }
137 elsif ($infodb_line =~ /^-{70}$/)
138 {
139 $infodb_map->{$infodb_key} = 1;
140 $infodb_key = "";
141 }
142 }
143
144 close (PIPEIN);
145}
146
147
148sub write_infodb_entry
149{
150
151 my $infodb_handle = shift(@_);
152 my $infodb_key = shift(@_);
153 my $infodb_map = shift(@_);
154
155 print $infodb_handle "[$infodb_key]\n";
156 foreach my $infodb_value_key (sort keys(%$infodb_map))
157 {
158 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
159 {
160 if ($infodb_value =~ /-{70,}/)
161 {
162 # if value contains 70 or more hyphens in a row we need to escape them
163 # to prevent txt2db from treating them as a separator
164 $infodb_value =~ s/-/&\#045;/gi;
165 }
166 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
167 }
168 }
169 print $infodb_handle '-' x 70, "\n";
170}
171
172
173
174sub write_infodb_rawentry
175{
176
177 my $infodb_handle = shift(@_);
178 my $infodb_key = shift(@_);
179 my $infodb_val = shift(@_);
180
181 print $infodb_handle "[$infodb_key]\n";
182 print $infodb_handle "$infodb_val\n";
183 print $infodb_handle '-' x 70, "\n";
184}
185
186 sub set_infodb_entry
187{
188 my $infodb_file_path = shift(@_);
189 my $infodb_key = shift(@_);
190 my $infodb_map = shift(@_);
191
192 print STDERR "***** gdbmtxtgz::set_infodb_entry() not implemented yet!\n";
193}
194
195
196
197sub delete_infodb_entry
198{
199
200 my $infodb_handle = shift(@_);
201 my $infodb_key = shift(@_);
202
203
204 # A minus at the end of a key (after the ]) signifies 'delete'
205 print $infodb_handle "[$infodb_key]-\n";
206
207 # The 70 minus signs are also needed, to help make the parsing by db2txt simple
208 print $infodb_handle '-' x 70, "\n";
209}
210
211
212
2131;
Note: See TracBrowser for help on using the repository browser.