source: gsdl/trunk/perllib/dbutil.pm@ 16841

Last change on this file since 16841 was 16726, checked in by mdewsnip, 16 years ago

Added quote marks around sqlite executable path so it works when Greenstone is installed in a path containing spaces.

File size: 9.4 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
31sub close_infodb_write_handle
32{
33 my $infodb_type = shift(@_);
34 my $infodb_handle = shift(@_);
35
36 if ($infodb_type eq "sqlite")
37 {
38 return &close_infodb_write_handle_sqlite($infodb_handle);
39 }
40
41 # Use GDBM if the infodb type is empty or not one of the values above
42 return &close_infodb_write_handle_gdbm($infodb_handle);
43}
44
45
46sub get_default_infodb_type
47{
48 return "gdbm";
49}
50
51
52sub get_infodb_file_path
53{
54 my $infodb_type = shift(@_);
55 my $collection_name = shift(@_);
56 my $infodb_directory_path = shift(@_);
57
58 if ($infodb_type eq "sqlite")
59 {
60 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
61 }
62
63 # Use GDBM if the infodb type is empty or not one of the values above
64 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
65}
66
67
68sub open_infodb_write_handle
69{
70 my $infodb_type = shift(@_);
71 my $infodb_file_path = shift(@_);
72
73 if ($infodb_type eq "sqlite")
74 {
75 return &open_infodb_write_handle_sqlite($infodb_file_path);
76 }
77
78 # Use GDBM if the infodb type is empty or not one of the values above
79 return &open_infodb_write_handle_gdbm($infodb_file_path);
80}
81
82
83sub read_infodb_file
84{
85 my $infodb_type = shift(@_);
86 my $infodb_file_path = shift(@_);
87 my $infodb_map = shift(@_);
88
89 if ($infodb_type eq "sqlite")
90 {
91 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
92 }
93
94 # Use GDBM if the infodb type is empty or not one of the values above
95 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
96}
97
98
99sub write_infodb_entry
100{
101 my $infodb_type = shift(@_);
102 my $infodb_handle = shift(@_);
103 my $infodb_key = shift(@_);
104 my $infodb_map = shift(@_);
105
106 if ($infodb_type eq "sqlite")
107 {
108 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
109 }
110
111 # Use GDBM if the infodb type is empty or not one of the values above
112 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
113}
114
115
116
117# ----------------------------------------------------------------------------------------
118# GDBM IMPLEMENTATION
119# ----------------------------------------------------------------------------------------
120
121sub close_infodb_write_handle_gdbm
122{
123 my $infodb_handle = shift(@_);
124
125 close($infodb_handle);
126}
127
128
129sub get_infodb_file_path_gdbm
130{
131 my $collection_name = shift(@_);
132 my $infodb_directory_path = shift(@_);
133
134 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
135 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
136 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
137}
138
139
140sub open_infodb_write_handle_gdbm
141{
142 my $infodb_file_path = shift(@_);
143
144 my $txt2db_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "txt2db" . &util::get_os_exe());
145 my $infodb_file_handle = undef;
146 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\""))
147 {
148 return undef;
149 }
150
151 return $infodb_file_handle;
152}
153
154
155sub read_infodb_file_gdbm
156{
157 my $infodb_file_path = shift(@_);
158 my $infodb_map = shift(@_);
159
160 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
161 my $infodb_line = "";
162 my $infodb_key = "";
163 my $infodb_value = "";
164 while (defined ($infodb_line = <PIPEIN>))
165 {
166 if ($infodb_line =~ /^\[([^\]]+)\]$/)
167 {
168 $infodb_key = $1;
169 }
170 elsif ($infodb_line =~ /^-{70}$/)
171 {
172 $infodb_map->{$infodb_key} = $infodb_value;
173 $infodb_key = "";
174 $infodb_value = "";
175 }
176 else
177 {
178 $infodb_value .= $infodb_line;
179 }
180 }
181
182 close (PIPEIN);
183}
184
185
186sub write_infodb_entry_gdbm
187{
188 my $infodb_handle = shift(@_);
189 my $infodb_key = shift(@_);
190 my $infodb_map = shift(@_);
191
192 print $infodb_handle "[$infodb_key]\n";
193 foreach my $infodb_value_key (keys(%$infodb_map))
194 {
195 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
196 {
197 if ($infodb_value =~ /-{70,}/)
198 {
199 # if value contains 70 or more hyphens in a row we need to escape them
200 # to prevent txt2db from treating them as a separator
201 $infodb_value =~ s/-/&\#045;/gi;
202 }
203 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
204 }
205 }
206 print $infodb_handle '-' x 70, "\n";
207}
208
209
210
211# ----------------------------------------------------------------------------------------
212# SQLITE IMPLEMENTATION
213# ----------------------------------------------------------------------------------------
214
215sub close_infodb_write_handle_sqlite
216{
217 my $infodb_handle = shift(@_);
218
219 # Close the transaction we began after opening the file
220 print $infodb_handle "END TRANSACTION;\n";
221
222 # This is crucial for efficient queries on the database!
223 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
224
225 close($infodb_handle);
226}
227
228
229sub get_infodb_file_path_sqlite
230{
231 my $collection_name = shift(@_);
232 my $infodb_directory_path = shift(@_);
233
234 my $infodb_file_extension = ".db";
235 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
236 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
237}
238
239
240sub open_infodb_write_handle_sqlite
241{
242 my $infodb_file_path = shift(@_);
243
244 my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
245 my $infodb_handle = undef;
246 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
247 {
248 return undef;
249 }
250
251 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
252 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
253
254 # This is crucial for efficiency when importing large amounts of data
255 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
256
257 # This is very important for efficiency, otherwise each command will be actioned one at a time
258 print $infodb_handle "BEGIN TRANSACTION;\n";
259
260 return $infodb_handle;
261}
262
263
264sub read_infodb_file_sqlite
265{
266 my $infodb_file_path = shift(@_);
267 my $infodb_map = shift(@_);
268
269 # !! TO IMPLEMENT
270}
271
272
273sub write_infodb_entry_sqlite
274{
275 my $infodb_handle = shift(@_);
276 my $infodb_key = shift(@_);
277 my $infodb_map = shift(@_);
278
279 # Add the key -> value mapping into the "data" table
280 my $infodb_entry_value = "";
281 foreach my $infodb_value_key (keys(%$infodb_map))
282 {
283 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
284 {
285 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
286 }
287 }
288
289 my $safe_infodb_key = &sqlite_safe($infodb_key);
290 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
291
292 # If this infodb entry is for a document, add all the interesting document metadata to the
293 # "document_metadata" table (for use by the dynamic classifiers)
294 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
295 {
296 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
297
298 foreach my $infodb_value_key (keys(%$infodb_map))
299 {
300 # We're not interested in most of the automatically added document metadata
301 next if ($infodb_value_key eq "archivedir" ||
302 $infodb_value_key eq "assocfilepath" ||
303 $infodb_value_key eq "childtype" ||
304 $infodb_value_key eq "contains" ||
305 $infodb_value_key eq "docnum" ||
306 $infodb_value_key eq "doctype" ||
307 $infodb_value_key eq "Encoding" ||
308 $infodb_value_key eq "FileSize" ||
309 $infodb_value_key eq "hascover" ||
310 $infodb_value_key eq "hastxt" ||
311 $infodb_value_key eq "lastmodified" ||
312 $infodb_value_key eq "metadataset" ||
313 $infodb_value_key eq "thistype" ||
314 $infodb_value_key =~ /^metadatafreq\-/ ||
315 $infodb_value_key =~ /^metadatalist\-/);
316
317 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
318 {
319 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
320 }
321 }
322 }
323}
324
325
326sub sqlite_safe
327{
328 my $value = shift(@_);
329
330 # Escape any single quotes in the value
331 $value =~ s/\'/\'\'/g;
332
333 return $value;
334}
335
336
3371;
Note: See TracBrowser for help on using the repository browser.