source: gsdl/trunk/perllib/dbutil.pm@ 16339

Last change on this file since 16339 was 16339, checked in by davidb, 16 years ago

Added quotes around exec of 'txt2db' so it will work from within a folder with spaces in its name

File size: 9.4 KB
RevLine 
[15699]1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
# Closes an infodb write handle using the routine for the given backend.
#
# Arguments:
#   $infodb_type   - backend name; "sqlite" selects the SQLite routine, any
#                    other value (including the empty string) selects GDBM
#   $infodb_handle - the write handle to close
#
# Returns whatever the backend-specific close routine returns.
sub close_infodb_write_handle
{
    my ($infodb_type, $infodb_handle) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    unless ($infodb_type eq "sqlite")
    {
        return &close_infodb_write_handle_gdbm($infodb_handle);
    }

    return &close_infodb_write_handle_sqlite($infodb_handle);
}
44
45
# Returns the name of the default infodb backend ("gdbm").
sub get_default_infodb_type
{
    my $default_infodb_type = "gdbm";
    return $default_infodb_type;
}
50
51
# Builds the path of the infodb file for a collection.
#
# Arguments:
#   $infodb_type           - backend name; "sqlite" selects the SQLite routine,
#                            any other value (including empty) selects GDBM
#   $collection_name       - passed through to the backend routine
#   $infodb_directory_path - passed through to the backend routine
#
# Returns whatever the backend-specific path routine returns.
sub get_infodb_file_path
{
    my ($infodb_type, $collection_name, $infodb_directory_path) = @_;

    # Pick the backend routine first; GDBM is the fallback
    my $path_builder = ($infodb_type eq "sqlite")
        ? \&get_infodb_file_path_sqlite
        : \&get_infodb_file_path_gdbm;

    return $path_builder->($collection_name, $infodb_directory_path);
}
66
67
# Opens a write handle onto an infodb file using the given backend.
#
# Arguments:
#   $infodb_type      - backend name; "sqlite" selects the SQLite routine, any
#                       other value (including empty) selects GDBM
#   $infodb_file_path - path of the database file, passed to the backend
#
# Returns whatever the backend-specific open routine returns.
sub open_infodb_write_handle
{
    my ($infodb_type, $infodb_file_path) = @_;

    if ($infodb_type eq "sqlite")
    {
        return &open_infodb_write_handle_sqlite($infodb_file_path);
    }
    else
    {
        # Use GDBM if the infodb type is empty or not one of the values above
        return &open_infodb_write_handle_gdbm($infodb_file_path);
    }
}
81
82
# Reads an infodb file into a hash using the given backend.
#
# Arguments:
#   $infodb_type      - backend name; "sqlite" selects the SQLite routine, any
#                       other value (including empty) selects GDBM
#   $infodb_file_path - path of the database file to read
#   $infodb_map       - hash reference handed to the backend routine
#
# Returns whatever the backend-specific read routine returns.
sub read_infodb_file
{
    my ($infodb_type, $infodb_file_path, $infodb_map) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return &read_infodb_file_gdbm($infodb_file_path, $infodb_map)
        if ($infodb_type ne "sqlite");

    return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
}
97
98
# Writes one entry to an open infodb write handle using the given backend.
#
# Arguments:
#   $infodb_type   - backend name; "sqlite" selects the SQLite routine, any
#                    other value (including empty) selects GDBM
#   $infodb_handle - write handle, passed to the backend routine
#   $infodb_key    - key of the entry, passed to the backend routine
#   $infodb_map    - hash reference holding the entry's values
#
# Returns whatever the backend-specific write routine returns.
sub write_infodb_entry
{
    my ($infodb_type, $infodb_handle, $infodb_key, $infodb_map) = @_;

    # Pick the backend routine first; GDBM is the fallback
    my $entry_writer = ($infodb_type eq "sqlite")
        ? \&write_infodb_entry_sqlite
        : \&write_infodb_entry_gdbm;

    return $entry_writer->($infodb_handle, $infodb_key, $infodb_map);
}
114
[15705]115
116
117# ----------------------------------------------------------------------------------------
[15746]118# GDBM IMPLEMENTATION
[15705]119# ----------------------------------------------------------------------------------------
120
# Closes the given GDBM write handle.
#
# Arguments:
#   $infodb_handle - the handle to close
#
# Returns the result of close().
sub close_infodb_write_handle_gdbm
{
    my ($infodb_handle) = @_;

    return close($infodb_handle);
}
127
128
# Builds the path of the GDBM infodb file for a collection.
#
# Arguments:
#   $collection_name       - collection name; only the part returned by
#                            util::get_dirsep_tail() is used for the file name
#   $infodb_directory_path - directory the file name is joined onto
#
# The file extension is ".ldb" when util::is_little_endian() is true and
# ".bdb" otherwise.
#
# Returns the result of util::filename_cat() on the directory and file name.
sub get_infodb_file_path_gdbm
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $infodb_file_extension;
    if (&util::is_little_endian())
    {
        $infodb_file_extension = ".ldb";
    }
    else
    {
        $infodb_file_extension = ".bdb";
    }

    my $collection_tail = &util::get_dirsep_tail($collection_name);
    my $infodb_file_name = $collection_tail . $infodb_file_extension;

    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
}
138
139
# Opens a pipe to the txt2db program so entries can be written to a GDBM file.
#
# Arguments:
#   $infodb_file_path - path of the database file, passed to txt2db
#
# The txt2db executable is looked up under $GSDLHOME/bin/$GSDLOS. Both the
# executable and the database path are quoted in the command so that paths
# containing spaces work.
#
# Returns the pipe handle, or undef if txt2db does not exist or the pipe
# could not be opened.
sub open_infodb_write_handle_gdbm
{
    my ($infodb_file_path) = @_;

    my $bin_directory_path = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $txt2db_exe = &util::filename_cat($bin_directory_path, "txt2db" . &util::get_os_exe());

    # Nothing to pipe into if the executable is missing
    return undef unless (-e $txt2db_exe);

    my $infodb_file_handle = undef;
    my $txt2db_command = "| \"$txt2db_exe\" \"$infodb_file_path\"";
    return undef unless (open($infodb_file_handle, $txt2db_command));

    return $infodb_file_handle;
}
153
154
# Reads a GDBM infodb file into a hash by parsing the output of db2txt.
#
# Arguments:
#   $infodb_file_path - path of the database file, passed (quoted) to db2txt
#   $infodb_map       - hash reference that receives one key => value pair
#                       per entry
#
# The db2txt output is parsed line by line:
#   - a line of the form "[key]" sets the key of the current entry
#   - a line of exactly 70 hyphens ends the entry and stores it in the map
#   - any other line (newline included) is appended to the entry's value
#
# Dies if the pipe from db2txt cannot be opened. Returns the result of
# closing the pipe.
sub read_infodb_file_gdbm
{
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);

    # A lexical handle with an explicit "-|" mode replaces the package-global
    # bareword handle, so nested or repeated calls cannot clobber each other.
    # The command is still a single string, so it is run exactly as before.
    open (my $db2txt_pipe, "-|", "db2txt \"$infodb_file_path\"") || die "couldn't open pipe from db2txt\n";

    my $infodb_line = "";
    my $infodb_key = "";
    my $infodb_value = "";
    while (defined ($infodb_line = <$db2txt_pipe>))
    {
        if ($infodb_line =~ /^\[([^\]]+)\]$/)
        {
            # Start of an entry
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/)
        {
            # End of an entry: store it and reset for the next one
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else
        {
            $infodb_value .= $infodb_line;
        }
    }

    return close ($db2txt_pipe);
}
184
[15699]185
# Writes one entry to a GDBM write handle in the text format read by txt2db.
#
# Arguments:
#   $infodb_handle - handle to print to
#   $infodb_key    - key of the entry, written as "[key]"
#   $infodb_map    - hash reference mapping each value key to an array
#                    reference of values; each value is written on its own
#                    line as "<value key>value"
#
# The entry is terminated by a line of 70 hyphens. The caller's map is not
# modified.
sub write_infodb_entry_gdbm
{
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (keys(%$infodb_map))
    {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
        {
            # The loop variable is an alias of the caller's array element, so
            # escape a copy rather than rewriting the caller's metadata
            my $escaped_value = $infodb_value;
            if ($escaped_value =~ /-{70,}/)
            {
                # if value contains 70 or more hyphens in a row we need to escape them
                # to prevent txt2db from treating them as a separator
                $escaped_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $escaped_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
208
209
[15722]210
211# ----------------------------------------------------------------------------------------
[15746]212# SQLITE IMPLEMENTATION
[15722]213# ----------------------------------------------------------------------------------------
214
# Finishes writing to an SQLite write handle and closes it.
#
# Arguments:
#   $infodb_handle - handle that SQL statements are printed to
#
# Before closing, two statements are written: one ending the transaction
# begun when the handle was opened, and one creating the "dme" index on
# document_metadata(element).
#
# Returns the result of close().
sub close_infodb_write_handle_sqlite
{
    my ($infodb_handle) = @_;

    my @closing_statements = (
        # Close the transaction we began after opening the file
        "END TRANSACTION;\n",
        # This is crucial for efficient queries on the database!
        "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n",
    );

    foreach my $closing_statement (@closing_statements)
    {
        print $infodb_handle $closing_statement;
    }

    return close($infodb_handle);
}
227
228
# Builds the path of the SQLite infodb file for a collection.
#
# Arguments:
#   $collection_name       - collection name; only the part returned by
#                            util::get_dirsep_tail() is used for the file name
#   $infodb_directory_path - directory the file name is joined onto
#
# Returns the result of util::filename_cat() on the directory and the
# file name, which always ends in ".db".
sub get_infodb_file_path_sqlite
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $infodb_file_name = join("", &util::get_dirsep_tail($collection_name), ".db");

    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
}
238
239
# Opens a pipe to the sqlite3 program and prepares the database for writing.
#
# Arguments:
#   $infodb_file_path - path of the database file, passed to sqlite3
#
# The sqlite3 executable is looked up under $GSDLHOME/bin/$GSDLOS. After the
# pipe is opened, statements are written to create the "data" and
# "document_metadata" tables and the "dmd" index if they do not exist, and
# to begin a transaction (ended by close_infodb_write_handle_sqlite).
#
# Returns the pipe handle, or undef if sqlite3 does not exist or the pipe
# could not be opened.
sub open_infodb_write_handle_sqlite
{
    my $infodb_file_path = shift(@_);

    my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
    my $infodb_handle = undef;

    # Quote the executable as well as the database path, as is done for txt2db,
    # so this works when Greenstone is installed in a folder with spaces in its name
    if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
    {
        return undef;
    }

    print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
    print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";

    # This is crucial for efficiency when importing large amounts of data
    print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";

    # This is very important for efficiency, otherwise each command will be actioned one at a time
    print $infodb_handle "BEGIN TRANSACTION;\n";

    return $infodb_handle;
}
262
263
# Placeholder for reading an SQLite infodb file into a hash.
#
# Arguments:
#   $infodb_file_path - path of the database file (currently unused)
#   $infodb_map       - hash reference (currently left untouched)
#
# Reading is not implemented yet: nothing is read and the map is not filled.
# Returns $infodb_map as it was passed in.
sub read_infodb_file_sqlite
{
    my ($infodb_file_path, $infodb_map) = @_;

    # !! TO IMPLEMENT

    return $infodb_map;
}
271
272
# Writes one entry to an SQLite write handle as SQL statements.
#
# Arguments:
#   $infodb_handle - handle that SQL statements are printed to
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference mapping each value key to an array
#                    reference of values
#
# The whole entry is stored in the "data" table as a single text value made
# of "<value key>value" lines. If the key contains no "." and the entry has
# a "<doctype>doc" line, the entry's metadata is also stored row by row in
# the "document_metadata" table, after deleting any existing rows for the
# key. Keys and values are escaped with sqlite_safe().
sub write_infodb_entry_sqlite
{
    my ($infodb_handle, $infodb_key, $infodb_map) = @_;

    # Add the key -> value mapping into the "data" table
    my $infodb_entry_value = "";
    foreach my $element_name (keys(%$infodb_map))
    {
        foreach my $element_value (@{$infodb_map->{$element_name}})
        {
            $infodb_entry_value .= "<$element_name>$element_value\n";
        }
    }

    my $safe_infodb_key = &sqlite_safe($infodb_key);
    my $safe_infodb_entry_value = &sqlite_safe($infodb_entry_value);
    print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('$safe_infodb_key', '$safe_infodb_entry_value');\n";

    # If this infodb entry is for a document, add all the interesting document metadata to the
    # "document_metadata" table (for use by the dynamic classifiers)
    if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
    {
        print $infodb_handle "DELETE FROM document_metadata WHERE docOID='$safe_infodb_key';\n";

        # We're not interested in most of the automatically added document metadata
        my %is_uninteresting = map { $_ => 1 } (
            "archivedir", "assocfilepath", "childtype", "contains", "docnum",
            "doctype", "Encoding", "FileSize", "hascover", "hastxt",
            "lastmodified", "metadataset", "thistype");

        foreach my $element_name (keys(%$infodb_map))
        {
            next if ($is_uninteresting{$element_name});
            next if ($element_name =~ /^metadatafreq\-/ || $element_name =~ /^metadatalist\-/);

            my $safe_element_name = &sqlite_safe($element_name);
            foreach my $element_value (@{$infodb_map->{$element_name}})
            {
                my $safe_element_value = &sqlite_safe($element_value);
                print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('$safe_infodb_key', '$safe_element_name', '$safe_element_value');\n";
            }
        }
    }
}
324
325
# Makes a value safe to place inside a single-quoted SQL string literal.
#
# Arguments:
#   $value - the text to escape
#
# Returns a copy of the value with every single quote doubled.
sub sqlite_safe
{
    my ($value) = @_;

    # Escape any single quotes in the value
    (my $escaped_value = $value) =~ s{'}{''}g;

    return $escaped_value;
}
335
336
[15699]3371;
Note: See TracBrowser for help on using the repository browser.