source: gsdl/trunk/perllib/dbutil.pm@ 16178

Last change on this file since 16178 was 16178, checked in by mdewsnip, 16 years ago

Greatly improved SQLite database writing speed by adding "BEGIN TRANSACTION" and "END TRANSACTION" around all the write commands.

File size: 9.1 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
# Closes an infodb write handle using the routine that matches the database type.
#   $infodb_type   - "sqlite" selects the SQLite backend; any other value
#                    (including an empty one) selects GDBM
#   $infodb_handle - the write handle to close
# Returns whatever the backend-specific close routine returns.
sub close_infodb_write_handle
{
    my ($infodb_type, $infodb_handle) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? close_infodb_write_handle_sqlite($infodb_handle)
        : close_infodb_write_handle_gdbm($infodb_handle);
}
44
45
# Returns the name of the infodb type that is used by default ("gdbm").
sub get_default_infodb_type
{
    my $default_infodb_type = "gdbm";
    return $default_infodb_type;
}
50
51
# Works out the path of the infodb file for a collection.
#   $infodb_type           - "sqlite" selects the SQLite backend; any other
#                            value (including an empty one) selects GDBM
#   $collection_name       - name of the collection the database belongs to
#   $infodb_directory_path - directory that holds the database file
# Returns the path produced by the backend-specific routine.
sub get_infodb_file_path
{
    my ($infodb_type, $collection_name, $infodb_directory_path) = @_;

    # Anything that is not explicitly SQLite is handled by GDBM
    unless ($infodb_type eq "sqlite")
    {
        return get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
    }

    return get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
}
66
67
# Opens a handle for writing entries to an infodb file.
#   $infodb_type      - "sqlite" selects the SQLite backend; any other value
#                       (including an empty one) selects GDBM
#   $infodb_file_path - path of the database file to write
# Returns whatever the backend-specific open routine returns (both backends
# in this file return a handle, or undef on failure).
sub open_infodb_write_handle
{
    my ($infodb_type, $infodb_file_path) = @_;

    my $use_sqlite = ($infodb_type eq "sqlite");
    if ($use_sqlite)
    {
        return open_infodb_write_handle_sqlite($infodb_file_path);
    }
    else
    {
        # GDBM is the fallback for an empty or unrecognised infodb type
        return open_infodb_write_handle_gdbm($infodb_file_path);
    }
}
81
82
# Reads an infodb file into the supplied hash reference.
#   $infodb_type      - "sqlite" selects the SQLite backend; any other value
#                       (including an empty one) selects GDBM
#   $infodb_file_path - path of the database file to read
#   $infodb_map       - hash reference handed to the backend reader
# Returns whatever the backend-specific read routine returns.
sub read_infodb_file
{
    my ($infodb_type, $infodb_file_path, $infodb_map) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? read_infodb_file_sqlite($infodb_file_path, $infodb_map)
        : read_infodb_file_gdbm($infodb_file_path, $infodb_map);
}
97
98
# Writes one entry to an open infodb write handle.
#   $infodb_type   - "sqlite" selects the SQLite backend; any other value
#                    (including an empty one) selects GDBM
#   $infodb_handle - write handle for the database
#   $infodb_key    - key of the entry being written
#   $infodb_map    - hash reference mapping each field name to an array
#                    reference of values
# Returns whatever the backend-specific write routine returns.
sub write_infodb_entry
{
    my ($infodb_type, $infodb_handle, $infodb_key, $infodb_map) = @_;

    # Anything that is not explicitly SQLite is handled by GDBM
    unless ($infodb_type eq "sqlite")
    {
        return write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
    }

    return write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
}
114
115
116
117# ----------------------------------------------------------------------------------------
118# GDBM IMPLEMENTATION
119# ----------------------------------------------------------------------------------------
120
# Closes a GDBM write handle.
#   $infodb_handle - the handle to close
# Returns the result of close().
sub close_infodb_write_handle_gdbm
{
    my ($txt2db_pipe) = @_;

    return close($txt2db_pipe);
}
127
128
# Builds the path of the GDBM database file for a collection.
#   $collection_name       - collection name; only the part returned by
#                            util::get_dirsep_tail() is used for the file name
#   $infodb_directory_path - directory that holds the database file
# The extension is ".ldb" when util::is_little_endian() is true and ".bdb"
# otherwise.
sub get_infodb_file_path_gdbm
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $infodb_file_extension;
    if (util::is_little_endian())
    {
        $infodb_file_extension = ".ldb";
    }
    else
    {
        $infodb_file_extension = ".bdb";
    }

    my $infodb_file_name = util::get_dirsep_tail($collection_name) . $infodb_file_extension;
    return util::filename_cat($infodb_directory_path, $infodb_file_name);
}
138
139
# Opens a pipe to the txt2db executable so that entries can be written to a
# GDBM database file.
#   $infodb_file_path - path of the database file, passed to txt2db
# The executable is looked up under $GSDLHOME/bin/$GSDLOS.
# Returns the pipe handle, or undef if the executable does not exist or the
# pipe cannot be opened.
sub open_infodb_write_handle_gdbm
{
    my ($infodb_file_path) = @_;

    my $bin_directory = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $txt2db_exe = util::filename_cat($bin_directory, "txt2db" . util::get_os_exe());

    # Without the executable there is nothing to pipe the entries into
    return undef unless (-e "$txt2db_exe");

    my $infodb_file_handle = undef;
    my $pipe_command = "| $txt2db_exe \"$infodb_file_path\"";
    return undef unless open($infodb_file_handle, $pipe_command);

    return $infodb_file_handle;
}
153
154
# Reads a GDBM database file by piping it through "db2txt" and stores every
# entry in the supplied hash.
#   $infodb_file_path - path of the database file, passed to db2txt
#   $infodb_map       - hash reference that receives key => value pairs, where
#                       the value is the raw text of all lines found between
#                       the "[key]" line and the separator line
# Dies if the pipe from db2txt cannot be opened.
# Returns the result of closing the pipe.
sub read_infodb_file_gdbm
{
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);

    # A lexical handle is used (rather than a global bareword) so that this
    # function cannot clobber, or be clobbered by, another handle of the same
    # name elsewhere in the package. The command is a single string, so it is
    # still run through the shell exactly as before.
    my $db2txt_pipe = undef;
    open($db2txt_pipe, '-|', "db2txt \"$infodb_file_path\"") || die "couldn't open pipe from db2txt\n";

    my $infodb_key = "";
    my $infodb_value = "";
    while (defined(my $infodb_line = <$db2txt_pipe>))
    {
        if ($infodb_line =~ /^\[([^\]]+)\]$/)
        {
            # A "[key]" line starts a new entry
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/)
        {
            # A line of exactly 70 hyphens ends the current entry
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else
        {
            # Everything else is part of the current entry's value
            $infodb_value .= $infodb_line;
        }
    }

    return close($db2txt_pipe);
}
184
185
# Writes one entry to a GDBM write handle in the text format read by txt2db:
# a "[key]" line, one "<field>value" line per value, and a closing line of
# 70 hyphens.
#   $infodb_handle - handle to print the entry to
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference mapping each field name to an array
#                    reference of values; it is never modified
# Returns the result of the final print.
sub write_infodb_entry_gdbm
{
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (keys(%$infodb_map))
    {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
        {
            # The loop variable is an alias for the caller's array element, so
            # the escaping is applied to a copy to leave the caller's data intact
            my $output_value = $infodb_value;
            if ($output_value =~ /-{70,}/)
            {
                # if value contains 70 or more hyphens in a row we need to escape them
                # to prevent txt2db from treating them as a separator
                $output_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $output_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
208
209
210
211# ----------------------------------------------------------------------------------------
212# SQLITE IMPLEMENTATION
213# ----------------------------------------------------------------------------------------
214
# Finishes writing to an SQLite write handle: sends "END TRANSACTION" (the
# matching "BEGIN TRANSACTION" is sent when the handle is opened) and then
# closes the handle.
#   $infodb_handle - the handle to close
# Returns the result of close().
sub close_infodb_write_handle_sqlite
{
    my ($sqlite_pipe) = @_;

    print {$sqlite_pipe} "END TRANSACTION;\n";

    return close($sqlite_pipe);
}
223
224
# Builds the path of the SQLite database file for a collection.
#   $collection_name       - collection name; only the part returned by
#                            util::get_dirsep_tail() is used for the file name
#   $infodb_directory_path - directory that holds the database file
# The file name always ends in ".db".
sub get_infodb_file_path_sqlite
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $infodb_file_name = util::get_dirsep_tail($collection_name) . ".db";

    return util::filename_cat($infodb_directory_path, $infodb_file_name);
}
234
235
# Opens a pipe to the sqlite3 executable so that entries can be written to an
# SQLite database file, creates the tables if necessary, and starts a
# transaction.
#   $infodb_file_path - path of the database file, passed to sqlite3
# The executable is looked up under $GSDLHOME/bin/$GSDLOS.
# Returns the pipe handle, or undef if the executable does not exist or the
# pipe cannot be opened.
sub open_infodb_write_handle_sqlite
{
    my $infodb_file_path = shift(@_);

    my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
    my $infodb_handle = undef;
    if (!-e "$sqlite3_exe" || !open($infodb_handle, "| $sqlite3_exe \"$infodb_file_path\""))
    {
        return undef;
    }

    # "IF NOT EXISTS" lets an existing database file be reopened without
    # sqlite3 reporting "table already exists" errors (entries are replaced
    # individually by the DELETE/INSERT pairs issued when they are written)
    print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT, value TEXT, PRIMARY KEY(key));\n";
    print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";

    # This is very important for efficiency, otherwise each command will be actioned one at a time
    print $infodb_handle "BEGIN TRANSACTION;\n";

    return $infodb_handle;
}
255
256
# Placeholder for reading an SQLite database file; nothing is read yet.
#   $infodb_file_path - path of the database file (currently unused)
#   $infodb_map       - hash reference intended to receive the entries
#                       (currently left untouched)
sub read_infodb_file_sqlite
{
    my ($infodb_file_path, $infodb_map) = @_;

    # !! TO IMPLEMENT

    # The sub has always evaluated to its second argument; keep doing so
    return $infodb_map;
}
264
265
# Writes one entry to an SQLite write handle as SQL statements.
#   $infodb_handle - handle to print the SQL to
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference mapping each field name to an array
#                    reference of values
# The entry is always stored in the "data" table as "<field>value" lines.
# When the key contains no "." and the entry has a "<doctype>doc" line, the
# values of the fields that are not on the skip list are also stored, one row
# each, in the "document_metadata" table (for use by the dynamic classifiers).
sub write_infodb_entry_sqlite
{
    my ($infodb_handle, $infodb_key, $infodb_map) = @_;

    my @field_names = keys(%$infodb_map);

    # Flatten the whole entry into the text stored in the "data" table
    my $infodb_entry_value = "";
    foreach my $field_name (@field_names)
    {
        foreach my $field_value (@{$infodb_map->{$field_name}})
        {
            $infodb_entry_value .= "<$field_name>" . $field_value . "\n";
        }
    }

    # Replace any existing row for this key
    my $safe_infodb_key = sqlite_safe($infodb_key);
    print $infodb_handle sprintf("DELETE FROM data WHERE key='%s';\n", $safe_infodb_key);
    print $infodb_handle sprintf("INSERT INTO data (key, value) VALUES ('%s', '%s');\n",
                                 $safe_infodb_key, sqlite_safe($infodb_entry_value));

    # Only top-level document entries contribute to "document_metadata"
    if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
    {
        print $infodb_handle sprintf("DELETE FROM document_metadata WHERE docOID='%s';\n", $safe_infodb_key);

        # We're not interested in most of the automatically added document metadata
        my %is_skipped_field = map { $_ => 1 } qw(
            archivedir assocfilepath childtype contains docnum doctype Encoding
            FileSize hascover hastxt lastmodified metadataset thistype
        );

        foreach my $field_name (@field_names)
        {
            next if ($is_skipped_field{$field_name});
            next if ($field_name =~ /^metadata(?:freq|list)\-/);

            my $safe_field_name = sqlite_safe($field_name);
            foreach my $field_value (@{$infodb_map->{$field_name}})
            {
                print $infodb_handle sprintf("INSERT INTO document_metadata (docOID, element, value) VALUES ('%s', '%s', '%s');\n",
                                             $safe_infodb_key, $safe_field_name, sqlite_safe($field_value));
            }
        }
    }
}
318
319
# Makes a value safe to place between single quotes in an SQL statement by
# doubling every single quote it contains.
#   $value - the text to escape (the caller's variable is not modified)
# Returns the escaped copy.
sub sqlite_safe
{
    (my $escaped_value = $_[0]) =~ s{'}{''}g;

    return $escaped_value;
}
329
330
3311;
Note: See TracBrowser for help on using the repository browser.