source: main/trunk/greenstone2/perllib/dbutil/sqlite.pm@ 24190

Last change on this file since 24190 was 24190, checked in by max, 13 years ago

Improve set_infodb_entry to now use optimisations such as transactions, this is very useful when we only update one metadata entry, it is now much faster.
Add a new set of optional optimisations when writing to the database in order to get even faster I/O by using PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL).

File size: 10.7 KB
Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010 DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39
40# -----------------------------------------------------------------------------
41# SQLITE IMPLEMENTATION
42# -----------------------------------------------------------------------------
43
44sub open_infodb_write_handle
45{
46 my $infodb_file_path = shift(@_);
47 my $opt_append = shift(@_);
48
49 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
50 my $infodb_handle = undef;
51
52 if (!-e "$sqlite3_exe")
53 {
54 print STDERR "Error: Unable to find $sqlite3_exe\n";
55 return undef;
56 }
57
58 # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
59 # word "memory". While this is not a problem usually, in our case, this ends up going out
60 # to the web page first, as part of the web page's headers, thus ruining the web page
61 # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
62 # the nul device instead.
63 my $nul_device="";
64 if(defined $db_fast && $db_fast == 1) {
65 if($ENV{'GSDLOS'} =~ m/windows/) {
66 $nul_device=">NUL";
67 } else {
68 $nul_device=">/dev/null"; # linux, mac
69 }
70 }
71
72 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
73 {
74 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
75 print STDERR " $!\n";
76 return undef;
77 }
78
79 binmode($infodb_handle,":utf8");
80
81 # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
82 # They should be set before the transaction begins
83 if (defined $db_fast && $db_fast == 1) {
84 print $infodb_handle "PRAGMA synchronous=OFF;\n";
85 print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
86 }
87
88 # This is very important for efficiency, otherwise each command will be actioned one at a time
89 print $infodb_handle "BEGIN TRANSACTION;\n";
90
91
92
93 if (!(defined $opt_append) || ($opt_append ne "append")) {
94 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
95 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
96 }
97
98 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
99 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
100
101 # This is crucial for efficiency when importing large amounts of data
102 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
103
104 return $infodb_handle;
105}
106
107
108sub close_infodb_write_handle
109{
110 my $infodb_handle = shift(@_);
111
112 # This is crucial for efficient queries on the database!
113 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
114
115 # Close the transaction we began after opening the file
116 print $infodb_handle "END TRANSACTION;\n";
117
118 close($infodb_handle);
119}
120
121
122sub read_infodb_cmd
123{
124 my $infodb_file_path = shift(@_);
125 my $sqlcmd = shift(@_);
126
127 my $result = "";
128
129 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
130 my $infodb_handle = undef;
131 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
132
133 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
134 {
135 print STDERR "Unable to execute: $cmd\n";
136 print STDERR "$!\n";
137 }
138 else {
139
140 binmode($infodb_handle, ":utf8");
141 my $line;
142 while (defined($line=<$infodb_handle>)) {
143 $result .= $line;
144 }
145
146 close($infodb_handle);
147 }
148
149 return $result;
150}
151
152sub get_infodb_file_path
153{
154 my $collection_name = shift(@_);
155 my $infodb_directory_path = shift(@_);
156
157 my $infodb_file_extension = ".db";
158 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
159 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
160}
161
162
163sub read_infodb_file
164{
165 my $infodb_file_path = shift(@_);
166 my $infodb_map = shift(@_);
167
168
169 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
170
171 my @keys = split(/\n/,$keys_str);
172
173 foreach my $k (@keys) {
174
175 my $k_safe = &sqlite_safe($k);
176 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
177
178 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
179
180 $infodb_map->{$k} = $val_str;
181 }
182
183}
184
185
186sub read_infodb_keys
187{
188 my $infodb_file_path = shift(@_);
189 my $infodb_map = shift(@_);
190
191
192 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
193
194 my @keys = split(/\n/,$keys_str);
195
196 foreach my $key (@keys)
197 {
198 $infodb_map->{$key} = 1;
199 }
200}
201
202sub read_infodb_rawentry
203{
204 my $infodb_file_path = shift(@_);
205 my $infodb_key = shift(@_);
206
207
208 my $key_safe = &sqlite_safe($infodb_key);
209 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
210
211 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
212
213 return $val_str
214}
215
216sub read_infodb_entry
217{
218 my $infodb_file_path = shift(@_);
219 my $infodb_key = shift(@_);
220
221 my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
222
223 my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
224
225 return $rec_hash;
226}
227
228
229sub write_infodb_entry
230{
231 my $infodb_handle = shift(@_);
232 my $infodb_key = shift(@_);
233 my $infodb_map = shift(@_);
234
235
236 # Add the key -> value mapping into the "data" table
237 my $infodb_entry_value = "";
238 foreach my $infodb_value_key (keys(%$infodb_map))
239 {
240 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
241 {
242 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
243 }
244 }
245
246 my $safe_infodb_key = &sqlite_safe($infodb_key);
247 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
248
249 # If this infodb entry is for a document, add all the interesting document metadata to the
250 # "document_metadata" table (for use by the dynamic classifiers)
251 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
252 {
253
254 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
255
256 foreach my $infodb_value_key (keys(%$infodb_map))
257 {
258 # We're not interested in most of the automatically added document metadata
259 next if ($infodb_value_key eq "archivedir" ||
260 $infodb_value_key eq "assocfilepath" ||
261 $infodb_value_key eq "childtype" ||
262 $infodb_value_key eq "contains" ||
263 $infodb_value_key eq "docnum" ||
264 $infodb_value_key eq "doctype" ||
265 $infodb_value_key eq "Encoding" ||
266 $infodb_value_key eq "FileSize" ||
267 $infodb_value_key eq "hascover" ||
268 $infodb_value_key eq "hastxt" ||
269 $infodb_value_key eq "lastmodified" ||
270 $infodb_value_key eq "metadataset" ||
271 $infodb_value_key eq "thistype" ||
272 $infodb_value_key =~ /^metadatafreq\-/ ||
273 $infodb_value_key =~ /^metadatalist\-/);
274
275 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
276 {
277 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
278 }
279 }
280 }
281
282 #### DEBUGGING
283 #my $new_file = "D:\\sql.txt";
284 #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
285 #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
286 #close(FOUT);
287 #print STDERR $insertStatementsBuffer;
288 #### END DEBUGGING
289}
290
291sub write_infodb_rawentry
292{
293 my $infodb_handle = shift(@_);
294 my $infodb_key = shift(@_);
295 my $infodb_val = shift(@_);
296
297 my $safe_infodb_key = &sqlite_safe($infodb_key);
298 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
299}
300
301
302sub set_infodb_entry
303{
304 my $infodb_file_path = shift(@_);
305 my $infodb_key = shift(@_);
306 my $infodb_map = shift(@_);
307 my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
308
309 if (!defined $infodb_handle) {
310 print STDERR "Error: Failed to open infodb write handle\n";
311 return -1;
312 }
313 else {
314 write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
315 close_infodb_write_handle($infodb_handle);
316 }
317 # Not currently checking for errors on write to DB
318 return 0;
319
320}
321
322
323
324sub delete_infodb_entry
325{
326 my $infodb_handle = shift(@_);
327 my $infodb_key = shift(@_);
328
329 # Delete the key from the "data" table
330
331 my $safe_infodb_key = &sqlite_safe($infodb_key);
332
333 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
334
335 # If this infodb entry is for a document, delete the
336 # "document_metadata" table entry also (for use by the dynamic classifiers)
337 if ($infodb_key !~ /\./)
338 {
339 # Possible for there not to be a docOID matching this infodb_key
340 # (entries are only made when <doctype> == doc
341 # Attempt to delete it, and don't complain if one isn't found
342
343 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
344
345 }
346}
347
348
349sub sqlite_safe
350{
351 my $value = shift(@_);
352
353 # Escape any single quotes in the value
354 $value =~ s/\'/\'\'/g;
355
356 return $value;
357}
358
359
3601;
Note: See TracBrowser for help on using the repository browser.