source: gs2-extensions/parallel-building/trunk/src/perllib/dbutil/sqlite.pm@ 24673

Last change on this file since 24673 was 24673, checked in by jmt12, 13 years ago

Added switch and code for enabling WAL when accessing SQLite, but it didn't seem to have the desired affect

File size: 11.0 KB
Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010 DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39# Set to 1 to enable Write Ahead Logging - which is supposed to allow multiple
40# readers/writers on a SQLite database (overridden by db_fast).
41my $db_wal = 0;
42
43
44# -----------------------------------------------------------------------------
45# SQLITE IMPLEMENTATION
46# -----------------------------------------------------------------------------
47
48sub open_infodb_write_handle
49{
50 my $infodb_file_path = shift(@_);
51 my $opt_append = shift(@_);
52
53 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
54 my $infodb_handle = undef;
55
56 if (!-e "$sqlite3_exe")
57 {
58 print STDERR "Error: Unable to find $sqlite3_exe\n";
59 return undef;
60 }
61
62 # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
63 # word "memory". While this is not a problem usually, in our case, this ends up going out
64 # to the web page first, as part of the web page's headers, thus ruining the web page
65 # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
66 # the nul device instead.
67 my $nul_device="";
68 if($db_fast || $db_wal) {
69 if($ENV{'GSDLOS'} =~ m/windows/) {
70 $nul_device=">NUL";
71 } else {
72 $nul_device=">/dev/null"; # linux, mac
73 }
74 }
75
76 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
77 {
78 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
79 print STDERR " $!\n";
80 return undef;
81 }
82
83 binmode($infodb_handle,":utf8");
84
85 # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
86 # They should be set before the transaction begins
87 if (defined $db_fast && $db_fast == 1) {
88 print $infodb_handle "PRAGMA synchronous=OFF;\n";
89 print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
90 }
91 # Allow parallel readers/writers by using a Write Ahead Logger
92 elsif ($db_wal)
93 {
94 print $infodb_handle "PRAGMA journal_mode=WAL;\n";
95 }
96
97 # This is very important for efficiency, otherwise each command will be actioned one at a time
98 print $infodb_handle "BEGIN TRANSACTION;\n";
99
100
101
102 if (!(defined $opt_append) || ($opt_append ne "append")) {
103 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
104 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
105 }
106
107 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
108 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
109
110 # This is crucial for efficiency when importing large amounts of data
111 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
112
113 return $infodb_handle;
114}
115
116
117sub close_infodb_write_handle
118{
119 my $infodb_handle = shift(@_);
120
121 # This is crucial for efficient queries on the database!
122 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
123
124 # Close the transaction we began after opening the file
125 print $infodb_handle "END TRANSACTION;\n";
126
127 close($infodb_handle);
128}
129
130
131sub read_infodb_cmd
132{
133 my $infodb_file_path = shift(@_);
134 my $sqlcmd = shift(@_);
135
136 my $result = "";
137
138 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
139 my $infodb_handle = undef;
140 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
141
142 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
143 {
144 print STDERR "Unable to execute: $cmd\n";
145 print STDERR "$!\n";
146 }
147 else {
148
149 binmode($infodb_handle, ":utf8");
150 my $line;
151 while (defined($line=<$infodb_handle>)) {
152 $result .= $line;
153 }
154
155 close($infodb_handle);
156 }
157
158 return $result;
159}
160
161sub get_infodb_file_path
162{
163 my $collection_name = shift(@_);
164 my $infodb_directory_path = shift(@_);
165
166 my $infodb_file_extension = ".db";
167 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
168 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
169}
170
171
172sub read_infodb_file
173{
174 my $infodb_file_path = shift(@_);
175 my $infodb_map = shift(@_);
176
177
178 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
179
180 my @keys = split(/\n/,$keys_str);
181
182 foreach my $k (@keys) {
183
184 my $k_safe = &sqlite_safe($k);
185 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
186
187 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
188
189 $infodb_map->{$k} = $val_str;
190 }
191
192}
193
194
195sub read_infodb_keys
196{
197 my $infodb_file_path = shift(@_);
198 my $infodb_map = shift(@_);
199
200
201 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
202
203 my @keys = split(/\n/,$keys_str);
204
205 foreach my $key (@keys)
206 {
207 $infodb_map->{$key} = 1;
208 }
209}
210
211sub read_infodb_rawentry
212{
213 my $infodb_file_path = shift(@_);
214 my $infodb_key = shift(@_);
215
216
217 my $key_safe = &sqlite_safe($infodb_key);
218 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
219
220 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
221
222 return $val_str
223}
224
225sub read_infodb_entry
226{
227 my $infodb_file_path = shift(@_);
228 my $infodb_key = shift(@_);
229
230 my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
231
232 my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
233
234 return $rec_hash;
235}
236
237
238sub write_infodb_entry
239{
240 my $infodb_handle = shift(@_);
241 my $infodb_key = shift(@_);
242 my $infodb_map = shift(@_);
243
244
245 # Add the key -> value mapping into the "data" table
246 my $infodb_entry_value = "";
247 foreach my $infodb_value_key (keys(%$infodb_map))
248 {
249 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
250 {
251 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
252 }
253 }
254
255 my $safe_infodb_key = &sqlite_safe($infodb_key);
256 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
257
258 # If this infodb entry is for a document, add all the interesting document metadata to the
259 # "document_metadata" table (for use by the dynamic classifiers)
260 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
261 {
262
263 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
264
265 foreach my $infodb_value_key (keys(%$infodb_map))
266 {
267 # We're not interested in most of the automatically added document metadata
268 next if ($infodb_value_key eq "archivedir" ||
269 $infodb_value_key eq "assocfilepath" ||
270 $infodb_value_key eq "childtype" ||
271 $infodb_value_key eq "contains" ||
272 $infodb_value_key eq "docnum" ||
273 $infodb_value_key eq "doctype" ||
274 $infodb_value_key eq "Encoding" ||
275 $infodb_value_key eq "FileSize" ||
276 $infodb_value_key eq "hascover" ||
277 $infodb_value_key eq "hastxt" ||
278 $infodb_value_key eq "lastmodified" ||
279 $infodb_value_key eq "metadataset" ||
280 $infodb_value_key eq "thistype" ||
281 $infodb_value_key =~ /^metadatafreq\-/ ||
282 $infodb_value_key =~ /^metadatalist\-/);
283
284 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
285 {
286 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
287 }
288 }
289 }
290
291 #### DEBUGGING
292 #my $new_file = "D:\\sql.txt";
293 #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
294 #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
295 #close(FOUT);
296 #print STDERR $insertStatementsBuffer;
297 #### END DEBUGGING
298}
299
300sub write_infodb_rawentry
301{
302 my $infodb_handle = shift(@_);
303 my $infodb_key = shift(@_);
304 my $infodb_val = shift(@_);
305
306 my $safe_infodb_key = &sqlite_safe($infodb_key);
307 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
308}
309
310
311sub set_infodb_entry
312{
313 my $infodb_file_path = shift(@_);
314 my $infodb_key = shift(@_);
315 my $infodb_map = shift(@_);
316 my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
317
318 if (!defined $infodb_handle) {
319 print STDERR "Error: Failed to open infodb write handle\n";
320 return -1;
321 }
322 else {
323 write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
324 close_infodb_write_handle($infodb_handle);
325 }
326 # Not currently checking for errors on write to DB
327 return 0;
328
329}
330
331
332
333sub delete_infodb_entry
334{
335 my $infodb_handle = shift(@_);
336 my $infodb_key = shift(@_);
337
338 # Delete the key from the "data" table
339
340 my $safe_infodb_key = &sqlite_safe($infodb_key);
341
342 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
343
344 # If this infodb entry is for a document, delete the
345 # "document_metadata" table entry also (for use by the dynamic classifiers)
346 if ($infodb_key !~ /\./)
347 {
348 # Possible for there not to be a docOID matching this infodb_key
349 # (entries are only made when <doctype> == doc
350 # Attempt to delete it, and don't complain if one isn't found
351
352 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
353
354 }
355}
356
357
358sub sqlite_safe
359{
360 my $value = shift(@_);
361
362 # Escape any single quotes in the value
363 $value =~ s/\'/\'\'/g;
364
365 return $value;
366}
367
368
3691;
Note: See TracBrowser for help on using the repository browser.