source: main/trunk/greenstone2/perllib/dbutil/sqlite.pm@ 27298

Last change on this file since 27298 was 27298, checked in by jmt12, 11 years ago

Add hidden support for the new experimental multiple reader/writer functionality available in SQLite

File size: 11.3 KB
Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010 DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39# Set to 1 to enable Write Ahead Logging - which is supposed to allow multiple
40# readers/writers on a SQLite database (incompatible with db_fast). From SQLite
41# 3.7 onwards, WAL offers limited parallel reader/writer support but is limited
42# to single computers (doesn't work over networked filesystems). For details
43# see: http://www.sqlite.org/draft/wal.html [jmt12]
44my $db_wal = 0;
45
46# -----------------------------------------------------------------------------
47# SQLITE IMPLEMENTATION
48# -----------------------------------------------------------------------------
49
50sub open_infodb_write_handle
51{
52 my $infodb_file_path = shift(@_);
53 my $opt_append = shift(@_);
54
55 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
56 my $infodb_handle = undef;
57
58 if (!-e "$sqlite3_exe")
59 {
60 print STDERR "Error: Unable to find $sqlite3_exe\n";
61 return undef;
62 }
63
64 # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
65 # word "memory". While this is not a problem usually, in our case, this ends up going out
66 # to the web page first, as part of the web page's headers, thus ruining the web page
67 # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
68 # the nul device instead.
69 # using WAL mode (which also changes the journal) suffers a similar issue [jmt12]
70 my $nul_device="";
71 if($db_fast == 1 || $db_wal == 1) {
72 if($ENV{'GSDLOS'} =~ m/windows/) {
73 $nul_device=">NUL";
74 } else {
75 $nul_device=">/dev/null"; # linux, mac
76 }
77 }
78
79 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
80 {
81 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
82 print STDERR " $!\n";
83 return undef;
84 }
85
86 binmode($infodb_handle,":utf8");
87
88 # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
89 # They should be set before the transaction begins
90 if (defined $db_fast && $db_fast == 1) {
91 print $infodb_handle "PRAGMA synchronous=OFF;\n";
92 print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
93 }
94 # Allow parallel readers/writers by using a Write Ahead Logger
95 elsif ($db_wal)
96 {
97 print $infodb_handle "PRAGMA journal_mode=WAL;\n";
98 }
99
100 # This is very important for efficiency, otherwise each command will be actioned one at a time
101 print $infodb_handle "BEGIN TRANSACTION;\n";
102
103
104
105 if (!(defined $opt_append) || ($opt_append ne "append")) {
106 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
107 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
108 }
109
110 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
111 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
112
113 # This is crucial for efficiency when importing large amounts of data
114 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
115
116 return $infodb_handle;
117}
118
119
120sub close_infodb_write_handle
121{
122 my $infodb_handle = shift(@_);
123
124 # This is crucial for efficient queries on the database!
125 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
126
127 # Close the transaction we began after opening the file
128 print $infodb_handle "END TRANSACTION;\n";
129
130 close($infodb_handle);
131}
132
133
134sub read_infodb_cmd
135{
136 my $infodb_file_path = shift(@_);
137 my $sqlcmd = shift(@_);
138
139 my $result = "";
140
141 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
142 my $infodb_handle = undef;
143 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
144
145 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
146 {
147 print STDERR "Unable to execute: $cmd\n";
148 print STDERR "$!\n";
149 }
150 else {
151
152 binmode($infodb_handle, ":utf8");
153 my $line;
154 while (defined($line=<$infodb_handle>)) {
155 $result .= $line;
156 }
157
158 close($infodb_handle);
159 }
160
161 return $result;
162}
163
164sub get_infodb_file_path
165{
166 my $collection_name = shift(@_);
167 my $infodb_directory_path = shift(@_);
168
169 my $infodb_file_extension = ".db";
170 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
171 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
172}
173
174
175sub read_infodb_file
176{
177 my $infodb_file_path = shift(@_);
178 my $infodb_map = shift(@_);
179
180
181 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
182
183 my @keys = split(/\n/,$keys_str);
184
185 foreach my $k (@keys) {
186
187 my $k_safe = &sqlite_safe($k);
188 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
189
190 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
191
192 $infodb_map->{$k} = $val_str;
193 }
194
195}
196
197
198sub read_infodb_keys
199{
200 my $infodb_file_path = shift(@_);
201 my $infodb_map = shift(@_);
202
203
204 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
205
206 my @keys = split(/\n/,$keys_str);
207
208 foreach my $key (@keys)
209 {
210 $infodb_map->{$key} = 1;
211 }
212}
213
214sub read_infodb_rawentry
215{
216 my $infodb_file_path = shift(@_);
217 my $infodb_key = shift(@_);
218
219
220 my $key_safe = &sqlite_safe($infodb_key);
221 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
222
223 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
224
225 return $val_str
226}
227
228sub read_infodb_entry
229{
230 my $infodb_file_path = shift(@_);
231 my $infodb_key = shift(@_);
232
233 my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
234
235 my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
236
237 return $rec_hash;
238}
239
240
241sub write_infodb_entry
242{
243 my $infodb_handle = shift(@_);
244 my $infodb_key = shift(@_);
245 my $infodb_map = shift(@_);
246
247
248 # Add the key -> value mapping into the "data" table
249 my $infodb_entry_value = "";
250 foreach my $infodb_value_key (keys(%$infodb_map))
251 {
252 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
253 {
254 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
255 }
256 }
257
258 my $safe_infodb_key = &sqlite_safe($infodb_key);
259 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
260
261 # If this infodb entry is for a document, add all the interesting document metadata to the
262 # "document_metadata" table (for use by the dynamic classifiers)
263 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
264 {
265
266 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
267
268 foreach my $infodb_value_key (keys(%$infodb_map))
269 {
270 # We're not interested in most of the automatically added document metadata
271 next if ($infodb_value_key eq "archivedir" ||
272 $infodb_value_key eq "assocfilepath" ||
273 $infodb_value_key eq "childtype" ||
274 $infodb_value_key eq "contains" ||
275 $infodb_value_key eq "docnum" ||
276 $infodb_value_key eq "doctype" ||
277 $infodb_value_key eq "Encoding" ||
278 $infodb_value_key eq "FileSize" ||
279 $infodb_value_key eq "hascover" ||
280 $infodb_value_key eq "hastxt" ||
281 $infodb_value_key eq "lastmodified" ||
282 $infodb_value_key eq "metadataset" ||
283 $infodb_value_key eq "thistype" ||
284 $infodb_value_key =~ /^metadatafreq\-/ ||
285 $infodb_value_key =~ /^metadatalist\-/);
286
287 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
288 {
289 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
290 }
291 }
292 }
293
294 #### DEBUGGING
295 #my $new_file = "D:\\sql.txt";
296 #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
297 #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
298 #close(FOUT);
299 #print STDERR $insertStatementsBuffer;
300 #### END DEBUGGING
301}
302
303sub write_infodb_rawentry
304{
305 my $infodb_handle = shift(@_);
306 my $infodb_key = shift(@_);
307 my $infodb_val = shift(@_);
308
309 my $safe_infodb_key = &sqlite_safe($infodb_key);
310 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
311}
312
313
314sub set_infodb_entry
315{
316 my $infodb_file_path = shift(@_);
317 my $infodb_key = shift(@_);
318 my $infodb_map = shift(@_);
319 my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
320
321 if (!defined $infodb_handle) {
322 print STDERR "Error: Failed to open infodb write handle\n";
323 return -1;
324 }
325 else {
326 write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
327 close_infodb_write_handle($infodb_handle);
328 }
329 # Not currently checking for errors on write to DB
330 return 0;
331
332}
333
334
335
336sub delete_infodb_entry
337{
338 my $infodb_handle = shift(@_);
339 my $infodb_key = shift(@_);
340
341 # Delete the key from the "data" table
342
343 my $safe_infodb_key = &sqlite_safe($infodb_key);
344
345 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
346
347 # If this infodb entry is for a document, delete the
348 # "document_metadata" table entry also (for use by the dynamic classifiers)
349 if ($infodb_key !~ /\./)
350 {
351 # Possible for there not to be a docOID matching this infodb_key
352 # (entries are only made when <doctype> == doc
353 # Attempt to delete it, and don't complain if one isn't found
354
355 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
356
357 }
358}
359
360
361sub sqlite_safe
362{
363 my $value = shift(@_);
364
365 # Escape any single quotes in the value
366 $value =~ s/\'/\'\'/g;
367
368 return $value;
369}
370
371
3721;
Note: See TracBrowser for help on using the repository browser.