source: main/trunk/greenstone2/perllib/DBDrivers/SQLITE.pm@ 30355

Last change on this file since 30355 was 30355, checked in by jmt12, 8 years ago

Initial checkin of OO drivers for new dbutils system

File size: 14.8 KB
Line 
1###############################################################################
2#
3# DBDrivers/SQLITE.pm -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software from the New Zealand
6# Digital Library Project at the University of Waikato, New Zealand.
7#
8# Copyright (C) 2009-2010 DL Consulting Ltd.
9# Copyright (C) 2009-2015 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19# more details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package DBDrivers::SQLITE;
28
29# Pragma
30use strict;
31
32# Libraries
33use util;
34use parent 'DBDrivers::BaseDBDriver';
35
36sub BEGIN
37{
38 if (!defined $ENV{'GSDLHOME'} || !defined $ENV{'GSDLOS'}) {
39 die("Error! Environment must be prepared by sourcing setup.bash\n");
40 }
41 @DBDrivers::SQLITE::ISA = ( 'DBDrivers::BaseDBDriver' );
42}
43
44sub new
45{
46 my $class = shift(@_);
47 my $self = DBDrivers::BaseDBDriver->new(@_);
48 $self->{'default_file_extension'} = 'db';
49 # Please set db_fast to 1 if you wish to enable faster I/O to the database
50 # by using optimisations such as PRAGMA journal_mode (MEMORY instead of
51 # DELETE) and synchronous (OFF instead of FULL)
52 # Please be aware that in this case it will be less secure and the database
53 # file may become corrupted if the if the operating system crashes or the
54 # computer loses power before that data has been written to the disk. But
55 # the speed gain is about 50x.
56 $self->{'db_fast'} = 0;
57 # Set to 1 to enable Write Ahead Logging - which is supposed to allow
58 # multiple readers/writers on a SQLite database (incompatible with
59 # db_fast). From SQLite 3.7 onwards, WAL offers limited parallel reader/
60 # writer support but is limited to single computers (doesn't work over
61 # networked filesystems). For details see:
62 # http://www.sqlite.org/draft/wal.html
63 $self->{'db_wal'} = 0;
64 # Sanity checking
65 if ($self->{'db_fast'} && $self->{'db_wal'}) {
66 print "Warning! Write-Ahead Logging is incompatible with certain SQLite optimizations; db_fast flag will be ignored.\n";
67 }
68 # The SQLite path we are using
69 $self->{'sqlite_executable'} = '';
70
71 # Support
72 $self->{'supports_set'} = 1;
73
74 bless($self, $class);
75 return $self;
76}
77
78# -----------------------------------------------------------------------------
79# SQLITE IMPLEMENTATION
80# -----------------------------------------------------------------------------
81
82## Protected
83
84## @function _getExecutable
85#
86sub _getExecutable
87{
88 my $self = shift(@_);
89 my $sqlite3_exe = $self->{'sqlite_executable'};
90 if ($sqlite3_exe eq '') {
91 # Try a full path first - SQLite3 may live in the Greenstone bin
92 # directory
93 $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'}, 'bin', $ENV{'GSDLOS'}, 'sqlite3' . &util::get_os_exe());
94 if (!-e "$sqlite3_exe") {
95 print "Warning! Didn't find SQLite3 on expected path, \"$sqlite3_exe\", let's hope its on the system path.\n";
96 $sqlite3_exe = 'sqlite3' . &util::get_os_exe();
97 }
98 $self->{'sqlite_executable'} = $sqlite3_exe;
99 }
100 return $sqlite3_exe;
101}
102## _getExecutable() => string ##
103
104
105## @function _readInfoDBCmd(string, string) => string
106#
107sub _readInfoDBCmd
108{
109 my $self = shift(@_);
110 my $infodb_file_path = shift(@_);
111 my $sqlcmd = shift(@_);
112 my $result = "";
113 $self->debugPrint('("' . $infodb_file_path . '", "' . $sqlcmd . '")');
114 my $sqlite3_exe = $self->_getExecutable();
115 my $infodb_handle = undef;
116 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
117 if (!open($infodb_handle, "$cmd |")) {
118 print STDERR "Unable to execute: $cmd\n";
119 print STDERR "$!\n";
120 }
121 else {
122 binmode($infodb_handle, ":utf8");
123 my $line;
124 while (defined($line=<$infodb_handle>)) {
125 $result .= $line;
126 }
127 close($infodb_handle);
128 }
129 return $result;
130}
131## _readInfoDBCmd(string, string) => string ##
132
133
134## @function _sqliteSafe(string) => string
135#
136sub _sqliteSafe
137{
138 my $self = shift(@_);
139 my $value = shift(@_);
140 # Escape any single quotes in the value
141 $value =~ s/\'/\'\'/g;
142 return $value;
143}
144## _sqliteSafe(string) => string ##
145
146
147## Public
148
149
150# Handled by BaseDBDriver
151# sub get_infodb_file_path {}
152
153
154## @function open_infodb_write_handle(string, string) => handle
155#
156sub open_infodb_write_handle
157{
158 my $self = shift(@_);
159 my $infodb_file_path = shift(@_);
160 my $opt_append = shift(@_);
161
162 if (!defined $opt_append) {
163 $opt_append = '';
164 }
165
166 $self->debugPrint('("' . $infodb_file_path . '","' . $opt_append . '")');
167
168 my $sqlite3_exe = $self->_getExecutable();
169 my $infodb_handle = undef;
170
171 # running sqlite3 with the pragma journal_mode=memory, causes sqlite to
172 # print out the word "memory". While this is not a problem usually, in our
173 # case, this ends up going out to the web page first, as part of the web
174 # page's headers, thus ruining the web page which causes an Internal
175 # Server Error (500). Therefore, we redirect sqlite's output to the null
176 # device instead. WAL mode (which also changes the journal) suffers a
177 # similar issue.
178 my $nul_device="";
179 if($self->{'db_fast'} == 1 || $self->{'db_wal'} == 1) {
180 if($ENV{'GSDLOS'} =~ m/windows/) {
181 $nul_device=">NUL";
182 } else {
183 $nul_device=">/dev/null"; # linux, mac
184 }
185 }
186
187 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
188 {
189 print STDERR "Error! Failed to open pipe to \"" . $sqlite3_exe . "\" \"" . $infodb_file_path . "\"\n" . $! . "\n";
190 return undef;
191 }
192
193 binmode($infodb_handle,":utf8");
194
195 # Allow parallel readers/writers by using a Write Ahead Logger
196 if ($self->{'db_wal'}) {
197 print $infodb_handle "PRAGMA journal_mode=WAL;\n";
198 }
199 # Add extra optimisations, less secure but with a massive gain in performan-
200 # ce with large databases which are often updated. They should be set before
201 # the transaction begins. Incompatible with WAL.
202 elsif ($self->{'db_fast'}) {
203 print $infodb_handle "PRAGMA synchronous=OFF;\n";
204 print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
205 }
206 # If we are *not* appending, drop the table for a fresh start
207 if ($opt_append ne "append") {
208 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
209 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
210 }
211 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
212 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
213 # These are crucial for efficiency when importing large amounts of data
214 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
215 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
216 # This is very important for efficiency, otherwise each command will be act-
217 # ioned one at a time
218 print $infodb_handle "BEGIN TRANSACTION;\n";
219 return $infodb_handle;
220}
221## open_infodb_write_handle(string, string) => handle ##
222
223
224## @function close_infodb_write_handle(handle) => void
225#
226sub close_infodb_write_handle
227{
228 my $self = shift(@_);
229 my $infodb_handle = shift(@_);
230 $self->debugPrint('(<handle>)');
231 # Close the transaction we began when opening the file
232 print $infodb_handle "END TRANSACTION;\n";
233 close($infodb_handle);
234}
235## close_infodb_write_handle(handle) => void ##
236
237
238## @function read_infodb_file(string, hashmap) => void
239#
240sub read_infodb_file
241{
242 my $self = shift(@_);
243 my $infodb_file_path = shift(@_);
244 my $infodb_map = shift(@_);
245 $self->debugPrint('("' . $infodb_file_path . '", <hashmap>)');
246 my $keys_str = $self->_readInfoDBCmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
247 my @keys = split(/\n/,$keys_str);
248 foreach my $k (@keys) {
249 my $k_safe = $self->_sqliteSafe($k);
250 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
251 my $val_str = $self->_readInfoDBCmd($infodb_file_path,$select_val_cmd);
252 $infodb_map->{$k} = $val_str;
253 }
254}
255## read_infodb_file(string, hashmap) => void ##
256
257
258## @function read_infodb_keys(string, hashmap) => void
259#
260sub read_infodb_keys
261{
262 my $self = shift(@_);
263 my $infodb_file_path = shift(@_);
264 my $infodb_map = shift(@_);
265 $self->debugPrint('("' . $infodb_file_path . '", <hashmap>)');
266 my $keys_str = $self->_readInfoDBCmd($infodb_file_path,"SELECT key FROM data;");
267 my @keys = split(/\n/,$keys_str);
268 foreach my $key (@keys)
269 {
270 $infodb_map->{$key} = 1;
271 }
272}
273## read_infodb_keys(string, hashmap) => void ##
274
275
276## @function read_infodb_rawentry(string, string) => string
277#
278sub read_infodb_rawentry
279{
280 my $self = shift(@_);
281 my $infodb_file_path = shift(@_);
282 my $infodb_key = shift(@_);
283 $self->debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '")');
284 my $key_safe = $self->_sqliteSafe($infodb_key);
285 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
286 my $val_str = $self->_readInfoDBCmd($infodb_file_path,$select_val_cmd);
287 return $val_str
288}
289## read_infodb_rawentry(string, string) => string ##
290
291
292## @function read_infodb_entry(string, string) => hashmap
293#
294sub read_infodb_entry
295{
296 my $self = shift(@_);
297 my $infodb_file_path = shift(@_);
298 my $infodb_key = shift(@_);
299 $self->debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '")');
300 my $val_str = $self->read_infodb_rawentry($infodb_file_path,$infodb_key);
301 my $rec_hash = $self->convert_infodb_string_to_hash($val_str);
302 return $rec_hash;
303}
304## read_infodb_entry(string, string) => hashmap ##
305
306
307## @function write_infodb_entry(handle, string, hashmap) => void
308#
309sub write_infodb_entry
310{
311 my $self = shift(@_);
312 my $infodb_handle = shift(@_);
313 my $infodb_key = shift(@_);
314 my $infodb_map = shift(@_);
315 $self->debugPrint('(<handle>, "' . $infodb_key . '", <hashmap>)');
316 # Add the key -> value mapping into the "data" table
317 my $infodb_entry_value = "";
318 foreach my $infodb_value_key (keys(%$infodb_map)) {
319 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
320 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
321 }
322 }
323
324 my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
325 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_entry_value) . "');\n";
326
327 # If this infodb entry is for a document, add all the interesting document
328 # metadata to the "document_metadata" table (for use by the dynamic
329 # classifiers)
330 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/) {
331 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
332 foreach my $infodb_value_key (keys(%$infodb_map)) {
333 # We're not interested in most of the automatically added document metadata
334 next if ($infodb_value_key eq "archivedir" ||
335 $infodb_value_key eq "assocfilepath" ||
336 $infodb_value_key eq "childtype" ||
337 $infodb_value_key eq "contains" ||
338 $infodb_value_key eq "docnum" ||
339 $infodb_value_key eq "doctype" ||
340 $infodb_value_key eq "Encoding" ||
341 $infodb_value_key eq "FileSize" ||
342 $infodb_value_key eq "hascover" ||
343 $infodb_value_key eq "hastxt" ||
344 $infodb_value_key eq "lastmodified" ||
345 $infodb_value_key eq "metadataset" ||
346 $infodb_value_key eq "thistype" ||
347 $infodb_value_key =~ /^metadatafreq\-/ ||
348 $infodb_value_key =~ /^metadatalist\-/);
349 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
350 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_value_key) . "', '" . $self->_sqliteSafe($infodb_value) . "');\n";
351 }
352 }
353 }
354
355 #### DEBUGGING
356 #my $new_file = "D:\\sql.txt";
357 #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
358 #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
359 #close(FOUT);
360 #print STDERR $insertStatementsBuffer;
361 #### END DEBUGGING
362}
363## write_infodb_entry(handle, string, hashmap) => void ##
364
365
366## @function write_infodb_rawentry(handle, string, string) => void
367#
368sub write_infodb_rawentry
369{
370 my $self = shift(@_);
371 my $infodb_handle = shift(@_);
372 my $infodb_key = shift(@_);
373 my $infodb_val = shift(@_);
374 $self->debugPrint('(<handle>, "' . $infodb_key . '", "' . $infodb_val . '")');
375 my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
376 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_val) . "');\n";
377}
378## write_infodb_rawentry(handle, string, string) => void ##
379
380
381## @function set_infodb_entry(string, string, hashmap) => integer
382#
383sub set_infodb_entry
384{
385 my $self = shift(@_);
386 my $infodb_file_path = shift(@_);
387 my $infodb_key = shift(@_);
388 my $infodb_map = shift(@_);
389 $self->debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '", <hashmap>)');
390 my $infodb_handle = $self->open_infodb_write_handle($infodb_file_path, "append");
391 if (!defined $infodb_handle) {
392 print STDERR "Error: Failed to open infodb write handle\n";
393 return -1;
394 }
395 else {
396 $self->write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
397 $self->close_infodb_write_handle($infodb_handle);
398 }
399 # Not currently checking for errors on write to DB
400 return 0;
401}
402## set_infodb_entry(string, string, hashmap) => integer ##
403
404
405## @function delete_infodb_entry(handle, string) => void
406#
407sub delete_infodb_entry
408{
409 my $self = shift(@_);
410 my $infodb_handle = shift(@_);
411 my $infodb_key = shift(@_);
412 $self->debugPrint('(<handle>, "' . $infodb_key . '")');
413 # Delete the key from the "data" table
414 my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
415 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
416 # If this infodb entry is for a document, delete the "document_metadata"
417 # table entry also (for use by the dynamic classifiers)
418 if ($infodb_key !~ /\./) {
419 # Possible for there not to be a docOID matching this infodb_key
420 # (entries are only made when <doctype> == doc
421 # Attempt to delete it, and don't complain if one isn't found
422 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
423 }
424}
425## delete_infodb_entry(handle, string) => void ##
426
427
4281;
Note: See TracBrowser for help on using the repository browser.