root/gs2-extensions/tdb/trunk/perllib/DBDrivers/SQLITE.pm @ 30330

Revision 30330, 14.8 KB (checked in by jmt12, 5 years ago)

Renamed to all uppercase for simplicity

  • Property svn:executable set to *
Line 
1###############################################################################
2#
3# DBDrivers/SQLITE.pm -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software from the New Zealand
6# Digital Library Project at the University of Waikato, New Zealand.
7#
8# Copyright (C) 2009-2010 DL Consulting Ltd.
9# Copyright (C) 2009-2015 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19# more details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package DBDrivers::SQLITE;
28
29# Pragma
30use strict;
31
32# Libraries
33use util;
34use parent 'DBDrivers::BaseDBDriver';
35
36sub BEGIN
37{
38    if (!defined $ENV{'GSDLHOME'} || !defined $ENV{'GSDLOS'}) {
39        die("Error! Environment must be prepared by sourcing setup.bash\n");
40    }
41    @DBDrivers::SQLITE::ISA = ( 'DBDrivers::BaseDBDriver' );
42}
43
44sub new
45{
46    my $class = shift(@_);
47    my $self = DBDrivers::BaseDBDriver->new();
48    $self->{'default_file_extension'} = 'db';
49    # Please set db_fast to 1 if you wish to enable faster I/O to the database
50    # by using optimisations such as PRAGMA journal_mode (MEMORY instead of
51    # DELETE) and synchronous (OFF instead of FULL)
52    # Please be aware that in this case it will be less secure and the database
53    # file may become corrupted if the if the operating system crashes or the
54    # computer loses power before that data has been written to the disk. But
55    # the speed gain is about 50x.
56    $self->{'db_fast'} = 0;
57    # Set to 1 to enable Write Ahead Logging - which is supposed to allow
58    # multiple readers/writers on a SQLite database (incompatible with
59    # db_fast). From SQLite 3.7 onwards, WAL offers limited parallel reader/
60    # writer support but is limited to single computers (doesn't work over
61    # networked filesystems). For details see:
62    # http://www.sqlite.org/draft/wal.html
63    $self->{'db_wal'} = 0;
64    # Sanity checking
65    if ($self->{'db_fast'} && $self->{'db_wal'}) {
66    print "Warning! Write-Ahead Logging is incompatible with certain SQLite optimizations; db_fast flag will be ignored.\n";
67    }
68    # The SQLite path we are using
69    $self->{'sqlite_executable'} = '';
70    bless($self, $class);
71    return $self;
72}
73
74# -----------------------------------------------------------------------------
75#   SQLITE IMPLEMENTATION
76# -----------------------------------------------------------------------------
77
78## Protected
79
80## @function _getExecutable
81#
82sub _getExecutable
83{
84    my $self = shift(@_);
85    my $sqlite3_exe = $self->{'sqlite_executable'};
86    if ($sqlite3_exe eq '') {
87    # Try a full path first - SQLite3 may live in the Greenstone bin
88    # directory
89    $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'}, 'bin', $ENV{'GSDLOS'}, 'sqlite3' . &util::get_os_exe());
90    if (!-e "$sqlite3_exe") {
91        print "Warning! Didn't find SQLite3 on expected path, \"$sqlite3_exe\", let's hope its on the system path.\n";
92        $sqlite3_exe = 'sqlite3' . &util::get_os_exe();
93    }
94    $self->{'sqlite_executable'} = $sqlite3_exe;
95    }
96    return $sqlite3_exe;
97}
98## _getExecutable() => string ##
99
100
101## @function _readInfoDBCmd(string, string) => string
102#
103sub _readInfoDBCmd
104{
105    my $self = shift(@_);
106    my $infodb_file_path = shift(@_);
107    my $sqlcmd = shift(@_);
108    my $result = "";
109    $self->_debugPrint('("' . $infodb_file_path . '", "' . $sqlcmd . '")');
110    my $sqlite3_exe = $self->_getExecutable();
111    my $infodb_handle = undef;
112    my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
113    if (!open($infodb_handle, "$cmd |")) {
114    print STDERR "Unable to execute: $cmd\n";
115    print STDERR "$!\n";
116    }
117    else {
118    binmode($infodb_handle, ":utf8");
119    my $line;
120    while (defined($line=<$infodb_handle>)) {
121        $result .= $line;
122    }
123    close($infodb_handle);
124    }
125    return $result;
126}
127## _readInfoDBCmd(string, string) => string ##
128
129
130## @function _sqliteSafe(string) => string
131#
132sub _sqliteSafe
133{
134    my $self = shift(@_);
135    my $value = shift(@_);
136    # Escape any single quotes in the value
137    $value =~ s/\'/\'\'/g;
138    return $value;
139}
140## _sqliteSafe(string) => string ##
141
142
143## Public
144
145
146# Handled by BaseDBDriver
147# sub get_infodb_file_path {}
148
149
150## @function open_infodb_write_handle(string, string) => handle
151#
152sub open_infodb_write_handle
153{
154    my $self = shift(@_);
155    my $infodb_file_path = shift(@_);
156    my $opt_append = shift(@_);
157
158    if (!defined $opt_append) {
159    $opt_append = '';
160    }
161
162    $self->_debugPrint('("' . $infodb_file_path . '","' . $opt_append . '")');
163
164    my $sqlite3_exe = $self->_getExecutable();
165    my $infodb_handle = undef;
166
167    # running sqlite3 with the pragma journal_mode=memory, causes sqlite to
168    # print out the word "memory". While this is not a problem usually, in our
169    # case, this ends up going out to the web page first, as part of the web
170    # page's headers, thus ruining the web page which causes an Internal
171    # Server Error (500). Therefore, we redirect sqlite's output to the null
172    # device instead. WAL mode (which also changes the journal) suffers a
173    # similar issue.
174    my $nul_device="";
175    if($self->{'db_fast'} == 1 || $self->{'db_wal'} == 1) {
176        if($ENV{'GSDLOS'} =~ m/windows/) {
177            $nul_device=">NUL";
178    } else {
179            $nul_device=">/dev/null"; # linux, mac
180    }
181    }
182
183    if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
184    {
185    print STDERR "Error! Failed to open pipe to \"" . $sqlite3_exe . "\" \"" . $infodb_file_path . "\"\n" . $! . "\n";
186    return undef;
187    }
188
189    binmode($infodb_handle,":utf8");
190
191    # Allow parallel readers/writers by using a Write Ahead Logger
192    if ($self->{'db_wal'}) {
193    print $infodb_handle "PRAGMA journal_mode=WAL;\n";
194    }
195    # Add extra optimisations, less secure but with a massive gain in performan-
196    # ce with large databases which are often updated. They should be set before
197    # the transaction begins. Incompatible with WAL.
198    elsif ($self->{'db_fast'}) {
199    print $infodb_handle "PRAGMA synchronous=OFF;\n";
200    print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
201    }
202    # If we are *not* appending, drop the table for a fresh start
203    if ($opt_append ne "append") {
204    print $infodb_handle "DROP TABLE IF EXISTS data;\n";
205    print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
206    }
207    print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
208    print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
209    # These are crucial for efficiency when importing large amounts of data
210    print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
211    print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
212    # This is very important for efficiency, otherwise each command will be act-
213    # ioned one at a time
214    print $infodb_handle "BEGIN TRANSACTION;\n";
215    return $infodb_handle;
216}
217## open_infodb_write_handle(string, string) => handle ##
218
219
220## @function close_infodb_write_handle(handle) => void
221#
222sub close_infodb_write_handle
223{
224    my $self = shift(@_);
225    my $infodb_handle = shift(@_);
226    $self->_debugPrint('(<handle>)');
227    # Close the transaction we began when opening the file
228    print $infodb_handle "END TRANSACTION;\n";
229    close($infodb_handle);
230}
231## close_infodb_write_handle(handle) => void ##
232
233
234## @function read_infodb_file(string, hashmap) => void
235#
236sub read_infodb_file
237{
238    my $self = shift(@_);
239    my $infodb_file_path = shift(@_);
240    my $infodb_map = shift(@_);
241    $self->_debugPrint('("' . $infodb_file_path . '", <hashmap>)');
242    my $keys_str = $self->_readInfoDBCmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
243    my @keys = split(/\n/,$keys_str);
244    foreach my $k (@keys) {
245    my $k_safe = $self->_sqliteSafe($k);
246    my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
247    my $val_str = $self->_readInfoDBCmd($infodb_file_path,$select_val_cmd);
248    $infodb_map->{$k} = $val_str;
249    }
250}
251## read_infodb_file(string, hashmap) => void ##
252
253
254## @function read_infodb_keys(string, hashmap) => void
255#
256sub read_infodb_keys
257{
258    my $self = shift(@_);
259    my $infodb_file_path = shift(@_);
260    my $infodb_map = shift(@_);
261    $self->_debugPrint('("' . $infodb_file_path . '", <hashmap>)');
262    my $keys_str = $self->_readInfoDBCmd($infodb_file_path,"SELECT key FROM data;");
263    my @keys = split(/\n/,$keys_str);
264    foreach my $key (@keys)
265    {
266    $infodb_map->{$key} = 1;
267    }
268}
269## read_infodb_keys(string, hashmap) => void ##
270
271
272## @function read_infodb_rawentry(string, string) => string
273#
274sub read_infodb_rawentry
275{
276    my $self = shift(@_);
277    my $infodb_file_path = shift(@_);
278    my $infodb_key = shift(@_);
279    $self->_debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '")');
280    my $key_safe = $self->_sqliteSafe($infodb_key);
281    my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
282    my $val_str = $self->_readInfoDBCmd($infodb_file_path,$select_val_cmd);
283    return $val_str
284}
285## read_infodb_rawentry(string, string) => string ##
286
287
288## @function read_infodb_entry(string, string) => hashmap
289#
290sub read_infodb_entry
291{
292    my $self = shift(@_);
293    my $infodb_file_path = shift(@_);
294    my $infodb_key = shift(@_);
295    $self->_debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '")');
296    my $val_str = $self->read_infodb_rawentry($infodb_file_path,$infodb_key);
297    my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
298    return $rec_hash;
299}
300## read_infodb_entry(string, string) => hashmap ##
301
302
303## @function write_infodb_entry(handle, string, hashmap) => void
304#
305sub write_infodb_entry
306{
307    my $self = shift(@_);
308    my $infodb_handle = shift(@_);
309    my $infodb_key = shift(@_);
310    my $infodb_map = shift(@_);
311    $self->_debugPrint('(<handle>, "' . $infodb_key . '", <hashmap>)');
312    # Add the key -> value mapping into the "data" table
313    my $infodb_entry_value = "";
314    foreach my $infodb_value_key (keys(%$infodb_map)) {
315    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
316        $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
317    }
318    }
319
320    my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
321    print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_entry_value) . "');\n";
322
323    # If this infodb entry is for a document, add all the interesting document
324    # metadata to the "document_metadata" table (for use by the dynamic
325    # classifiers)
326    if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/) {
327    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
328    foreach my $infodb_value_key (keys(%$infodb_map)) {
329        # We're not interested in most of the automatically added document metadata
330        next if ($infodb_value_key eq "archivedir" ||
331             $infodb_value_key eq "assocfilepath" ||
332             $infodb_value_key eq "childtype" ||
333             $infodb_value_key eq "contains" ||
334             $infodb_value_key eq "docnum" ||
335             $infodb_value_key eq "doctype" ||
336             $infodb_value_key eq "Encoding" ||
337             $infodb_value_key eq "FileSize" ||
338             $infodb_value_key eq "hascover" ||
339             $infodb_value_key eq "hastxt" ||
340             $infodb_value_key eq "lastmodified" ||
341             $infodb_value_key eq "metadataset" ||
342             $infodb_value_key eq "thistype" ||
343             $infodb_value_key =~ /^metadatafreq\-/ ||
344             $infodb_value_key =~ /^metadatalist\-/);
345        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) {
346        print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_value_key) . "', '" . $self->_sqliteSafe($infodb_value) . "');\n";
347        }
348    }
349    }
350
351    #### DEBUGGING
352    #my $new_file = "D:\\sql.txt";
353    #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
354    #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
355    #close(FOUT);
356    #print STDERR $insertStatementsBuffer;
357    #### END DEBUGGING
358}
359## write_infodb_entry(handle, string, hashmap) => void ##
360
361
362## @function write_infodb_rawentry(handle, string, string) => void
363#
364sub write_infodb_rawentry
365{
366    my $self = shift(@_);
367    my $infodb_handle = shift(@_);
368    my $infodb_key = shift(@_);
369    my $infodb_val = shift(@_);
370    $self->_debugPrint('(<handle>, "' . $infodb_key . '", "' . $infodb_val . '")');
371    my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
372    print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . $self->_sqliteSafe($infodb_val) . "');\n";
373}
374## write_infodb_rawentry(handle, string, string) => void ##
375
376
377## @function set_infodb_entry(string, string, hashmap) => integer
378#
379sub set_infodb_entry
380{
381    my $self = shift(@_);
382    my $infodb_file_path = shift(@_);
383    my $infodb_key = shift(@_);
384    my $infodb_map = shift(@_);
385    $self->_debugPrint('("' . $infodb_file_path . '", "' . $infodb_key . '", <hashmap>)');
386    my $infodb_handle = $self->open_infodb_write_handle($infodb_file_path, "append");
387    if (!defined $infodb_handle) {
388    print STDERR "Error: Failed to open infodb write handle\n";
389    return -1;
390    }
391    else {
392    $self->write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
393    $self->close_infodb_write_handle($infodb_handle);
394    }
395    # Not currently checking for errors on write to DB
396    return 0;
397}
398## set_infodb_entry(string, string, hashmap) => integer ##
399
400
401## @function delete_infodb_entry(handle, string) => void
402#
403sub delete_infodb_entry
404{
405    my $self = shift(@_);
406    my $infodb_handle = shift(@_);
407    my $infodb_key = shift(@_);
408    $self->_debugPrint('(<handle>, "' . $infodb_key . '")');
409    # Delete the key from the "data" table
410    my $safe_infodb_key = $self->_sqliteSafe($infodb_key);
411    print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
412    # If this infodb entry is for a document, delete the "document_metadata"
413    # table entry also (for use by the dynamic classifiers)
414    if ($infodb_key !~ /\./) {
415    # Possible for there not to be a docOID matching this infodb_key
416    # (entries are only made when <doctype> == doc
417    # Attempt to delete it, and don't complain if one isn't found
418    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
419    }
420}
421## delete_infodb_entry(handle, string) => void ##
422
423
4241;
Note: See TracBrowser for help on using the browser.