source: gs2-extensions/tdb/trunk/perllib/DBDrivers/SQLite.pm@ 30318

Last change on this file since 30318 was 30318, checked in by jmt12, 8 years ago

Initial checkin of object-oriented rewrite of the dbutils stuff to bring it more into line with plugins and classifiers.

  • Property svn:executable set to *
File size: 11.9 KB
RevLine 
[30318]1###############################################################################
2#
3# DBDrivers/SQLite.pm -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software from the New Zealand
6# Digital Library Project at the University of Waikato, New Zealand.
7#
8# Copyright (C) 2009-2010 DL Consulting Ltd.
9# Copyright (C) 2009-2015 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19# more details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package DBDrivers::SQLite;
28
29# Pragma
30use strict;
31
32# Libraries
33use util;
34use DBDrivers::BaseDBDriver;
35
36sub BEGIN
37{
38 if (!defined $ENV{'GSDLHOME'} || !defined $ENV{'GSDLOS'}) {
39 die("Error! Environment must be prepared by sourcing setup.bash\n");
40 }
41 @DBDrivers::SQLite::ISA = ( 'DBDrivers::BaseDBDriver' );
42}
43
44sub new
45{
46 my $class = shift(@_);
47 return bless ($self, $class);
48}
49
50# -----------------------------------------------------------------------------
51# SQLITE IMPLEMENTATION
52# -----------------------------------------------------------------------------
53
54# Please set $db_fast to 1 if you wish to enable faster I/O to the database by
55# using optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE)
56# and synchronous (OFF instead of FULL)
57# Please be aware that in this case it will be less secure and the database
58# file may become corrupted if the if the operating system crashes or the
59# computer loses power before that data has been written to the disk surface.
60# But the speed gain is about 50x
61my $db_fast = 0;
62
63# Set to 1 to enable Write Ahead Logging - which is supposed to allow multiple
64# readers/writers on a SQLite database (incompatible with db_fast). From SQLite
65# 3.7 onwards, WAL offers limited parallel reader/writer support but is limited
66# to single computers (doesn't work over networked filesystems). For details
67# see: http://www.sqlite.org/draft/wal.html [jmt12]
68my $db_wal = 0;
69
70## Private Methods
71
72## @function _getExecutable
73#
74sub _getExecutable
75{
76 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'}, 'bin', $ENV{'GSDLOS'}, 'sqlite3' . &util::get_os_exe());
77 if (!-e "$sqlite3_exe") {
78 die("Error! Unable to find SQLite3 executable: $sqlite3_exe\n");
79 }
80 return $sqlite3_exe;
81}
82## _getExecutable() => string ##
83
84
85sub sqlite_safe
86{
87 my $value = shift(@_);
88
89 # Escape any single quotes in the value
90 $value =~ s/\'/\'\'/g;
91
92 return $value;
93}
94
95
96sub open_infodb_write_handle
97{
98 my $infodb_file_path = shift(@_);
99 my $opt_append = shift(@_);
100
101 my $sqlite3_exe = &_getExecutable();
102 my $infodb_handle = undef;
103
104 # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
105 # word "memory". While this is not a problem usually, in our case, this ends up going out
106 # to the web page first, as part of the web page's headers, thus ruining the web page
107 # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
108 # the nul device instead.
109 # using WAL mode (which also changes the journal) suffers a similar issue [jmt12]
110 my $nul_device="";
111 if($db_fast == 1 || $db_wal == 1) {
112 if($ENV{'GSDLOS'} =~ m/windows/) {
113 $nul_device=">NUL";
114 } else {
115 $nul_device=">/dev/null"; # linux, mac
116 }
117 }
118
119 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
120 {
121 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
122 print STDERR " $!\n";
123 return undef;
124 }
125
126 binmode($infodb_handle,":utf8");
127
128 # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
129 # They should be set before the transaction begins
130 if (defined $db_fast && $db_fast == 1) {
131 print $infodb_handle "PRAGMA synchronous=OFF;\n";
132 print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
133 }
134 # Allow parallel readers/writers by using a Write Ahead Logger
135 elsif ($db_wal)
136 {
137 print $infodb_handle "PRAGMA journal_mode=WAL;\n";
138 }
139
140 # This is very important for efficiency, otherwise each command will be actioned one at a time
141 print $infodb_handle "BEGIN TRANSACTION;\n";
142
143
144
145 if (!(defined $opt_append) || ($opt_append ne "append")) {
146 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
147 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
148 }
149
150 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
151 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
152
153 # This is crucial for efficiency when importing large amounts of data
154 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
155
156 return $infodb_handle;
157}
158
159
160sub close_infodb_write_handle
161{
162 my $infodb_handle = shift(@_);
163
164 # This is crucial for efficient queries on the database!
165 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
166
167 # Close the transaction we began after opening the file
168 print $infodb_handle "END TRANSACTION;\n";
169
170 close($infodb_handle);
171}
172
173
174sub read_infodb_cmd
175{
176 my $infodb_file_path = shift(@_);
177 my $sqlcmd = shift(@_);
178
179 my $result = "";
180
181 my $sqlite3_exe = &_getExecutable();
182 my $infodb_handle = undef;
183 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
184
185 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
186 {
187 print STDERR "Unable to execute: $cmd\n";
188 print STDERR "$!\n";
189 }
190 else {
191
192 binmode($infodb_handle, ":utf8");
193 my $line;
194 while (defined($line=<$infodb_handle>)) {
195 $result .= $line;
196 }
197
198 close($infodb_handle);
199 }
200
201 return $result;
202}
203
204sub get_infodb_file_path
205{
206 my $collection_name = shift(@_);
207 my $infodb_directory_path = shift(@_);
208
209 my $infodb_file_extension = ".db";
210 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
211 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
212}
213
214
215sub read_infodb_file
216{
217 my $infodb_file_path = shift(@_);
218 my $infodb_map = shift(@_);
219
220
221 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
222
223 my @keys = split(/\n/,$keys_str);
224
225 foreach my $k (@keys) {
226
227 my $k_safe = &sqlite_safe($k);
228 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
229
230 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
231
232 $infodb_map->{$k} = $val_str;
233 }
234
235}
236
237
238sub read_infodb_keys
239{
240 my $infodb_file_path = shift(@_);
241 my $infodb_map = shift(@_);
242
243
244 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
245
246 my @keys = split(/\n/,$keys_str);
247
248 foreach my $key (@keys)
249 {
250 $infodb_map->{$key} = 1;
251 }
252}
253
254sub read_infodb_rawentry
255{
256 my $infodb_file_path = shift(@_);
257 my $infodb_key = shift(@_);
258
259
260 my $key_safe = &sqlite_safe($infodb_key);
261 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
262
263 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
264
265 return $val_str
266}
267
268sub read_infodb_entry
269{
270 my $infodb_file_path = shift(@_);
271 my $infodb_key = shift(@_);
272
273 my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
274
275 my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
276
277 return $rec_hash;
278}
279
280
281sub write_infodb_entry
282{
283 my $infodb_handle = shift(@_);
284 my $infodb_key = shift(@_);
285 my $infodb_map = shift(@_);
286
287
288 # Add the key -> value mapping into the "data" table
289 my $infodb_entry_value = "";
290 foreach my $infodb_value_key (keys(%$infodb_map))
291 {
292 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
293 {
294 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
295 }
296 }
297
298 my $safe_infodb_key = &sqlite_safe($infodb_key);
299 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
300
301 # If this infodb entry is for a document, add all the interesting document metadata to the
302 # "document_metadata" table (for use by the dynamic classifiers)
303 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
304 {
305
306 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
307
308 foreach my $infodb_value_key (keys(%$infodb_map))
309 {
310 # We're not interested in most of the automatically added document metadata
311 next if ($infodb_value_key eq "archivedir" ||
312 $infodb_value_key eq "assocfilepath" ||
313 $infodb_value_key eq "childtype" ||
314 $infodb_value_key eq "contains" ||
315 $infodb_value_key eq "docnum" ||
316 $infodb_value_key eq "doctype" ||
317 $infodb_value_key eq "Encoding" ||
318 $infodb_value_key eq "FileSize" ||
319 $infodb_value_key eq "hascover" ||
320 $infodb_value_key eq "hastxt" ||
321 $infodb_value_key eq "lastmodified" ||
322 $infodb_value_key eq "metadataset" ||
323 $infodb_value_key eq "thistype" ||
324 $infodb_value_key =~ /^metadatafreq\-/ ||
325 $infodb_value_key =~ /^metadatalist\-/);
326
327 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
328 {
329 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
330 }
331 }
332 }
333
334 #### DEBUGGING
335 #my $new_file = "D:\\sql.txt";
336 #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
337 #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
338 #close(FOUT);
339 #print STDERR $insertStatementsBuffer;
340 #### END DEBUGGING
341}
342
343sub write_infodb_rawentry
344{
345 my $infodb_handle = shift(@_);
346 my $infodb_key = shift(@_);
347 my $infodb_val = shift(@_);
348
349 my $safe_infodb_key = &sqlite_safe($infodb_key);
350 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
351}
352
353
354sub set_infodb_entry
355{
356 my $infodb_file_path = shift(@_);
357 my $infodb_key = shift(@_);
358 my $infodb_map = shift(@_);
359 my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
360
361 if (!defined $infodb_handle) {
362 print STDERR "Error: Failed to open infodb write handle\n";
363 return -1;
364 }
365 else {
366 write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
367 close_infodb_write_handle($infodb_handle);
368 }
369 # Not currently checking for errors on write to DB
370 return 0;
371
372}
373
374
375
376sub delete_infodb_entry
377{
378 my $infodb_handle = shift(@_);
379 my $infodb_key = shift(@_);
380
381 # Delete the key from the "data" table
382
383 my $safe_infodb_key = &sqlite_safe($infodb_key);
384
385 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
386
387 # If this infodb entry is for a document, delete the
388 # "document_metadata" table entry also (for use by the dynamic classifiers)
389 if ($infodb_key !~ /\./)
390 {
391 # Possible for there not to be a docOID matching this infodb_key
392 # (entries are only made when <doctype> == doc
393 # Attempt to delete it, and don't complain if one isn't found
394
395 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
396
397 }
398}
399
400
4011;
Note: See TracBrowser for help on using the repository browser.