root/main/trunk/greenstone2/perllib/dbutil/sqlite.pm @ 27298

Revision 27298, 11.3 KB (checked in by jmt12, 6 years ago)

Add hidden support for the new experimental multiple reader/writer functionality available in SQLite

Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010  DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39# Set to 1 to enable Write Ahead Logging - which is supposed to allow multiple
40# readers/writers on a SQLite database (incompatible with db_fast). From SQLite
41# 3.7 onwards, WAL offers limited parallel reader/writer support but is limited
42# to single computers (doesn't work over networked filesystems). For details
43# see: http://www.sqlite.org/draft/wal.html [jmt12]
44my $db_wal = 0;
45
46# -----------------------------------------------------------------------------
47#   SQLITE IMPLEMENTATION
48# -----------------------------------------------------------------------------
49
50sub open_infodb_write_handle
51{
52  my $infodb_file_path = shift(@_);
53  my $opt_append = shift(@_);
54 
55  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
56  my $infodb_handle = undef;
57
58  if (!-e "$sqlite3_exe")
59  {
60      print STDERR "Error: Unable to find $sqlite3_exe\n";
61      return undef;
62  }
63
64  # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
65  # word "memory". While this is not a problem usually, in our case, this ends up going out
66  # to the web page first, as part of the web page's headers, thus ruining the web page
67  # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
68  # the nul device instead.
69  # using WAL mode (which also changes the journal) suffers a similar issue [jmt12]
70  my $nul_device="";
71  if($db_fast == 1 || $db_wal == 1) {
72    if($ENV{'GSDLOS'} =~ m/windows/) {
73        $nul_device=">NUL";
74    } else {
75        $nul_device=">/dev/null"; # linux, mac
76    }
77  } 
78 
79  if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
80  {
81      print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
82      print STDERR "       $!\n";
83      return undef;
84  }
85
86  binmode($infodb_handle,":utf8");
87 
88   # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
89   # They should be set before the transaction begins
90  if (defined $db_fast && $db_fast == 1) {
91    print $infodb_handle "PRAGMA synchronous=OFF;\n";
92    print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
93  }
94  # Allow parallel readers/writers by using a Write Ahead Logger
95  elsif ($db_wal)
96  {
97    print $infodb_handle "PRAGMA journal_mode=WAL;\n";
98  }
99
100  # This is very important for efficiency, otherwise each command will be actioned one at a time
101  print $infodb_handle "BEGIN TRANSACTION;\n";
102 
103
104
105  if (!(defined $opt_append) || ($opt_append ne "append")) {
106    print $infodb_handle "DROP TABLE IF EXISTS data;\n";
107    print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
108  }
109   
110  print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
111  print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
112
113  # This is crucial for efficiency when importing large amounts of data
114  print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
115
116  return $infodb_handle;
117}
118
119
120sub close_infodb_write_handle
121{
122  my $infodb_handle = shift(@_);
123
124  # This is crucial for efficient queries on the database!
125  print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
126 
127  # Close the transaction we began after opening the file
128  print $infodb_handle "END TRANSACTION;\n";
129
130  close($infodb_handle);
131}
132
133
134sub read_infodb_cmd
135{
136  my $infodb_file_path = shift(@_);
137  my $sqlcmd = shift(@_);
138
139  my $result = "";
140
141  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
142  my $infodb_handle = undef;
143  my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
144
145  if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
146  {
147      print STDERR "Unable to execute: $cmd\n";
148      print STDERR "$!\n";
149  }
150  else {
151
152      binmode($infodb_handle, ":utf8");
153      my $line;
154      while (defined($line=<$infodb_handle>)) {
155      $result .= $line;
156      }
157
158      close($infodb_handle);
159  }
160
161  return $result;
162}
163
164sub get_infodb_file_path
165{
166  my $collection_name = shift(@_);
167  my $infodb_directory_path = shift(@_);
168
169  my $infodb_file_extension = ".db";
170  my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
171  return &util::filename_cat($infodb_directory_path, $infodb_file_name);
172}
173
174
175sub read_infodb_file
176{
177  my $infodb_file_path = shift(@_);
178  my $infodb_map = shift(@_);
179
180
181  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
182
183  my @keys = split(/\n/,$keys_str);
184
185  foreach my $k (@keys) {
186     
187      my $k_safe = &sqlite_safe($k);
188      my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
189
190      my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
191
192      $infodb_map->{$k} = $val_str;
193  }
194
195}
196
197
198sub read_infodb_keys
199{
200  my $infodb_file_path = shift(@_);
201  my $infodb_map = shift(@_);
202
203
204  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
205
206  my @keys = split(/\n/,$keys_str);
207
208  foreach my $key (@keys)
209  {
210      $infodb_map->{$key} = 1;
211  }
212}
213
214sub read_infodb_rawentry
215{
216  my $infodb_file_path = shift(@_);
217  my $infodb_key = shift(@_);
218
219 
220  my $key_safe = &sqlite_safe($infodb_key);
221  my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
222
223  my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
224
225  return $val_str
226}
227
228sub read_infodb_entry
229{
230  my $infodb_file_path = shift(@_);
231  my $infodb_key = shift(@_);
232
233  my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
234 
235  my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
236
237  return $rec_hash;
238}
239
240
241sub write_infodb_entry
242{
243  my $infodb_handle = shift(@_);
244  my $infodb_key = shift(@_);
245  my $infodb_map = shift(@_);
246 
247
248  # Add the key -> value mapping into the "data" table
249  my $infodb_entry_value = "";
250  foreach my $infodb_value_key (keys(%$infodb_map))
251  {
252    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
253    {
254      $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
255    }
256  }
257 
258  my $safe_infodb_key = &sqlite_safe($infodb_key);
259  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
260
261  # If this infodb entry is for a document, add all the interesting document metadata to the
262  #   "document_metadata" table (for use by the dynamic classifiers)
263  if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
264  {
265   
266    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
267   
268    foreach my $infodb_value_key (keys(%$infodb_map))
269    {
270      # We're not interested in most of the automatically added document metadata
271      next if ($infodb_value_key eq "archivedir" ||
272               $infodb_value_key eq "assocfilepath" ||
273               $infodb_value_key eq "childtype" ||
274               $infodb_value_key eq "contains" ||
275               $infodb_value_key eq "docnum" ||
276               $infodb_value_key eq "doctype" ||
277               $infodb_value_key eq "Encoding" ||
278               $infodb_value_key eq "FileSize" ||
279               $infodb_value_key eq "hascover" ||
280               $infodb_value_key eq "hastxt" ||
281               $infodb_value_key eq "lastmodified" ||
282               $infodb_value_key eq "metadataset" ||
283               $infodb_value_key eq "thistype" ||
284               $infodb_value_key =~ /^metadatafreq\-/ ||
285               $infodb_value_key =~ /^metadatalist\-/);
286
287      foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
288      {
289         print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
290      }
291    }
292  }
293
294     #### DEBUGGING
295    #my $new_file = "D:\\sql.txt";
296    #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
297    #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
298    #close(FOUT);
299    #print STDERR $insertStatementsBuffer;
300     #### END DEBUGGING
301}
302
303sub write_infodb_rawentry
304{
305  my $infodb_handle = shift(@_);
306  my $infodb_key = shift(@_);
307  my $infodb_val = shift(@_);
308
309  my $safe_infodb_key = &sqlite_safe($infodb_key);
310  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
311}
312
313
314sub set_infodb_entry
315{
316  my $infodb_file_path = shift(@_);
317  my $infodb_key = shift(@_);
318  my $infodb_map = shift(@_);
319  my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
320
321  if (!defined $infodb_handle) {
322      print STDERR "Error: Failed to open infodb write handle\n";
323      return -1;
324  }
325  else {
326      write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
327      close_infodb_write_handle($infodb_handle);
328  }
329  # Not currently checking for errors on write to DB
330  return 0;
331
332}
333
334
335
336sub delete_infodb_entry
337{
338  my $infodb_handle = shift(@_);
339  my $infodb_key = shift(@_);
340
341  # Delete the key from the "data" table
342
343  my $safe_infodb_key = &sqlite_safe($infodb_key);
344
345  print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
346
347  # If this infodb entry is for a document, delete the
348  #   "document_metadata" table entry also (for use by the dynamic classifiers)
349  if ($infodb_key !~ /\./)
350  {
351      # Possible for there not to be a docOID matching this infodb_key
352      # (entries are only made when <doctype> == doc
353      # Attempt to delete it, and don't complain if one isn't found
354
355      print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
356
357  }
358}
359
360
361sub sqlite_safe
362{
363  my $value = shift(@_);
364
365  # Escape any single quotes in the value
366  $value =~ s/\'/\'\'/g;
367
368  return $value;
369}
370
371
3721;
Note: See TracBrowser for help on using the browser.