root/gs2-extensions/parallel-building/trunk/src/perllib/dbutil/sqlite.pm @ 24673

Revision 24673, 11.0 KB (checked in by jmt12, 9 years ago)

Added switch and code for enabling WAL when accessing SQLite, but it didn't seem to have the desired affect

Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010  DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39# Set to 1 to enable Write Ahead Logging - which is supposed to allow multiple
40# readers/writers on a SQLite database (overridden by db_fast).
41my $db_wal = 0;
42
43
44# -----------------------------------------------------------------------------
45#   SQLITE IMPLEMENTATION
46# -----------------------------------------------------------------------------
47
48sub open_infodb_write_handle
49{
50  my $infodb_file_path = shift(@_);
51  my $opt_append = shift(@_);
52 
53  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
54  my $infodb_handle = undef;
55
56  if (!-e "$sqlite3_exe")
57  {
58      print STDERR "Error: Unable to find $sqlite3_exe\n";
59      return undef;
60  }
61
62  # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
63  # word "memory". While this is not a problem usually, in our case, this ends up going out
64  # to the web page first, as part of the web page's headers, thus ruining the web page
65  # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
66  # the nul device instead.
67  my $nul_device="";
68  if($db_fast || $db_wal) {
69    if($ENV{'GSDLOS'} =~ m/windows/) {
70        $nul_device=">NUL";
71    } else {
72        $nul_device=">/dev/null"; # linux, mac
73    }
74  } 
75 
76  if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
77  {
78      print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
79      print STDERR "       $!\n";
80      return undef;
81  }
82
83  binmode($infodb_handle,":utf8");
84 
85   # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
86   # They should be set before the transaction begins
87  if (defined $db_fast && $db_fast == 1) {
88    print $infodb_handle "PRAGMA synchronous=OFF;\n";
89    print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
90  }
91  # Allow parallel readers/writers by using a Write Ahead Logger
92  elsif ($db_wal)
93  {
94    print $infodb_handle "PRAGMA journal_mode=WAL;\n";
95  }
96
97  # This is very important for efficiency, otherwise each command will be actioned one at a time
98  print $infodb_handle "BEGIN TRANSACTION;\n";
99 
100
101
102  if (!(defined $opt_append) || ($opt_append ne "append")) {
103    print $infodb_handle "DROP TABLE IF EXISTS data;\n";
104    print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
105  }
106   
107  print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
108  print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
109
110  # This is crucial for efficiency when importing large amounts of data
111  print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
112
113  return $infodb_handle;
114}
115
116
117sub close_infodb_write_handle
118{
119  my $infodb_handle = shift(@_);
120
121  # This is crucial for efficient queries on the database!
122  print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
123 
124  # Close the transaction we began after opening the file
125  print $infodb_handle "END TRANSACTION;\n";
126
127  close($infodb_handle);
128}
129
130
131sub read_infodb_cmd
132{
133  my $infodb_file_path = shift(@_);
134  my $sqlcmd = shift(@_);
135
136  my $result = "";
137
138  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
139  my $infodb_handle = undef;
140  my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
141
142  if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
143  {
144      print STDERR "Unable to execute: $cmd\n";
145      print STDERR "$!\n";
146  }
147  else {
148
149      binmode($infodb_handle, ":utf8");
150      my $line;
151      while (defined($line=<$infodb_handle>)) {
152      $result .= $line;
153      }
154
155      close($infodb_handle);
156  }
157
158  return $result;
159}
160
161sub get_infodb_file_path
162{
163  my $collection_name = shift(@_);
164  my $infodb_directory_path = shift(@_);
165
166  my $infodb_file_extension = ".db";
167  my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
168  return &util::filename_cat($infodb_directory_path, $infodb_file_name);
169}
170
171
172sub read_infodb_file
173{
174  my $infodb_file_path = shift(@_);
175  my $infodb_map = shift(@_);
176
177
178  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
179
180  my @keys = split(/\n/,$keys_str);
181
182  foreach my $k (@keys) {
183     
184      my $k_safe = &sqlite_safe($k);
185      my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
186
187      my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
188
189      $infodb_map->{$k} = $val_str;
190  }
191
192}
193
194
195sub read_infodb_keys
196{
197  my $infodb_file_path = shift(@_);
198  my $infodb_map = shift(@_);
199
200
201  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
202
203  my @keys = split(/\n/,$keys_str);
204
205  foreach my $key (@keys)
206  {
207      $infodb_map->{$key} = 1;
208  }
209}
210
211sub read_infodb_rawentry
212{
213  my $infodb_file_path = shift(@_);
214  my $infodb_key = shift(@_);
215
216 
217  my $key_safe = &sqlite_safe($infodb_key);
218  my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
219
220  my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
221
222  return $val_str
223}
224
225sub read_infodb_entry
226{
227  my $infodb_file_path = shift(@_);
228  my $infodb_key = shift(@_);
229
230  my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
231 
232  my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
233
234  return $rec_hash;
235}
236
237
238sub write_infodb_entry
239{
240  my $infodb_handle = shift(@_);
241  my $infodb_key = shift(@_);
242  my $infodb_map = shift(@_);
243 
244
245  # Add the key -> value mapping into the "data" table
246  my $infodb_entry_value = "";
247  foreach my $infodb_value_key (keys(%$infodb_map))
248  {
249    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
250    {
251      $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
252    }
253  }
254 
255  my $safe_infodb_key = &sqlite_safe($infodb_key);
256  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
257
258  # If this infodb entry is for a document, add all the interesting document metadata to the
259  #   "document_metadata" table (for use by the dynamic classifiers)
260  if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
261  {
262   
263    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
264   
265    foreach my $infodb_value_key (keys(%$infodb_map))
266    {
267      # We're not interested in most of the automatically added document metadata
268      next if ($infodb_value_key eq "archivedir" ||
269               $infodb_value_key eq "assocfilepath" ||
270               $infodb_value_key eq "childtype" ||
271               $infodb_value_key eq "contains" ||
272               $infodb_value_key eq "docnum" ||
273               $infodb_value_key eq "doctype" ||
274               $infodb_value_key eq "Encoding" ||
275               $infodb_value_key eq "FileSize" ||
276               $infodb_value_key eq "hascover" ||
277               $infodb_value_key eq "hastxt" ||
278               $infodb_value_key eq "lastmodified" ||
279               $infodb_value_key eq "metadataset" ||
280               $infodb_value_key eq "thistype" ||
281               $infodb_value_key =~ /^metadatafreq\-/ ||
282               $infodb_value_key =~ /^metadatalist\-/);
283
284      foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
285      {
286         print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
287      }
288    }
289  }
290
291     #### DEBUGGING
292    #my $new_file = "D:\\sql.txt";
293    #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
294    #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
295    #close(FOUT);
296    #print STDERR $insertStatementsBuffer;
297     #### END DEBUGGING
298}
299
300sub write_infodb_rawentry
301{
302  my $infodb_handle = shift(@_);
303  my $infodb_key = shift(@_);
304  my $infodb_val = shift(@_);
305
306  my $safe_infodb_key = &sqlite_safe($infodb_key);
307  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
308}
309
310
311sub set_infodb_entry
312{
313  my $infodb_file_path = shift(@_);
314  my $infodb_key = shift(@_);
315  my $infodb_map = shift(@_);
316  my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
317
318  if (!defined $infodb_handle) {
319      print STDERR "Error: Failed to open infodb write handle\n";
320      return -1;
321  }
322  else {
323      write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
324      close_infodb_write_handle($infodb_handle);
325  }
326  # Not currently checking for errors on write to DB
327  return 0;
328
329}
330
331
332
333sub delete_infodb_entry
334{
335  my $infodb_handle = shift(@_);
336  my $infodb_key = shift(@_);
337
338  # Delete the key from the "data" table
339
340  my $safe_infodb_key = &sqlite_safe($infodb_key);
341
342  print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
343
344  # If this infodb entry is for a document, delete the
345  #   "document_metadata" table entry also (for use by the dynamic classifiers)
346  if ($infodb_key !~ /\./)
347  {
348      # Possible for there not to be a docOID matching this infodb_key
349      # (entries are only made when <doctype> == doc
350      # Attempt to delete it, and don't complain if one isn't found
351
352      print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
353
354  }
355}
356
357
358sub sqlite_safe
359{
360  my $value = shift(@_);
361
362  # Escape any single quotes in the value
363  $value =~ s/\'/\'\'/g;
364
365  return $value;
366}
367
368
3691;
Note: See TracBrowser for help on using the browser.