root/gs2-extensions/parallel-building/trunk/src/perllib/dbutil/sqlite.pm @ 24626

Revision 24626, 10.7 KB (checked in by jmt12, 9 years ago)

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010  DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31# Please set $db_fast to 1 if you wish to enable faster I/O to the database by using
32# optimisations such as PRAGMA journal_mode (MEMORY instead of DELETE) and synchronous (OFF instead of FULL)
33# Please be aware that in this case it will be less secure and the database file
34# may become corrupted if the if the operating system crashes or the computer loses
35# power before that data has been written to the disk surface.
36# But the speed gain is about 50x
37my $db_fast = 0;
38
39
40# -----------------------------------------------------------------------------
41#   SQLITE IMPLEMENTATION
42# -----------------------------------------------------------------------------
43
44sub open_infodb_write_handle
45{
46  my $infodb_file_path = shift(@_);
47  my $opt_append = shift(@_);
48 
49  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
50  my $infodb_handle = undef;
51
52  if (!-e "$sqlite3_exe")
53  {
54      print STDERR "Error: Unable to find $sqlite3_exe\n";
55      return undef;
56  }
57
58  # running sqlite3 with the pragma journal_mode=memory, causes sqlite to print out the
59  # word "memory". While this is not a problem usually, in our case, this ends up going out
60  # to the web page first, as part of the web page's headers, thus ruining the web page
61  # which causes an Internal Server Error (500). Therefore, we redirect sqlite's output to
62  # the nul device instead.
63  my $nul_device="";
64  if(defined $db_fast && $db_fast == 1) {
65    if($ENV{'GSDLOS'} =~ m/windows/) {
66        $nul_device=">NUL";
67    } else {
68        $nul_device=">/dev/null"; # linux, mac
69    }
70  } 
71 
72  if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"$nul_device"))
73  {
74      print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
75      print STDERR "       $!\n";
76      return undef;
77  }
78
79  binmode($infodb_handle,":utf8");
80 
81   # Add extra optimisations, less secure but with a massive gain in performance with large databases which are often uptaded
82   # They should be set before the transaction begins
83  if (defined $db_fast && $db_fast == 1) {
84    print $infodb_handle "PRAGMA synchronous=OFF;\n";
85    print $infodb_handle "PRAGMA journal_mode=MEMORY;\n";
86  }
87 
88  # This is very important for efficiency, otherwise each command will be actioned one at a time
89  print $infodb_handle "BEGIN TRANSACTION;\n";
90 
91
92
93  if (!(defined $opt_append) || ($opt_append ne "append")) {
94    print $infodb_handle "DROP TABLE IF EXISTS data;\n";
95    print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
96  }
97   
98  print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
99  print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
100
101  # This is crucial for efficiency when importing large amounts of data
102  print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
103
104  return $infodb_handle;
105}
106
107
108sub close_infodb_write_handle
109{
110  my $infodb_handle = shift(@_);
111
112  # This is crucial for efficient queries on the database!
113  print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
114 
115  # Close the transaction we began after opening the file
116  print $infodb_handle "END TRANSACTION;\n";
117
118  close($infodb_handle);
119}
120
121
122sub read_infodb_cmd
123{
124  my $infodb_file_path = shift(@_);
125  my $sqlcmd = shift(@_);
126
127  my $result = "";
128
129  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
130  my $infodb_handle = undef;
131  my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
132
133  if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
134  {
135      print STDERR "Unable to execute: $cmd\n";
136      print STDERR "$!\n";
137  }
138  else {
139
140      binmode($infodb_handle, ":utf8");
141      my $line;
142      while (defined($line=<$infodb_handle>)) {
143      $result .= $line;
144      }
145
146      close($infodb_handle);
147  }
148
149  return $result;
150}
151
152sub get_infodb_file_path
153{
154  my $collection_name = shift(@_);
155  my $infodb_directory_path = shift(@_);
156
157  my $infodb_file_extension = ".db";
158  my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
159  return &util::filename_cat($infodb_directory_path, $infodb_file_name);
160}
161
162
163sub read_infodb_file
164{
165  my $infodb_file_path = shift(@_);
166  my $infodb_map = shift(@_);
167
168
169  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
170
171  my @keys = split(/\n/,$keys_str);
172
173  foreach my $k (@keys) {
174     
175      my $k_safe = &sqlite_safe($k);
176      my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
177
178      my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
179
180      $infodb_map->{$k} = $val_str;
181  }
182
183}
184
185
186sub read_infodb_keys
187{
188  my $infodb_file_path = shift(@_);
189  my $infodb_map = shift(@_);
190
191
192  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
193
194  my @keys = split(/\n/,$keys_str);
195
196  foreach my $key (@keys)
197  {
198      $infodb_map->{$key} = 1;
199  }
200}
201
202sub read_infodb_rawentry
203{
204  my $infodb_file_path = shift(@_);
205  my $infodb_key = shift(@_);
206
207 
208  my $key_safe = &sqlite_safe($infodb_key);
209  my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
210
211  my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
212
213  return $val_str
214}
215
216sub read_infodb_entry
217{
218  my $infodb_file_path = shift(@_);
219  my $infodb_key = shift(@_);
220
221  my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
222 
223  my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
224
225  return $rec_hash;
226}
227
228
229sub write_infodb_entry
230{
231  my $infodb_handle = shift(@_);
232  my $infodb_key = shift(@_);
233  my $infodb_map = shift(@_);
234 
235
236  # Add the key -> value mapping into the "data" table
237  my $infodb_entry_value = "";
238  foreach my $infodb_value_key (keys(%$infodb_map))
239  {
240    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
241    {
242      $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
243    }
244  }
245 
246  my $safe_infodb_key = &sqlite_safe($infodb_key);
247  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
248
249  # If this infodb entry is for a document, add all the interesting document metadata to the
250  #   "document_metadata" table (for use by the dynamic classifiers)
251  if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
252  {
253   
254    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
255   
256    foreach my $infodb_value_key (keys(%$infodb_map))
257    {
258      # We're not interested in most of the automatically added document metadata
259      next if ($infodb_value_key eq "archivedir" ||
260               $infodb_value_key eq "assocfilepath" ||
261               $infodb_value_key eq "childtype" ||
262               $infodb_value_key eq "contains" ||
263               $infodb_value_key eq "docnum" ||
264               $infodb_value_key eq "doctype" ||
265               $infodb_value_key eq "Encoding" ||
266               $infodb_value_key eq "FileSize" ||
267               $infodb_value_key eq "hascover" ||
268               $infodb_value_key eq "hastxt" ||
269               $infodb_value_key eq "lastmodified" ||
270               $infodb_value_key eq "metadataset" ||
271               $infodb_value_key eq "thistype" ||
272               $infodb_value_key =~ /^metadatafreq\-/ ||
273               $infodb_value_key =~ /^metadatalist\-/);
274
275      foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
276      {
277         print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
278      }
279    }
280  }
281
282     #### DEBUGGING
283    #my $new_file = "D:\\sql.txt";
284    #open(FOUT, ">>$new_file") or die "Unable to open $new_file for writing out sql statements...ERROR: $!\n";
285    #print FOUT "BEGIN;\n".$insertStatementsBuffer."\nEND;\n";
286    #close(FOUT);
287    #print STDERR $insertStatementsBuffer;
288     #### END DEBUGGING
289}
290
291sub write_infodb_rawentry
292{
293  my $infodb_handle = shift(@_);
294  my $infodb_key = shift(@_);
295  my $infodb_val = shift(@_);
296
297  my $safe_infodb_key = &sqlite_safe($infodb_key);
298  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
299}
300
301
302sub set_infodb_entry
303{
304  my $infodb_file_path = shift(@_);
305  my $infodb_key = shift(@_);
306  my $infodb_map = shift(@_);
307  my $infodb_handle = open_infodb_write_handle($infodb_file_path, "append");
308
309  if (!defined $infodb_handle) {
310      print STDERR "Error: Failed to open infodb write handle\n";
311      return -1;
312  }
313  else {
314      write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
315      close_infodb_write_handle($infodb_handle);
316  }
317  # Not currently checking for errors on write to DB
318  return 0;
319
320}
321
322
323
324sub delete_infodb_entry
325{
326  my $infodb_handle = shift(@_);
327  my $infodb_key = shift(@_);
328
329  # Delete the key from the "data" table
330
331  my $safe_infodb_key = &sqlite_safe($infodb_key);
332
333  print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
334
335  # If this infodb entry is for a document, delete the
336  #   "document_metadata" table entry also (for use by the dynamic classifiers)
337  if ($infodb_key !~ /\./)
338  {
339      # Possible for there not to be a docOID matching this infodb_key
340      # (entries are only made when <doctype> == doc
341      # Attempt to delete it, and don't complain if one isn't found
342
343      print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
344
345  }
346}
347
348
349sub sqlite_safe
350{
351  my $value = shift(@_);
352
353  # Escape any single quotes in the value
354  $value =~ s/\'/\'\'/g;
355
356  return $value;
357}
358
359
3601;
Note: See TracBrowser for help on using the browser.