root/main/trunk/greenstone2/perllib/dbutil/sqlite.pm @ 23166

Revision 23166, 7.8 KB (checked in by kjdon, 10 years ago)

copying code from gdbm.pm: Attached :utf8 encoding to db pipes to prevent double encoding problem with incremental buildcol.pl

Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010  DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31
32# -----------------------------------------------------------------------------
33#   SQLITE IMPLEMENTATION
34# -----------------------------------------------------------------------------
35
36sub open_infodb_write_handle
37{
38  my $infodb_file_path = shift(@_);
39
40  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
41  my $infodb_handle = undef;
42
43  if (!-e "$sqlite3_exe")
44  {
45      print STDERR "Error: Unable to find $sqlite3_exe\n";
46      return undef;
47  }
48
49  if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
50  {
51      print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
52      print STDERR "       $!\n";
53      return undef;
54  }
55
56  binmode($infodb_handle,":utf8");
57
58  print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
59  print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
60
61  # This is crucial for efficiency when importing large amounts of data
62  print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
63
64  # This is very important for efficiency, otherwise each command will be actioned one at a time
65  print $infodb_handle "BEGIN TRANSACTION;\n";
66
67  return $infodb_handle;
68}
69
70
71sub close_infodb_write_handle
72{
73  my $infodb_handle = shift(@_);
74
75  # Close the transaction we began after opening the file
76  print $infodb_handle "END TRANSACTION;\n";
77
78  # This is crucial for efficient queries on the database!
79  print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
80
81  close($infodb_handle);
82}
83
84
85sub read_infodb_cmd
86{
87  my $infodb_file_path = shift(@_);
88  my $sqlcmd = shift(@_);
89
90  my $result = "";
91
92  my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
93  my $infodb_handle = undef;
94  my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
95
96  if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
97  {
98      print STDERR "Unable to execute: $cmd\n";
99      print STDERR "$!\n";
100  }
101  else {
102
103      binmode($infodb_handle, ":utf8");
104      my $line;
105      while (defined($line=<$infodb_handle>)) {
106      $result .= $line;
107      }
108
109      close($infodb_handle);
110  }
111
112  return $result;
113}
114
115sub get_infodb_file_path
116{
117  my $collection_name = shift(@_);
118  my $infodb_directory_path = shift(@_);
119
120  my $infodb_file_extension = ".db";
121  my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
122  return &util::filename_cat($infodb_directory_path, $infodb_file_name);
123}
124
125
126sub read_infodb_file
127{
128  my $infodb_file_path = shift(@_);
129  my $infodb_map = shift(@_);
130
131
132  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
133
134  my @keys = split(/\n/,$keys_str);
135
136  foreach my $k (@keys) {
137     
138      my $k_safe = &sqlite_safe($k);
139      my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
140
141      my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
142
143      $infodb_map->{$k} = $val_str;
144  }
145
146}
147
148
149sub read_infodb_keys
150{
151  my $infodb_file_path = shift(@_);
152  my $infodb_map = shift(@_);
153
154
155  my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
156
157  my @keys = split(/\n/,$keys_str);
158
159  foreach my $key (@keys)
160  {
161      $infodb_map->{$key} = 1;
162  }
163}
164
165
166sub write_infodb_entry
167{
168  my $infodb_handle = shift(@_);
169  my $infodb_key = shift(@_);
170  my $infodb_map = shift(@_);
171
172  # Add the key -> value mapping into the "data" table
173  my $infodb_entry_value = "";
174  foreach my $infodb_value_key (keys(%$infodb_map))
175  {
176    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
177    {
178      $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
179    }
180  }
181
182  my $safe_infodb_key = &sqlite_safe($infodb_key);
183  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
184
185  # If this infodb entry is for a document, add all the interesting document metadata to the
186  #   "document_metadata" table (for use by the dynamic classifiers)
187  if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
188  {
189    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
190
191    foreach my $infodb_value_key (keys(%$infodb_map))
192    {
193      # We're not interested in most of the automatically added document metadata
194      next if ($infodb_value_key eq "archivedir" ||
195               $infodb_value_key eq "assocfilepath" ||
196               $infodb_value_key eq "childtype" ||
197               $infodb_value_key eq "contains" ||
198               $infodb_value_key eq "docnum" ||
199               $infodb_value_key eq "doctype" ||
200               $infodb_value_key eq "Encoding" ||
201               $infodb_value_key eq "FileSize" ||
202               $infodb_value_key eq "hascover" ||
203               $infodb_value_key eq "hastxt" ||
204               $infodb_value_key eq "lastmodified" ||
205               $infodb_value_key eq "metadataset" ||
206               $infodb_value_key eq "thistype" ||
207               $infodb_value_key =~ /^metadatafreq\-/ ||
208               $infodb_value_key =~ /^metadatalist\-/);
209
210      foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
211      {
212        print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
213      }
214    }
215  }
216}
217
218
219
220sub write_infodb_rawentry
221{
222  my $infodb_handle = shift(@_);
223  my $infodb_key = shift(@_);
224  my $infodb_val = shift(@_);
225
226  my $safe_infodb_key = &sqlite_safe($infodb_key);
227  print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
228}
229
230
231
232sub delete_infodb_entry
233{
234  my $infodb_handle = shift(@_);
235  my $infodb_key = shift(@_);
236
237  # Delete the key from the "data" table
238
239  my $safe_infodb_key = &sqlite_safe($infodb_key);
240
241  print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
242
243  # If this infodb entry is for a document, delete the
244  #   "document_metadata" table entry also (for use by the dynamic classifiers)
245  if ($infodb_key !~ /\./)
246  {
247      # Possible for there not to be a docOID matching this infodb_key
248      # (entries are only made when <doctype> == doc
249      # Attempt to delete it, and don't complain if one isn't found
250
251      print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
252
253  }
254}
255
256
257sub sqlite_safe
258{
259  my $value = shift(@_);
260
261  # Escape any single quotes in the value
262  $value =~ s/\'/\'\'/g;
263
264  return $value;
265}
266
267
2681;
Note: See TracBrowser for help on using the browser.