source: main/trunk/greenstone2/perllib/dbutil/sqlite.pm@ 23683

Last change on this file since 23683 was 23683, checked in by ak19, 13 years ago

John Thompson fixed an important subtle bug that reveals itself when, after a full rebuild, you do an incremental build with mode set to infodb. The search results appear in a different order each time for the same search as before. His quick fix is to sort the results when taken out of the database, but he thinks that the bug itself implies that there is something dependent on the order of the data coming out of the DB that expects it to be in a certain order.

File size: 9.2 KB
Line 
1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2009-2010 DL Consulting Ltd.
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31
32# -----------------------------------------------------------------------------
33# SQLITE IMPLEMENTATION
34# -----------------------------------------------------------------------------
35
36sub open_infodb_write_handle
37{
38 my $infodb_file_path = shift(@_);
39 my $opt_append = shift(@_);
40
41 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
42 my $infodb_handle = undef;
43
44 if (!-e "$sqlite3_exe")
45 {
46 print STDERR "Error: Unable to find $sqlite3_exe\n";
47 return undef;
48 }
49
50 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
51 {
52 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
53 print STDERR " $!\n";
54 return undef;
55 }
56
57 binmode($infodb_handle,":utf8");
58
59 if (!(defined $opt_append) || ($opt_append ne "append")) {
60 print $infodb_handle "DROP TABLE IF EXISTS data;\n";
61 print $infodb_handle "DROP TABLE IF EXISTS document_metadata;\n";
62 }
63
64 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
65 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
66
67 # This is crucial for efficiency when importing large amounts of data
68 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
69
70 # This is very important for efficiency, otherwise each command will be actioned one at a time
71 print $infodb_handle "BEGIN TRANSACTION;\n";
72
73 return $infodb_handle;
74}
75
76
77sub close_infodb_write_handle
78{
79 my $infodb_handle = shift(@_);
80
81 # Close the transaction we began after opening the file
82 print $infodb_handle "END TRANSACTION;\n";
83
84 # This is crucial for efficient queries on the database!
85 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
86
87 close($infodb_handle);
88}
89
90
91sub read_infodb_cmd
92{
93 my $infodb_file_path = shift(@_);
94 my $sqlcmd = shift(@_);
95
96 my $result = "";
97
98 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
99 my $infodb_handle = undef;
100 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
101
102 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
103 {
104 print STDERR "Unable to execute: $cmd\n";
105 print STDERR "$!\n";
106 }
107 else {
108
109 binmode($infodb_handle, ":utf8");
110 my $line;
111 while (defined($line=<$infodb_handle>)) {
112 $result .= $line;
113 }
114
115 close($infodb_handle);
116 }
117
118 return $result;
119}
120
121sub get_infodb_file_path
122{
123 my $collection_name = shift(@_);
124 my $infodb_directory_path = shift(@_);
125
126 my $infodb_file_extension = ".db";
127 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
128 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
129}
130
131
132sub read_infodb_file
133{
134 my $infodb_file_path = shift(@_);
135 my $infodb_map = shift(@_);
136
137
138 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data ORDER BY key;");
139
140 my @keys = split(/\n/,$keys_str);
141
142 foreach my $k (@keys) {
143
144 my $k_safe = &sqlite_safe($k);
145 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
146
147 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
148
149 $infodb_map->{$k} = $val_str;
150 }
151
152}
153
154
155sub read_infodb_keys
156{
157 my $infodb_file_path = shift(@_);
158 my $infodb_map = shift(@_);
159
160
161 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
162
163 my @keys = split(/\n/,$keys_str);
164
165 foreach my $key (@keys)
166 {
167 $infodb_map->{$key} = 1;
168 }
169}
170
171sub read_infodb_rawentry
172{
173 my $infodb_file_path = shift(@_);
174 my $infodb_key = shift(@_);
175
176
177 my $key_safe = &sqlite_safe($infodb_key);
178 my $select_val_cmd = "SELECT value FROM data WHERE key='$key_safe';";
179
180 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
181
182 return $val_str
183}
184
185sub read_infodb_entry
186{
187 my $infodb_file_path = shift(@_);
188 my $infodb_key = shift(@_);
189
190 my $val_str = read_infodb_rawentry($infodb_file_path,$infodb_key);
191
192 my $rec_hash = &dbutil::convert_infodb_string_to_hash($val_str);
193
194 return $rec_hash;
195}
196
197
198sub write_infodb_entry
199{
200 my $infodb_handle = shift(@_);
201 my $infodb_key = shift(@_);
202 my $infodb_map = shift(@_);
203
204 # Add the key -> value mapping into the "data" table
205 my $infodb_entry_value = "";
206 foreach my $infodb_value_key (keys(%$infodb_map))
207 {
208 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
209 {
210 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
211 }
212 }
213
214 my $safe_infodb_key = &sqlite_safe($infodb_key);
215 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
216
217 # If this infodb entry is for a document, add all the interesting document metadata to the
218 # "document_metadata" table (for use by the dynamic classifiers)
219 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
220 {
221 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
222
223 foreach my $infodb_value_key (keys(%$infodb_map))
224 {
225 # We're not interested in most of the automatically added document metadata
226 next if ($infodb_value_key eq "archivedir" ||
227 $infodb_value_key eq "assocfilepath" ||
228 $infodb_value_key eq "childtype" ||
229 $infodb_value_key eq "contains" ||
230 $infodb_value_key eq "docnum" ||
231 $infodb_value_key eq "doctype" ||
232 $infodb_value_key eq "Encoding" ||
233 $infodb_value_key eq "FileSize" ||
234 $infodb_value_key eq "hascover" ||
235 $infodb_value_key eq "hastxt" ||
236 $infodb_value_key eq "lastmodified" ||
237 $infodb_value_key eq "metadataset" ||
238 $infodb_value_key eq "thistype" ||
239 $infodb_value_key =~ /^metadatafreq\-/ ||
240 $infodb_value_key =~ /^metadatalist\-/);
241
242 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
243 {
244 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
245 }
246 }
247 }
248}
249
250sub write_infodb_rawentry
251{
252 my $infodb_handle = shift(@_);
253 my $infodb_key = shift(@_);
254 my $infodb_val = shift(@_);
255
256 my $safe_infodb_key = &sqlite_safe($infodb_key);
257 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
258}
259
260
261sub set_infodb_entry
262{
263 my $infodb_file_path = shift(@_);
264 my $infodb_key = shift(@_);
265 my $infodb_map = shift(@_);
266
267 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
268 my $infodb_handle = undef;
269 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\"";
270
271 if (!-e "$sqlite3_exe" || !open($infodb_handle, "|$cmd"))
272 {
273 print STDERR "Unable to execute: $cmd\n";
274 print STDERR "$!\n";
275 return -1;
276 }
277 else {
278
279 binmode($infodb_handle, ":utf8");
280 write_infodb_entry($infodb_handle,$infodb_key,$infodb_map);
281 close($infodb_handle);
282 }
283
284 # Not currently checking for errors on write to DB
285 return 0;
286
287
288 }
289
290
291
292
293sub delete_infodb_entry
294{
295 my $infodb_handle = shift(@_);
296 my $infodb_key = shift(@_);
297
298 # Delete the key from the "data" table
299
300 my $safe_infodb_key = &sqlite_safe($infodb_key);
301
302 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
303
304 # If this infodb entry is for a document, delete the
305 # "document_metadata" table entry also (for use by the dynamic classifiers)
306 if ($infodb_key !~ /\./)
307 {
308 # Possible for there not to be a docOID matching this infodb_key
309 # (entries are only made when <doctype> == doc
310 # Attempt to delete it, and don't complain if one isn't found
311
312 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
313
314 }
315}
316
317
318sub sqlite_safe
319{
320 my $value = shift(@_);
321
322 # Escape any single quotes in the value
323 $value =~ s/\'/\'\'/g;
324
325 return $value;
326}
327
328
3291;
Note: See TracBrowser for help on using the repository browser.