source: main/trunk/greenstone2/perllib/dbutil/sqlite.pm@ 23166

Last change on this file since 23166 was 23166, checked in by kjdon, 14 years ago

copying code from gdbm.pm: Attached :utf8 encoding to db pipes to prevent double encoding problem with incremental buildcol.pl

File size: 7.8 KB
RevLine 
[21411]1###########################################################################
2#
3# dbutil::sqlite -- utility functions for writing to sqlite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
[22735]9# Copyright (C) 2009-2010 DL Consulting Ltd.
[21411]10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package dbutil::sqlite;
28
29use strict;
30
31
32# -----------------------------------------------------------------------------
33# SQLITE IMPLEMENTATION
34# -----------------------------------------------------------------------------
35
36sub open_infodb_write_handle
37{
38 my $infodb_file_path = shift(@_);
39
40 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
41 my $infodb_handle = undef;
[22076]42
43 if (!-e "$sqlite3_exe")
[21411]44 {
[22076]45 print STDERR "Error: Unable to find $sqlite3_exe\n";
46 return undef;
47 }
48
49 if(!open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
50 {
51 print STDERR "Error: Failed to open pipe to \"$sqlite3_exe\" \"$infodb_file_path\"\n";
52 print STDERR " $!\n";
53 return undef;
[21411]54 }
55
[23166]56 binmode($infodb_handle,":utf8");
57
[21411]58 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
59 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
60
61 # This is crucial for efficiency when importing large amounts of data
62 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
63
64 # This is very important for efficiency, otherwise each command will be actioned one at a time
65 print $infodb_handle "BEGIN TRANSACTION;\n";
66
67 return $infodb_handle;
68}
69
70
71sub close_infodb_write_handle
72{
73 my $infodb_handle = shift(@_);
74
75 # Close the transaction we began after opening the file
76 print $infodb_handle "END TRANSACTION;\n";
77
78 # This is crucial for efficient queries on the database!
79 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
80
81 close($infodb_handle);
82}
83
84
[21856]85sub read_infodb_cmd
86{
87 my $infodb_file_path = shift(@_);
88 my $sqlcmd = shift(@_);
89
90 my $result = "";
91
92 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
93 my $infodb_handle = undef;
[21872]94 my $cmd = "\"$sqlite3_exe\" \"$infodb_file_path\" \"$sqlcmd\"";
[21856]95
96 if (!-e "$sqlite3_exe" || !open($infodb_handle, "$cmd |"))
97 {
98 print STDERR "Unable to execute: $cmd\n";
99 print STDERR "$!\n";
100 }
101 else {
102
[23166]103 binmode($infodb_handle, ":utf8");
[21856]104 my $line;
105 while (defined($line=<$infodb_handle>)) {
106 $result .= $line;
107 }
108
109 close($infodb_handle);
110 }
111
112 return $result;
113}
114
[21411]115sub get_infodb_file_path
116{
117 my $collection_name = shift(@_);
118 my $infodb_directory_path = shift(@_);
119
120 my $infodb_file_extension = ".db";
121 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
122 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
123}
124
125
126sub read_infodb_file
127{
128 my $infodb_file_path = shift(@_);
129 my $infodb_map = shift(@_);
130
[21856]131
132 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
133
134 my @keys = split(/\n/,$keys_str);
135
136 foreach my $k (@keys) {
137
138 my $k_safe = &sqlite_safe($k);
139 my $select_val_cmd = "SELECT value FROM data WHERE key='$k_safe';";
140
141 my $val_str = read_infodb_cmd($infodb_file_path,$select_val_cmd);
142
143 $infodb_map->{$k} = $val_str;
144 }
145
[21411]146}
147
148
[22485]149sub read_infodb_keys
150{
151 my $infodb_file_path = shift(@_);
152 my $infodb_map = shift(@_);
153
154
155 my $keys_str = read_infodb_cmd($infodb_file_path,"SELECT key FROM data;");
156
157 my @keys = split(/\n/,$keys_str);
158
159 foreach my $key (@keys)
160 {
161 $infodb_map->{$key} = 1;
162 }
163}
164
165
[21411]166sub write_infodb_entry
167{
168 my $infodb_handle = shift(@_);
169 my $infodb_key = shift(@_);
170 my $infodb_map = shift(@_);
171
172 # Add the key -> value mapping into the "data" table
173 my $infodb_entry_value = "";
174 foreach my $infodb_value_key (keys(%$infodb_map))
175 {
176 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
177 {
178 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
179 }
180 }
181
182 my $safe_infodb_key = &sqlite_safe($infodb_key);
183 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
184
185 # If this infodb entry is for a document, add all the interesting document metadata to the
186 # "document_metadata" table (for use by the dynamic classifiers)
187 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
188 {
189 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
190
191 foreach my $infodb_value_key (keys(%$infodb_map))
192 {
193 # We're not interested in most of the automatically added document metadata
194 next if ($infodb_value_key eq "archivedir" ||
195 $infodb_value_key eq "assocfilepath" ||
196 $infodb_value_key eq "childtype" ||
197 $infodb_value_key eq "contains" ||
198 $infodb_value_key eq "docnum" ||
199 $infodb_value_key eq "doctype" ||
200 $infodb_value_key eq "Encoding" ||
201 $infodb_value_key eq "FileSize" ||
202 $infodb_value_key eq "hascover" ||
203 $infodb_value_key eq "hastxt" ||
204 $infodb_value_key eq "lastmodified" ||
205 $infodb_value_key eq "metadataset" ||
206 $infodb_value_key eq "thistype" ||
207 $infodb_value_key =~ /^metadatafreq\-/ ||
208 $infodb_value_key =~ /^metadatalist\-/);
209
210 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
211 {
212 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
213 }
214 }
215 }
216}
217
218
219
[21856]220sub write_infodb_rawentry
221{
222 my $infodb_handle = shift(@_);
223 my $infodb_key = shift(@_);
224 my $infodb_val = shift(@_);
225
226 my $safe_infodb_key = &sqlite_safe($infodb_key);
227 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_val) . "');\n";
228}
229
230
231
[21411]232sub delete_infodb_entry
233{
234 my $infodb_handle = shift(@_);
235 my $infodb_key = shift(@_);
236
237 # Delete the key from the "data" table
238
239 my $safe_infodb_key = &sqlite_safe($infodb_key);
240
241 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
242
243 # If this infodb entry is for a document, delete the
244 # "document_metadata" table entry also (for use by the dynamic classifiers)
245 if ($infodb_key !~ /\./)
246 {
247 # Possible for there not to be a docOID matching this infodb_key
248 # (entries are only made when <doctype> == doc
249 # Attempt to delete it, and don't complain if one isn't found
250
251 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
252
253 }
254}
255
256
257sub sqlite_safe
258{
259 my $value = shift(@_);
260
261 # Escape any single quotes in the value
262 $value =~ s/\'/\'\'/g;
263
264 return $value;
265}
266
267
2681;
Note: See TracBrowser for help on using the repository browser.