source: gsdl/trunk/perllib/dbutil.pm@ 16223

Last change on this file since 16223 was 16223, checked in by mdewsnip, 16 years ago

Added a couple of comments and now creates an index on the document_metadata table's "element" field, to greatly improve the speed of queries.

File size: 9.3 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
31sub close_infodb_write_handle
32{
33 my $infodb_type = shift(@_);
34 my $infodb_handle = shift(@_);
35
36 if ($infodb_type eq "sqlite")
37 {
38 return &close_infodb_write_handle_sqlite($infodb_handle);
39 }
40
41 # Use GDBM if the infodb type is empty or not one of the values above
42 return &close_infodb_write_handle_gdbm($infodb_handle);
43}
44
45
46sub get_default_infodb_type
47{
48 return "gdbm";
49}
50
51
52sub get_infodb_file_path
53{
54 my $infodb_type = shift(@_);
55 my $collection_name = shift(@_);
56 my $infodb_directory_path = shift(@_);
57
58 if ($infodb_type eq "sqlite")
59 {
60 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
61 }
62
63 # Use GDBM if the infodb type is empty or not one of the values above
64 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
65}
66
67
68sub open_infodb_write_handle
69{
70 my $infodb_type = shift(@_);
71 my $infodb_file_path = shift(@_);
72
73 if ($infodb_type eq "sqlite")
74 {
75 return &open_infodb_write_handle_sqlite($infodb_file_path);
76 }
77
78 # Use GDBM if the infodb type is empty or not one of the values above
79 return &open_infodb_write_handle_gdbm($infodb_file_path);
80}
81
82
83sub read_infodb_file
84{
85 my $infodb_type = shift(@_);
86 my $infodb_file_path = shift(@_);
87 my $infodb_map = shift(@_);
88
89 if ($infodb_type eq "sqlite")
90 {
91 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
92 }
93
94 # Use GDBM if the infodb type is empty or not one of the values above
95 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
96}
97
98
99sub write_infodb_entry
100{
101 my $infodb_type = shift(@_);
102 my $infodb_handle = shift(@_);
103 my $infodb_key = shift(@_);
104 my $infodb_map = shift(@_);
105
106 if ($infodb_type eq "sqlite")
107 {
108 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
109 }
110
111 # Use GDBM if the infodb type is empty or not one of the values above
112 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
113}
114
115
116
117# ----------------------------------------------------------------------------------------
118# GDBM IMPLEMENTATION
119# ----------------------------------------------------------------------------------------
120
121sub close_infodb_write_handle_gdbm
122{
123 my $infodb_handle = shift(@_);
124
125 close($infodb_handle);
126}
127
128
129sub get_infodb_file_path_gdbm
130{
131 my $collection_name = shift(@_);
132 my $infodb_directory_path = shift(@_);
133
134 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
135 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
136 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
137}
138
139
140sub open_infodb_write_handle_gdbm
141{
142 my $infodb_file_path = shift(@_);
143
144 my $txt2db_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "txt2db" . &util::get_os_exe());
145 my $infodb_file_handle = undef;
146 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| $txt2db_exe \"$infodb_file_path\""))
147 {
148 return undef;
149 }
150
151 return $infodb_file_handle;
152}
153
154
155sub read_infodb_file_gdbm
156{
157 my $infodb_file_path = shift(@_);
158 my $infodb_map = shift(@_);
159
160 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
161 my $infodb_line = "";
162 my $infodb_key = "";
163 my $infodb_value = "";
164 while (defined ($infodb_line = <PIPEIN>))
165 {
166 if ($infodb_line =~ /^\[([^\]]+)\]$/)
167 {
168 $infodb_key = $1;
169 }
170 elsif ($infodb_line =~ /^-{70}$/)
171 {
172 $infodb_map->{$infodb_key} = $infodb_value;
173 $infodb_key = "";
174 $infodb_value = "";
175 }
176 else
177 {
178 $infodb_value .= $infodb_line;
179 }
180 }
181
182 close (PIPEIN);
183}
184
185
186sub write_infodb_entry_gdbm
187{
188 my $infodb_handle = shift(@_);
189 my $infodb_key = shift(@_);
190 my $infodb_map = shift(@_);
191
192 print $infodb_handle "[$infodb_key]\n";
193 foreach my $infodb_value_key (keys(%$infodb_map))
194 {
195 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
196 {
197 if ($infodb_value =~ /-{70,}/)
198 {
199 # if value contains 70 or more hyphens in a row we need to escape them
200 # to prevent txt2db from treating them as a separator
201 $infodb_value =~ s/-/&\#045;/gi;
202 }
203 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
204 }
205 }
206 print $infodb_handle '-' x 70, "\n";
207}
208
209
210
211# ----------------------------------------------------------------------------------------
212# SQLITE IMPLEMENTATION
213# ----------------------------------------------------------------------------------------
214
215sub close_infodb_write_handle_sqlite
216{
217 my $infodb_handle = shift(@_);
218
219 # Close the transaction we began after opening the file
220 print $infodb_handle "END TRANSACTION;\n";
221
222 # This is crucial for efficient queries on the database!
223 print $infodb_handle "CREATE INDEX dme ON document_metadata(element);\n";
224
225 close($infodb_handle);
226}
227
228
229sub get_infodb_file_path_sqlite
230{
231 my $collection_name = shift(@_);
232 my $infodb_directory_path = shift(@_);
233
234 my $infodb_file_extension = ".db";
235 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
236 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
237}
238
239
240sub open_infodb_write_handle_sqlite
241{
242 my $infodb_file_path = shift(@_);
243
244 my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
245 my $infodb_handle = undef;
246 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| $sqlite3_exe \"$infodb_file_path\""))
247 {
248 return undef;
249 }
250
251 print $infodb_handle "CREATE TABLE data (key TEXT PRIMARY KEY, value TEXT);\n";
252 print $infodb_handle "CREATE TABLE document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
253
254 # This is very important for efficiency, otherwise each command will be actioned one at a time
255 print $infodb_handle "BEGIN TRANSACTION;\n";
256
257 return $infodb_handle;
258}
259
260
261sub read_infodb_file_sqlite
262{
263 my $infodb_file_path = shift(@_);
264 my $infodb_map = shift(@_);
265
266 # !! TO IMPLEMENT
267}
268
269
270sub write_infodb_entry_sqlite
271{
272 my $infodb_handle = shift(@_);
273 my $infodb_key = shift(@_);
274 my $infodb_map = shift(@_);
275
276 # Add the key -> value mapping into the "data" table
277 my $infodb_entry_value = "";
278 foreach my $infodb_value_key (keys(%$infodb_map))
279 {
280 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
281 {
282 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
283 }
284 }
285
286 my $safe_infodb_key = &sqlite_safe($infodb_key);
287 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
288 print $infodb_handle "INSERT INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
289
290 # If this infodb entry is for a document, add all the interesting document metadata to the
291 # "document_metadata" table (for use by the dynamic classifiers)
292 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
293 {
294 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
295
296 foreach my $infodb_value_key (keys(%$infodb_map))
297 {
298 # We're not interested in most of the automatically added document metadata
299 next if ($infodb_value_key eq "archivedir" ||
300 $infodb_value_key eq "assocfilepath" ||
301 $infodb_value_key eq "childtype" ||
302 $infodb_value_key eq "contains" ||
303 $infodb_value_key eq "docnum" ||
304 $infodb_value_key eq "doctype" ||
305 $infodb_value_key eq "Encoding" ||
306 $infodb_value_key eq "FileSize" ||
307 $infodb_value_key eq "hascover" ||
308 $infodb_value_key eq "hastxt" ||
309 $infodb_value_key eq "lastmodified" ||
310 $infodb_value_key eq "metadataset" ||
311 $infodb_value_key eq "thistype" ||
312 $infodb_value_key =~ /^metadatafreq\-/ ||
313 $infodb_value_key =~ /^metadatalist\-/);
314
315 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
316 {
317 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
318 }
319 }
320 }
321}
322
323
324sub sqlite_safe
325{
326 my $value = shift(@_);
327
328 # Escape any single quotes in the value
329 $value =~ s/\'/\'\'/g;
330
331 return $value;
332}
333
334
3351;
Note: See TracBrowser for help on using the repository browser.