source: gsdl/trunk/perllib/dbutil.pm@ 15887

Last change on this file since 15887 was 15748, checked in by mdewsnip, 16 years ago

(Adding dynamic classifiers) Added new code into write_infodb_entry_sqlite() to add document metadata to a new "document_metadata" table, for use by the dynamic classifiers.

File size: 8.4 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
# Returns the name of the infodb backend to use when none has been chosen.
sub get_default_infodb_type
{
    my $default_backend = "gdbm";
    return $default_backend;
}
35
36
# Works out the path of a collection's infodb file for the requested backend.
#   $infodb_type           - "sqlite" selects the SQLite implementation; any
#                            other value (including empty) selects GDBM
#   $collection_name       - handed on unchanged to the backend path builder
#   $infodb_directory_path - handed on unchanged to the backend path builder
# Returns whatever the selected backend's path builder returns.
sub get_infodb_file_path
{
    my ($infodb_type, $collection_name, $infodb_directory_path) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path)
        : &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
}
51
52
# Opens a handle for writing infodb entries using the requested backend.
#   $infodb_type      - "sqlite" selects the SQLite implementation; any other
#                       value (including empty) selects GDBM
#   $infodb_file_path - path of the database file to write
# Returns whatever the selected backend's opener returns.
sub open_infodb_write_handle
{
    my ($infodb_type, $infodb_file_path) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? &open_infodb_write_handle_sqlite($infodb_file_path)
        : &open_infodb_write_handle_gdbm($infodb_file_path);
}
66
67
# Reads an infodb file into a hash using the requested backend.
#   $infodb_type      - "sqlite" selects the SQLite implementation; any other
#                       value (including empty) selects GDBM
#   $infodb_file_path - path of the database file to read
#   $infodb_map       - hash reference handed on to the backend reader
# Returns whatever the selected backend's reader returns.
sub read_infodb_file
{
    my ($infodb_type, $infodb_file_path, $infodb_map) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? &read_infodb_file_sqlite($infodb_file_path, $infodb_map)
        : &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
}
82
83
# Writes one infodb entry through the requested backend.
#   $infodb_type   - "sqlite" selects the SQLite implementation; any other
#                    value (including empty) selects GDBM
#   $infodb_handle - write handle obtained from open_infodb_write_handle()
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference handed on to the backend writer
# Returns whatever the selected backend's writer returns.
sub write_infodb_entry
{
    my ($infodb_type, $infodb_handle, $infodb_key, $infodb_map) = @_;

    # GDBM is the fallback for an empty or unrecognised infodb type
    return ($infodb_type eq "sqlite")
        ? &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map)
        : &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
}
99
100
101
102# ----------------------------------------------------------------------------------------
103# GDBM IMPLEMENTATION
104# ----------------------------------------------------------------------------------------
105
# Builds the path of the GDBM infodb file for a collection.
#   $collection_name       - its util::get_dirsep_tail() part names the file
#   $infodb_directory_path - directory the file name is joined onto
# The extension is ".ldb" when util::is_little_endian() is true, ".bdb" otherwise.
sub get_infodb_file_path_gdbm
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $extension;
    if (&util::is_little_endian())
    {
        $extension = ".ldb";
    }
    else
    {
        $extension = ".bdb";
    }

    my $collection_tail = &util::get_dirsep_tail($collection_name);
    return &util::filename_cat($infodb_directory_path, $collection_tail . $extension);
}
115
116
# Opens a pipe into the txt2db executable so that entries printed to the
# returned handle are passed to txt2db for the given database file.
#   $infodb_file_path - database file path given to txt2db as its argument
# Returns the pipe handle, or undef when the txt2db executable does not exist
# or the pipe cannot be opened.
sub open_infodb_write_handle_gdbm
{
    my ($infodb_file_path) = @_;

    # txt2db is looked for under $GSDLHOME/bin/$GSDLOS
    my $bin_directory = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    my $txt2db_exe = &util::filename_cat($bin_directory, "txt2db" . &util::get_os_exe());

    # Without the executable there is nothing to pipe into
    return undef unless (-e "$txt2db_exe");

    my $infodb_file_handle = undef;
    open($infodb_file_handle, "| $txt2db_exe \"$infodb_file_path\"") or return undef;

    return $infodb_file_handle;
}
130
131
# Reads a GDBM infodb file by piping it through "db2txt" and stores every
# record found into the supplied hash.
#   $infodb_file_path - database file path given to db2txt as its argument
#   $infodb_map       - hash reference that receives key => raw value text
# The text read from db2txt is interpreted line by line:
#   "[key]"               sets the key of the current record
#   a line of 70 hyphens  ends the record and stores it in $infodb_map
#   anything else         is appended (newline included) to the record value
# Dies if the pipe from db2txt cannot be opened.
# Returns the result of closing the pipe.
sub read_infodb_file_gdbm
{
    my $infodb_file_path = shift(@_);
    my $infodb_map = shift(@_);

    # Lexical handle (as the open_infodb_write_handle_* functions use) rather than
    # a package-global bareword, so nested or repeated calls cannot clobber each
    # other's pipe and the handle is released even if we die part-way through
    open(my $db2txt_pipe, "-|", "db2txt \"$infodb_file_path\"")
        or die "couldn't open pipe from db2txt\n";

    my $infodb_line = "";
    my $infodb_key = "";
    my $infodb_value = "";
    while (defined ($infodb_line = <$db2txt_pipe>))
    {
        if ($infodb_line =~ /^\[([^\]]+)\]$/)
        {
            $infodb_key = $1;
        }
        elsif ($infodb_line =~ /^-{70}$/)
        {
            # End of record: store it and reset for the next one
            $infodb_map->{$infodb_key} = $infodb_value;
            $infodb_key = "";
            $infodb_value = "";
        }
        else
        {
            $infodb_value .= $infodb_line;
        }
    }

    return close($db2txt_pipe);
}
161
162
# Prints one infodb entry to a txt2db pipe in the text format txt2db reads:
#   [key]
#   <element>value      (one line per value of every element)
#   a line of 70 hyphens terminating the entry
#   $infodb_handle - handle to print to
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference: element name => array reference of values
# The caller's $infodb_map is left untouched.
sub write_infodb_entry_gdbm
{
    my $infodb_handle = shift(@_);
    my $infodb_key = shift(@_);
    my $infodb_map = shift(@_);

    print $infodb_handle "[$infodb_key]\n";
    foreach my $infodb_value_key (keys(%$infodb_map))
    {
        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
        {
            # The loop variable aliases the element inside the caller's array, so
            # escape a copy: substituting in place would permanently rewrite the
            # caller's metadata (and die outright on a read-only value)
            my $escaped_value = $infodb_value;
            if ($escaped_value =~ /-{70,}/)
            {
                # if value contains 70 or more hyphens in a row we need to escape them
                # to prevent txt2db from treating them as a separator
                $escaped_value =~ s/-/&\#045;/g;
            }
            print $infodb_handle "<$infodb_value_key>" . $escaped_value . "\n";
        }
    }
    print $infodb_handle '-' x 70, "\n";
}
185
186
187
188# ----------------------------------------------------------------------------------------
189# SQLITE IMPLEMENTATION
190# ----------------------------------------------------------------------------------------
191
# Builds the path of the SQLite infodb file for a collection.
#   $collection_name       - its util::get_dirsep_tail() part names the file
#   $infodb_directory_path - directory the file name is joined onto
# The file always gets a ".db" extension.
sub get_infodb_file_path_sqlite
{
    my ($collection_name, $infodb_directory_path) = @_;

    my $database_file_name = &util::get_dirsep_tail($collection_name) . ".db";
    return &util::filename_cat($infodb_directory_path, $database_file_name);
}
201
202
# Opens a pipe into the sqlite3 executable for the given database file and
# makes sure the tables written by write_infodb_entry_sqlite() exist.
#   $infodb_file_path - database file path given to sqlite3 as its argument
# Returns the pipe handle (SQL statements are printed to it), or undef when the
# sqlite3 executable does not exist or the pipe cannot be opened.
sub open_infodb_write_handle_sqlite
{
    my $infodb_file_path = shift(@_);

    # sqlite3 is looked for under $GSDLHOME/bin/$GSDLOS
    my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
    my $infodb_file_handle = undef;
    if (!-e "$sqlite3_exe" || !open($infodb_file_handle, "| $sqlite3_exe \"$infodb_file_path\""))
    {
        return undef;
    }

    # IF NOT EXISTS: the database file may already hold these tables (entries are
    # written with DELETE-then-INSERT precisely so that it can be reused), and a
    # plain CREATE TABLE on an existing table is an error
    print $infodb_file_handle "CREATE TABLE IF NOT EXISTS data (key TEXT, value TEXT, PRIMARY KEY(key));\n";
    print $infodb_file_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";

    return $infodb_file_handle;
}
219
220
# Placeholder for reading an SQLite infodb file into a hash.
#   $infodb_file_path - path of the database file (currently unused)
#   $infodb_map       - hash reference meant to receive the entries
# !! TO IMPLEMENT: nothing is read yet and $infodb_map is left as it was.
sub read_infodb_file_sqlite
{
    my ($infodb_file_path, $infodb_map) = @_;

    # Hand the map reference straight back; this is the value the sub has
    # always yielded to callers
    return $infodb_map;
}
228
229
# Prints the SQL statements that store one infodb entry to a sqlite3 pipe.
#   $infodb_handle - handle to print SQL to
#   $infodb_key    - key of the entry
#   $infodb_map    - hash reference: element name => array reference of values
# The entry is always written to the "data" table as one text value made of
# "<element>value" lines. When the key contains no "." and the entry text
# contains a "<doctype>doc" line, its metadata is additionally written, one row
# per value, to the "document_metadata" table (for the dynamic classifiers).
# Existing rows for the key are deleted first in both tables.
sub write_infodb_entry_sqlite
{
    my ($infodb_handle, $infodb_key, $infodb_map) = @_;

    my @element_names = keys(%$infodb_map);

    # Flatten the map into the text form stored in the "data" table
    my $infodb_entry_value = "";
    foreach my $element_name (@element_names)
    {
        foreach my $element_value (@{$infodb_map->{$element_name}})
        {
            $infodb_entry_value .= "<$element_name>" . $element_value . "\n";
        }
    }

    my $safe_infodb_key = &sqlite_safe($infodb_key);
    my $safe_entry_value = &sqlite_safe($infodb_entry_value);
    print $infodb_handle "DELETE FROM data WHERE key='$safe_infodb_key';\n";
    print $infodb_handle "INSERT INTO data (key, value) VALUES ('$safe_infodb_key', '$safe_entry_value');\n";

    # Only entries for documents get rows in the "document_metadata" table
    if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
    {
        # Automatically added document metadata that is of no interest here
        my %is_uninteresting = map { $_ => 1 } (
            "archivedir", "assocfilepath", "childtype", "contains", "docnum",
            "doctype", "Encoding", "FileSize", "hascover", "hastxt",
            "lastmodified", "metadataset", "thistype");

        print $infodb_handle "DELETE FROM document_metadata WHERE docOID='$safe_infodb_key';\n";

        foreach my $element_name (@element_names)
        {
            next if (exists($is_uninteresting{$element_name}));
            next if ($element_name =~ /^metadatafreq\-/);
            next if ($element_name =~ /^metadatalist\-/);

            my $safe_element_name = &sqlite_safe($element_name);
            foreach my $element_value (@{$infodb_map->{$element_name}})
            {
                my $safe_element_value = &sqlite_safe($element_value);
                print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('$safe_infodb_key', '$safe_element_name', '$safe_element_value');\n";
            }
        }
    }
}
282
283
# Makes a value safe to place between single quotes in an SQL statement by
# doubling every single quote it contains.
#   $value - text to escape (the caller's variable is not modified)
# Returns the escaped copy.
sub sqlite_safe
{
    my ($value) = @_;

    (my $escaped = $value) =~ s/\'/\'\'/g;

    return $escaped;
}
293
294
2951;
Note: See TracBrowser for help on using the repository browser.