root/gsdl/trunk/perllib/dbutil.pm @ 16339

Revision 16339, 9.4 KB (checked in by davidb, 12 years ago)

Added quotes around exec of 'txt2db' so it will work from within a folder with spaces in its name

Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008  DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use strict;
29
30
31sub close_infodb_write_handle
32{
33    my $infodb_type = shift(@_);
34    my $infodb_handle = shift(@_);
35
36    if ($infodb_type eq "sqlite")
37    {
38    return &close_infodb_write_handle_sqlite($infodb_handle);
39    }
40
41    # Use GDBM if the infodb type is empty or not one of the values above
42    return &close_infodb_write_handle_gdbm($infodb_handle);
43}
44
45
46sub get_default_infodb_type
47{
48    return "gdbm";
49}
50
51
52sub get_infodb_file_path
53{
54    my $infodb_type = shift(@_);
55    my $collection_name = shift(@_);
56    my $infodb_directory_path = shift(@_);
57
58    if ($infodb_type eq "sqlite")
59    {
60    return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
61    }
62
63    # Use GDBM if the infodb type is empty or not one of the values above
64    return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
65}
66
67
68sub open_infodb_write_handle
69{
70    my $infodb_type = shift(@_);
71    my $infodb_file_path = shift(@_);
72
73    if ($infodb_type eq "sqlite")
74    {
75    return &open_infodb_write_handle_sqlite($infodb_file_path);
76    }
77
78    # Use GDBM if the infodb type is empty or not one of the values above
79    return &open_infodb_write_handle_gdbm($infodb_file_path);
80}
81
82
83sub read_infodb_file
84{
85    my $infodb_type = shift(@_);
86    my $infodb_file_path = shift(@_);
87    my $infodb_map = shift(@_);
88
89    if ($infodb_type eq "sqlite")
90    {
91    return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
92    }
93
94    # Use GDBM if the infodb type is empty or not one of the values above
95    return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
96}
97
98
99sub write_infodb_entry
100{
101    my $infodb_type = shift(@_);
102    my $infodb_handle = shift(@_);
103    my $infodb_key = shift(@_);
104    my $infodb_map = shift(@_);
105
106    if ($infodb_type eq "sqlite")
107    {
108    return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
109    }
110
111    # Use GDBM if the infodb type is empty or not one of the values above
112    return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
113}
114
115
116
117# ----------------------------------------------------------------------------------------
118#   GDBM IMPLEMENTATION
119# ----------------------------------------------------------------------------------------
120
121sub close_infodb_write_handle_gdbm
122{
123    my $infodb_handle = shift(@_);
124
125    close($infodb_handle);
126}
127
128
129sub get_infodb_file_path_gdbm
130{
131    my $collection_name = shift(@_);
132    my $infodb_directory_path = shift(@_);
133
134    my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
135    my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
136    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
137}
138
139
140sub open_infodb_write_handle_gdbm
141{
142    my $infodb_file_path = shift(@_);
143
144    my $txt2db_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "txt2db" . &util::get_os_exe());
145    my $infodb_file_handle = undef;
146    if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\""))
147    {
148    return undef;
149    }
150
151    return $infodb_file_handle;
152}
153
154
155sub read_infodb_file_gdbm
156{
157    my $infodb_file_path = shift(@_);
158    my $infodb_map = shift(@_);
159
160    open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
161    my $infodb_line = "";
162    my $infodb_key = "";
163    my $infodb_value = "";
164    while (defined ($infodb_line = <PIPEIN>))
165    {
166    if ($infodb_line =~ /^\[([^\]]+)\]$/)
167    {
168        $infodb_key = $1;
169    }
170    elsif ($infodb_line =~ /^-{70}$/)
171    {
172        $infodb_map->{$infodb_key} = $infodb_value;
173        $infodb_key = "";
174        $infodb_value = "";
175    }
176    else
177    {
178        $infodb_value .= $infodb_line;
179    }
180    }
181
182    close (PIPEIN);
183}
184
185   
186sub write_infodb_entry_gdbm
187{
188    my $infodb_handle = shift(@_);
189    my $infodb_key = shift(@_);
190    my $infodb_map = shift(@_);
191
192    print $infodb_handle "[$infodb_key]\n";
193    foreach my $infodb_value_key (keys(%$infodb_map))
194    {
195    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
196    {
197        if ($infodb_value =~ /-{70,}/)
198        {
199        # if value contains 70 or more hyphens in a row we need to escape them
200        # to prevent txt2db from treating them as a separator
201        $infodb_value =~ s/-/&\#045;/gi;
202        }
203        print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
204    }
205    }
206    print $infodb_handle '-' x 70, "\n";
207}
208
209
210
211# ----------------------------------------------------------------------------------------
212#   SQLITE IMPLEMENTATION
213# ----------------------------------------------------------------------------------------
214
215sub close_infodb_write_handle_sqlite
216{
217    my $infodb_handle = shift(@_);
218
219    # Close the transaction we began after opening the file
220    print $infodb_handle "END TRANSACTION;\n";
221
222    # This is crucial for efficient queries on the database!
223    print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
224
225    close($infodb_handle);
226}
227
228
229sub get_infodb_file_path_sqlite
230{
231    my $collection_name = shift(@_);
232    my $infodb_directory_path = shift(@_);
233
234    my $infodb_file_extension = ".db";
235    my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
236    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
237}
238
239
240sub open_infodb_write_handle_sqlite
241{
242    my $infodb_file_path = shift(@_);
243
244    my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
245    my $infodb_handle = undef;
246    if (!-e "$sqlite3_exe" || !open($infodb_handle, "| $sqlite3_exe \"$infodb_file_path\""))
247    {
248    return undef;
249    }
250
251    print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
252    print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
253
254    # This is crucial for efficiency when importing large amounts of data
255    print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
256
257    # This is very important for efficiency, otherwise each command will be actioned one at a time
258    print $infodb_handle "BEGIN TRANSACTION;\n";
259
260    return $infodb_handle;
261}
262
263
264sub read_infodb_file_sqlite
265{
266    my $infodb_file_path = shift(@_);
267    my $infodb_map = shift(@_);
268
269    # !! TO IMPLEMENT
270}
271
272
273sub write_infodb_entry_sqlite
274{
275    my $infodb_handle = shift(@_);
276    my $infodb_key = shift(@_);
277    my $infodb_map = shift(@_);
278
279    # Add the key -> value mapping into the "data" table
280    my $infodb_entry_value = "";
281    foreach my $infodb_value_key (keys(%$infodb_map))
282    {
283    foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
284    {
285        $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
286    }
287    }
288
289    my $safe_infodb_key = &sqlite_safe($infodb_key);
290    print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
291
292    # If this infodb entry is for a document, add all the interesting document metadata to the
293    #   "document_metadata" table (for use by the dynamic classifiers)
294    if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
295    {
296    print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
297
298    foreach my $infodb_value_key (keys(%$infodb_map))
299    {
300        # We're not interested in most of the automatically added document metadata
301        next if ($infodb_value_key eq "archivedir" ||
302             $infodb_value_key eq "assocfilepath" ||
303             $infodb_value_key eq "childtype" ||
304             $infodb_value_key eq "contains" ||
305             $infodb_value_key eq "docnum" ||
306             $infodb_value_key eq "doctype" ||
307             $infodb_value_key eq "Encoding" ||
308             $infodb_value_key eq "FileSize" ||
309             $infodb_value_key eq "hascover" ||
310             $infodb_value_key eq "hastxt" ||
311             $infodb_value_key eq "lastmodified" ||
312             $infodb_value_key eq "metadataset" ||
313             $infodb_value_key eq "thistype" ||
314             $infodb_value_key =~ /^metadatafreq\-/ ||
315             $infodb_value_key =~ /^metadatalist\-/);
316
317        foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
318        {
319        print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
320        }
321    }
322    }
323}
324
325
326sub sqlite_safe
327{
328    my $value = shift(@_);
329
330    # Escape any single quotes in the value
331    $value =~ s/\'/\'\'/g;
332
333    return $value;
334}
335
336
3371;
Note: See TracBrowser for help on using the browser.