source: trunk/gsdl/perllib/plugins/DBPlug.pm@ 6812

Last change on this file since 6812 was 6408, checked in by jmt12, 20 years ago

Added two new attributes for script arguments. HiddenGLI controls whether the argument will be visible at all in GLI, while ModeGLI defines the lowest detail mode under which the argument will be visible (only really for import and buildcol). Also ensured that the scripts were reporting their correct default process expressions, and further refined argument types by adding the category regexp for any regular expression (which can then be hidden under lower detail modes in GLI)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1###########################################################################
2#
3# DBPlug.pm -- plugin to import records from a database
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2003 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27#
28# See <GSDLHOME>/etc/packages/example.dbi for an example config file!!
29#
30
31# Written by John McPherson for the NZDL project
32# Mar, Apr 2003
33
34package DBPlug;
35
36use BasPlug;
37use unicode;
38use parsargv;
39
40use DBI; # database independent stuff
41
# Set up inheritance at compile time: DBPlug is a subclass of BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}
45
# Description of the single argument this plugin declares: the regular
# expression used to decide which files the plugin will process.
my %process_exp_argument = (
    'name' => "process_exp",
    'desc' => "{BasPlug.process_exp}",
    'type' => "regexp",
    'deft' => get_default_process_exp(),
    'reqd' => "no",
);

# List of argument descriptions, referenced from $options below.
my $arguments = [ \%process_exp_argument ];
52
# Option block describing this plugin; new() appends it to the option list
# held by the object.
my $options = {
    'name'     => "DBPlug",
    'desc'     => "{DBPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
58
# Constructor.  Builds the object via BasPlug's constructor, tags it as a
# DBPlug and appends this plugin's option description to its option list.
sub new {
    # Only peek at the class name: @_ is left untouched so that the complete
    # argument list (class name included) is handed on to BasPlug below.
    my ($class) = @_;

    my $self = BasPlug->new($class, @_);
    $self->{'plugin_type'} = "DBPlug";

    my $inherited_options = $self->{'option_list'};
    push @{$inherited_options}, $options;

    # no plugin-specific options
    # if (!parsargv::parse(\@_, "allow_extra_options")) {
    #     $self->print_txt_usage("");  # Use default resource bundle
    #     die "\n";
    # }

    return bless $self, $class;
}
75
# Returns the default regular expression (as a string) selecting the files
# this plugin handles: names ending in ".dbi", matched case-insensitively.
sub get_default_process_exp {
    my ($self) = @_;

    return '(?i)\.dbi$';
}
# There is no per-greenstone-document process() step for this plugin:
# read() builds the documents itself, so this is deliberately a no-op.
sub process {
    return;
}
85
86
# Reads a ".dbi" database configuration file, runs the SQL query it
# specifies and turns every returned row into one document, which is
# passed to $processor->process().
#
# Parameters:
#   $pluginfo  - not used by this plugin
#   $base_dir  - directory $file is relative to (ignored unless it
#                contains a word character)
#   $file      - name of the candidate file
#   $metadata  - metadata handed on to extra_metadata() for every document
#   $processor - object whose process() method receives each document
#   $maxdocs   - not used by this plugin
#
# Returns:
#   undef  if the file does not match process_exp or is not a plain file
#   0      if the config file cannot be read or evaluated, names no db or
#          SQL query, or the query cannot be prepared/executed
#   otherwise the number of documents created (one per row)
#
# Dies if the database connection cannot be established.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;

    # see if we can handle the passed file...
    my $filename = $file;
    $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        # this plugin can't process this file type...
        return undef;
    }

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    print $outhandle "DBPlug: processing $file\n"
        if $verbosity > 1;

    # calculate the document hash, for document ids
    my $hash = "";

    my $osexe = &util::get_os_exe();
    my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'}, "bin",
                                           $ENV{'GSDLOS'}, "hashfile$osexe");
    if (-e "$hashfile_exe") {
        my $hashfile_output = `hashfile$osexe \"$filename\"`;
        # Only trust $1 when the match actually succeeded; otherwise it
        # would be undefined or left over from an earlier match.
        if (defined($hashfile_output)
            && $hashfile_output =~ /:\s*([0-9a-f]+)/i) {
            $hash = "HASH$1";
        } else {
            print $outhandle "DBPlug: warning: could not calculate a hash "
                . "for $filename\n";
        }
    }

    # The variables below are the interface to the config file: it is
    # evaluated as Perl code in this scope and assigns to them by name,
    # so they must not be renamed.

    # default options - may be overridden by config file
    my $language = undef;
    my $encoding = undef;
    my $dbplug_debug = 0;
    my $username = '';
    my $password = '';

    # these settings must be set by the config file:
    my $db = undef;

    # get id of pages from "nonempty", get latest version number from
    # "recent", and then get pagename from "page" and content from "version" !
    my $sql_query = undef;

    my %db_to_greenstone_fields = ();

    # read in config file (directly, rather than by shelling out to "cat",
    # which needs a Unix shell and breaks on unusual filenames).
    my $config_code;
    if (open(my $config_fh, '<', $filename)) {
        local $/ = undef;       # slurp the whole file
        $config_code = <$config_fh>;
        close($config_fh);
    } else {
        print $outhandle "DBPlug: error: could not read $filename: $!\n";
        return 0;
    }
    $config_code = '' unless defined($config_code);     # empty file

    # NOTE(review): the config file is executed as arbitrary Perl code.
    # This is how the .dbi format is defined (it may also define a
    # text_callback() function), so only trusted config files should be
    # placed in the import directory.
    eval $config_code;
    if ($@) {
        print $outhandle "DBPlug: error: could not evaluate $filename: $@\n";
        return 0;
    }

    if (!defined($db)) {
        print $outhandle "DBPlug: error: $filename does not specify a db!\n";
        return 0;
    }
    if (!defined($sql_query)) {
        print $outhandle "DBPlug: error: no SQL query specified!\n";
        return 0;
    }

    my $debug = (defined($dbplug_debug) && $dbplug_debug == 1);

    # connect to database
    my $dbhandle = DBI->connect($db, $username, $password);

    if (!defined($dbhandle)) {
        die "DBPlug: could not connect to database, exiting.\n";
    }
    if ($debug) {
        print $outhandle "DBPlug (debug): connected ok\n";
    }

    # prepare() and execute() return a false value on failure; check both
    # instead of calling a method on an undefined statement handle.
    my $statement_hand = $dbhandle->prepare($sql_query);
    if (!defined($statement_hand)) {
        my $errstr = $dbhandle->errstr;
        $errstr = "unknown error" unless defined($errstr);
        print $outhandle "DBPlug: error: could not prepare SQL query: "
            . "$errstr\n";
        $dbhandle->disconnect();
        return 0;
    }
    if (!$statement_hand->execute) {
        my $errstr = $statement_hand->errstr;
        $errstr = "unknown error" unless defined($errstr);
        print $outhandle "DBPlug: error: could not execute SQL query: "
            . "$errstr\n";
        $dbhandle->disconnect();
        return 0;
    }

    # get the array-ref for the field names and copy it into an array
    # (there may be none if the statement returns no result columns)
    my @field_names = @{ $statement_hand->{NAME} || [] };

    # translate database field names into greenstone field names; the loop
    # variable aliases the array elements, so this edits @field_names in place
    foreach my $fieldname (@field_names) {
        if (defined($db_to_greenstone_fields{$fieldname})) {
            if ($debug) {
                print $outhandle "DBPlug (debug): mapping db field "
                    . "'$fieldname' to "
                    . $db_to_greenstone_fields{$fieldname} . "\n";
            }
            $fieldname = $db_to_greenstone_fields{$fieldname};
        }
    }

    # get rows
    my $count = 0;

    # fetchrow_array returns an empty list when there are no more rows
    # (or on error, which is checked after the loop)
    while (my @row_array = $statement_hand->fetchrow_array) {
        if ($debug) {
            print $outhandle "DBPlug (debug): retrieved a row from query\n";
        }

        # create a new document
        my $doc_obj = new doc ($filename, "indexed_doc");
        $doc_obj->set_OIDtype ($processor->{'OIDtype'});
        my $cursection = $doc_obj->get_top_section();

        if (defined($language)) {
            # if not set in config file, will use BasPlug's default
            $doc_obj->add_utf8_metadata($cursection, "Language", $language);
        }
        if (defined($encoding)) {
            # if not set in config file, will use BasPlug's default
            $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding);
        }
        $doc_obj->add_utf8_metadata($cursection,
                                    "Source", &ghtml::dmsafe($db));
        if ($self->{'cover_image'}) {
            $self->associate_cover_image($doc_obj, $filename);
        }
        $doc_obj->add_utf8_metadata($doc_obj->get_top_section(),
                                    "$self->{'plugin_type'}", "1");

        # include any metadata passed in from previous plugins
        # note that this metadata is associated with the top level section
        $self->extra_metadata ($doc_obj, $cursection, $metadata);

        # do any automatic metadata extraction
        $self->auto_extract_metadata ($doc_obj);

        my $unique_id = undef;

        foreach my $fieldname (@field_names) {
            # always consume one column per field so names and values
            # stay in step
            my $fielddata = shift @row_array;

            # SQL NULL arrives as undef: there is nothing to add for it
            next unless defined($fielddata);

            # use the specified encoding, defaulting to utf-8
            if (defined($encoding) && $encoding ne "ascii"
                && $encoding ne "utf8") {
                $fielddata = &unicode::unicode2utf8(
                    &unicode::convert2unicode($encoding, \$fielddata)
                );
            }

            if ($fieldname eq "text") {
                # see if we have a text_callback() function defined
                # (the config file may define one)
                if (defined(&text_callback)) {
                    $fielddata = text_callback($fielddata);
                }
                # add as document text
                $fielddata =~ s@<@&lt;@g;
                $fielddata =~ s@>@&gt;@g;  # for xml protection...
                $fielddata =~ s@_@\\_@g;   # for macro language protection...
                $doc_obj->add_utf8_text($cursection, $fielddata);
            } elsif ($fieldname eq "Identifier") {
                # use as greenstone's unique record id
                if ($fielddata =~ /^\d+$/) {
                    # don't allow IDs that are completely numeric
                    $unique_id = "id" . $fielddata;
                } else {
                    $unique_id = $fielddata;
                }
            } else {
                # add as document metadata
                $fielddata =~ s/\[/&#91;/g;
                $fielddata =~ s/\]/&#93;/g;
                $doc_obj->add_utf8_metadata($cursection,
                                            $fieldname, $fielddata);
            }
        }

        if (!defined $unique_id) {
            # no id from the database: file hash plus row number
            $doc_obj->set_OID($hash . "s$count");
        } else {
            # use our id from the database...
            $doc_obj->set_OID($unique_id);
        }

        # process the document
        $processor->process($doc_obj);

        $count++;
    } # end of rows

    # an empty row also ends the loop on error, so check "$sth->err";
    # report on the plugin's output handle like every other message
    if ($statement_hand->err) {
        print $outhandle "received error: \""
            . $statement_hand->errstr . "\"\n";
    }

    # clean up connection to database
    $statement_hand->finish();
    $dbhandle->disconnect();

    # num of input files, rather than documents created?
    $self->{'num_processed'}++;

    return $count;
}
288
2891;
Note: See TracBrowser for help on using the repository browser.