source: main/trunk/greenstone2/perllib/plugin.pm@ 21564

Last change on this file since 21564 was 21564, checked in by mdewsnip, 14 years ago

Changed lots of occurrences of "GDBM" in comments, variable names and function names, where the code isn't GDBM-specific. Part of making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.7 KB
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28use inexport;
29
30use strict; # to pick up typos and undeclared variables...
31no strict 'refs'; # ...but allow filehandles to be variables and vice versa
32no strict 'subs';
33
34require util;
35use gsprintf 'gsprintf';
36
# Backwards-compatibility table: old "xxPlug" plugin names that may still
# appear in a config file, each mapped to the plugin name that replaced it.
# This can be removed once the old names no longer need to be supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};
55
# File-wide state.

# Counters describing how the files considered so far were dealt with;
# every counter starts at zero.
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0,
};

# Settings captured by load_plugins() for use by the other routines.
my $verbosity;
my $outhandle;
my $failhandle;
# $globaloptions contains any options that should be passed to all plugins
my $globaloptions;
66
# Translate a plugin name as written in a collection's configuration into
# the name of the plugin that should actually be loaded.
#
#   $pluginname - the plugin name, possibly one of the old "xxPlug" names
#
# Returns the replacement listed in $plugin_name_map when there is one.
# Otherwise a name ending in "Plug" has that suffix expanded to "Plugin",
# and any other name is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    if (defined $plugin_name_map->{$pluginname}) {
        return $plugin_name_map->{$pluginname};
    }

    my $valid_name = $pluginname;
    # The substitution is anchored so that only the trailing "Plug" (the old
    # naming suffix) is expanded; an unanchored s/Plug/Plugin/ would rewrite
    # the first "Plug" found anywhere in the name instead.
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
78
# Find the file that implements $pluginname and load it with require.
#
# Candidate locations are tried in this order; the first file that exists
# is loaded and the search stops:
#   1. <GSDLCOLLECTDIR>/custom/<GSDLCOLLECTION>/perllib/plugins/
#      (only when GSDLCOLLECTION is set)
#   2. <GSDLCOLLECTDIR>/perllib/plugins/
#   3. <GSDLHOME>/ext/<extension>/perllib/plugins/ for each entry of GSDLEXTS
#   4. <GSDL3SRCHOME>/ext/<extension>/perllib/plugins/ for each entry of
#      GSDL3EXTS
#   5. <GSDLHOME>/perllib/plugins/
#
# If no candidate exists, {plugin.could_not_find_plugin} is printed to
# STDERR and the routine dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # $relative_path is shorthand for 'perllib' 'plugins' '$pluginname.pm'
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates = ();

    # collection-specific locations come first
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &util::filename_cat($collectdir, "custom",
                                 $ENV{'GSDLCOLLECTION'}, $relative_path));
    }
    push(@candidates, &util::filename_cat($collectdir, $relative_path));

    # then the extensions: each colon-separated list of extension names is
    # paired with the environment variable naming the directory whose "ext"
    # subdirectory holds those extensions
    my @extension_sources = (
        ['GSDLEXTS',  'GSDLHOME'],
        ['GSDL3EXTS', 'GSDL3SRCHOME'],
    );
    foreach my $source (@extension_sources) {
        my ($list_var, $home_var) = @$source;
        next unless defined $ENV{$list_var};

        my $ext_prefix = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $ext_name (split(/:/, $ENV{$list_var})) {
            push(@candidates,
                 &util::filename_cat($ext_prefix, $ext_name, $relative_path));
        }
    }

    # the main installation is the last resort
    push(@candidates, &util::filename_cat($ENV{'GSDLHOME'}, $relative_path));

    my $found = 0;
    foreach my $candidate (@candidates) {
        next unless -e $candidate;
        require $candidate;
        $found = 1;
        last;
    }

    if (!$found) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
144
# Load a plugin and construct it with the single option "-gsdlinfo".
#
#   $pluginname - plugin name; old names are first translated by
#                 get_valid_pluginname()
#
# Returns the new plugin object. Dies if the plugin file cannot be found
# (see load_plugin_require) or if the constructor dies.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # create a plugin object
    my $plugobj;

    # Call the constructor directly on the class name. A block eval keeps
    # the original error handling (any failure is re-thrown as a string)
    # without compiling a code string at run time.
    eval { $plugobj = $pluginname->new([], ["-gsdlinfo"]); };
    die "$@" if $@;

    return $plugobj;
}
159
# Load, construct and initialise every plugin named in $plugin_list.
#
#   $plugin_list      - reference to a list of array references, each one
#                       holding a plugin name followed by that plugin's
#                       options
#   $verbosity        - kept in the file-wide $verbosity (default 2)
#   $outhandle        - kept in the file-wide $outhandle (default 'STDERR')
#   $failhandle       - kept in the file-wide $failhandle (default 'STDERR')
#   $globaloptions    - reference to a list of options appended to every
#                       plugin's own options; may be undefined
#   $incremental_mode - handed to each plugin's set_incremental()
#
# Entries of $plugin_list without a plugin name are skipped. The lists
# passed in are not modified.
#
# Returns a reference to the list of plugin objects, in the order given.
# Dies if a plugin cannot be found or its constructor dies.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental_mode;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    my @global_args = defined $globaloptions ? @$globaloptions : ();

    foreach my $pluginoptions (@$plugin_list) {
        # work on a copy so the caller's list keeps its plugin name
        my ($pluginname, @plugin_args) = @{ $pluginoptions || [] };
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # create a plugin object
        #
        # The options are handed to the constructor as they are. Building
        # the call as a string for eval needs every option quoted and
        # escaped, and any option containing a double quote or an @ would
        # break or corrupt that string.
        my $plugobj;
        eval { $plugobj = $pluginname->new([], [@plugin_args, @global_args]); };
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
208
209
# Record the GLI flag on every plugin, then call begin() on each of them.
#
#   $pluginfo  - reference to the list of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin's begin()
#   $gli       - stored in each plugin object under the key 'gli'
#
# Returns the values returned by the plugins' begin() calls (their number
# in scalar context).
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # every plugin gets the flag before any plugin is started
    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
216
# Call remove_all() on every plugin.
#
#   $pluginfo - reference to the list of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin
#
# Returns the values returned by the plugins' remove_all() calls (their
# number in scalar context).
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
222
# Ask the plugins to remove what was built from source files that have
# since been deleted.
#
#   $pluginfo      - reference to the list of plugin objects
#   $archivedir    - archives directory; used to locate the 'archiveinf-src'
#                    info database and passed on to the plugins
#   $deleted_files - reference to the list of deleted source files
#
# Every file in $deleted_files is handled: the plugins' remove_one() is
# tried in turn until one returns a defined value other than -1, then the
# next file is looked at.
#
# Returns nothing when $deleted_files is empty. Otherwise returns 0 if some
# file was not removed by any plugin, else the value returned by the plugin
# that removed the last file. For a single file this is the value of the
# plugin that removed it, or 0.
sub remove_some {
    my ($pluginfo, $archivedir, $deleted_files) = @_;
    return if (scalar(@$deleted_files)==0);
    my $arcinfo_src_filename = &inexport::src_db_file($archivedir);

    my $result;
    my $all_removed = 1;

    FILE:
    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_src_filename, $file);
        my $src_rec = &dbutil::convert_infodb_string_to_hash($src_rec_string);
        my $oids = $src_rec->{'oid'};

        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            if (defined $rv && $rv != -1) {
                # this file is done; carry on with the remaining files
                # rather than returning after the first one
                $result = $rv;
                next FILE;
            } # else undefined (was not recognised by the plugin) or there was an error, try the next one
        }

        # no plugin removed this file
        $all_removed = 0;
    }

    return $all_removed ? $result : 0;
}
# Call file_block_read() on every plugin for one file. Unlike read(), the
# loop does not stop at the first plugin that accepts the file.
#
#   $pluginfo - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to each plugin
#   $gli      - true when output for the GLI is wanted (default 0)
#
# Dies if a ".kill" file exists in the collection directory.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it for the GLI
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # every plugin sees the file, whatever the earlier plugins returned
    my $rv = 0;
    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }
}
276
277
# Offer one file to each plugin's metadata_read() until a plugin deals
# with it.
#
#   $pluginfo - reference to the list of plugin objects
#   $maxdocs  - replaced by -1 when undefined or containing no digit
#   $gli      - true when output for the GLI is wanted (default 0)
#   all other arguments are passed through to the plugins unchanged
#
# A plugin's metadata_read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first plugin result that is neither undef nor -1, or 0 when
# no plugin produces one. Dies if a ".kill" file exists in the collection
# directory.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it for the GLI
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                         $extrametakeys, $extrametadata, $extrametafile,
                                         $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: this plugin has dealt with the file
        return $rv unless $rv == -1;

        # an error has occurred; report it and try the next plugin
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
329
# Offer one file to each plugin's read() until a plugin deals with it, and
# record the failure when none does.
#
#   $pluginfo    - reference to the list of plugin objects
#   $maxdocs     - replaced by -1 when undefined or containing no digit
#   $total_count - replaced by 0 when undefined or containing no digit
#   $gli         - true when output for the GLI is wanted (default 0)
#   all other arguments are passed through to the plugins unchanged
#
# A plugin's read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first plugin result that is neither undef nor -1. When no
# plugin produces one, the file is reported and counted in $stats as "not
# processed" (some plugin returned -1) or "not recognised", and 0 is
# returned. Dies if a ".kill" file exists in the collection directory.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
        $total_count = 0;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it for the GLI
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: this plugin has dealt with the file
        return $rv unless $rv == -1;

        # an error has occurred; remember it and try the next plugin
        $had_error = 1;
    }

    # nobody dealt with the file: pick the wording and counter to use
    my ($out_message, $gli_message, $fail_message, $counter);
    if ($had_error) {
        # was recognised but couldn't be processed
        $out_message  = "{plugin.no_plugin_could_process}\n";
        $gli_message  = "<NonProcessedFile n='$glifile'>\n";
        $fail_message = "$file: {plugin.no_plugin_could_process_this_file}\n";
        $counter      = 'num_not_processed';
    } else {
        # was not recognised
        $out_message  = "{plugin.no_plugin_could_recognise}\n";
        $gli_message  = "<NonRecognisedFile n='$glifile'>\n";
        $fail_message = "$file: {plugin.no_plugin_could_recognise_this_file}\n";
        $counter      = 'num_not_recognised';
    }

    if ($verbosity >= 2) {
        gsprintf($outhandle, $out_message, $file);
    }
    # tell the GLI that it was not processed
    print STDERR $gli_message if $gli;

    gsprintf($failhandle, $fail_message);
    $stats->{$counter} ++;

    return 0;
}
400
# Write out the general statistics that the plugins have compiled. Note
# that the buildcol.pl process doesn't currently call this routine, so the
# stats are only output after import.pl.
#
#   $pluginfo    - reference to the list of plugin objects; each one's
#                  compile_stats() is called with the shared $stats
#   $statshandle - handle the summary lines are written to through gsprintf
#   $faillog     - value substituted into the {plugin.see_faillog} line
#   $gli         - true when the <ImportComplete> line for the GLI is
#                  wanted on STDERR (default 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
        $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    my $archives     = $stats->{'num_archives'};
    my $processed    = $stats->{'num_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $rejected     = $stats->{'num_not_processed'};

    # documents considered: always reported
    if ($total == 1) {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    } else {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }

    # archives among them: only reported when there are any
    if ($archives == 1) {
        gsprintf($statshandle, " ({plugin.including_archive})\n");
    } elsif ($archives) {
        gsprintf($statshandle, " ({plugin.including_archives})\n",
                 $archives);
    }

    # documents included: always reported
    if ($processed == 1) {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    } else {
        gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    }

    # unrecognised and rejected documents: only reported when there are any
    if ($unrecognised == 1) {
        gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
    } elsif ($unrecognised) {
        gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                 $unrecognised);
    }

    if ($rejected == 1) {
        gsprintf($statshandle, "* {plugin.one_rejected}\n");
    } elsif ($rejected) {
        gsprintf($statshandle, "* {plugin.n_rejected}\n",
                 $rejected);
    }

    # point at the fail log whenever something was left out
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
458
# Call end() on every plugin.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's end()
#
# Returns the values returned by the plugins' end() calls (their number in
# scalar context).
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
463
# Call deinit() on every plugin.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's deinit()
#
# Returns the values returned by the plugins' deinit() calls (their number
# in scalar context).
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
470
4711;
Note: See TracBrowser for help on using the repository browser.