source: gsdl/trunk/perllib/plugin.pm@ 17745

Last change on this file since 17745 was 17731, checked in by kjdon, 16 years ago

a couple more changes to plugin_name_map

  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[7829]28use strict; # to pick up typos and undeclared variables...
29no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]30no strict 'subs';
[7829]31
[134]32require util;
[7829]33use gsprintf 'gsprintf';
[4]34
# Backwards-compatibility table: old-style plugin names (as they may still
# appear in a collection's config file) mapped to the current plugin names.
# Names without an entry here are handled by the generic rule in
# get_valid_pluginname.  This table can be dropped once the old xxPlug names
# no longer need to be supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};
52
# Module-wide state.

# Running totals for the files that have been considered; every counter
# starts at zero.  The hash is handed to each plugin's compile_stats() and
# reported by write_stats().
my $stats = {
    map { $_ => 0 } qw(
	num_processed
	num_blocked
	num_not_processed
	num_not_recognised
	num_archives
    )
};

# Settings recorded by load_plugins() and shared with the other functions in
# this package.  $globaloptions contains any options that should be passed to
# all plugins.
my ($verbosity, $outhandle, $failhandle, $globaloptions);
[5682]63
# Translate a plugin name, as given in a config file, into the name of the
# plugin that should actually be loaded.
#   $pluginname - the name as supplied (possibly an old-style "xxPlug" name)
# Returns:
#   - the entry from $plugin_name_map when the name is listed there;
#   - otherwise, for a name ending in "Plug", the same name with that trailing
#     "Plug" turned into "Plugin";
#   - otherwise the name unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    my $mapped_name = $plugin_name_map->{$pluginname};
    return $mapped_name if defined $mapped_name;

    my $valid_name = $pluginname;
    # The substitution is anchored so that only the trailing "Plug" is
    # rewritten, even when "Plug" also occurs earlier in the name.  Names that
    # do not end in "Plug" are left untouched.
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
# Locate the .pm file that implements the named plugin and 'require' it.
#   $pluginname - name of the plugin (without the .pm extension)
# The following locations are tried in order and the first file that exists
# is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/perllib/plugins
#      (only when GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/perllib/plugins
#   3. $GSDLHOME/ext/<extension>/perllib/plugins, for each entry of the
#      colon-separated GSDLEXTS (only when GSDLEXTS is set)
#   4. $GSDLHOME/perllib/plugins
# If none of them exists, the {plugin.could_not_find_plugin} message is
# printed to STDERR and the function dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # location of the plugin file relative to each candidate root
    my $pp_plugname
	= &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    # candidate files, most specific first
    my @check_list = ();

    if (defined($ENV{'GSDLCOLLECTION'})) {
	my $customplugname
	    = &util::filename_cat($collectdir, "custom", $ENV{'GSDLCOLLECTION'},
				  $pp_plugname);
	push(@check_list, $customplugname);
    }

    my $colplugname = &util::filename_cat($collectdir, $pp_plugname);
    push(@check_list, $colplugname);

    if (defined $ENV{'GSDLEXTS'}) {
	my $ext_prefix = &util::filename_cat($ENV{'GSDLHOME'}, "ext");
	foreach my $extension (split(/:/, $ENV{'GSDLEXTS'})) {
	    my $extplugname
		= &util::filename_cat($ext_prefix, $extension, $pp_plugname);
	    push(@check_list, $extplugname);
	}
    }

    my $mainplugname = &util::filename_cat($ENV{'GSDLHOME'}, $pp_plugname);
    push(@check_list, $mainplugname);

    # load the first candidate that is present on disk
    foreach my $candidate (@check_list) {
	next unless -e $candidate;
	require $candidate;
	return;
    }

    # nothing was found anywhere
    &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
	      $pluginname);
    die "\n";
}
[10579]129
# Load the named plugin and create an instance of it for querying
# information about the plugin.
#   $pluginname - plugin name; old-style names are translated with
#                 get_valid_pluginname before loading
# Returns the plugin object, constructed with an empty first argument list
# and the single option "-gsdlinfo".
# Dies if the plugin file cannot be found or if the constructor fails.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Create the plugin object.  The constructor is called directly as a class
    # method inside a block eval, so no Perl source text has to be assembled
    # and re-parsed at run time; any error it raises is re-thrown below.
    my $plugobj = eval { $pluginname->new([], ['-gsdlinfo']) };
    die "$@" if $@;

    return $plugobj;
}
144
# Load and initialise every plugin named in a plugin list.
#   $plugin_list   - reference to a list of array references; each inner array
#                    holds a plugin name followed by that plugin's options.
#                    The name is shifted off the inner array, so afterwards
#                    the caller's arrays contain only the options.
#   $verbosity     - stored in the package-level variable (default 2)
#   $outhandle     - stored in the package-level variable (default 'STDERR')
#   $failhandle    - stored in the package-level variable (default 'STDERR')
#   $globaloptions - reference to a list of options appended to the options
#                    of every plugin (stored in the package-level variable)
#   $incremental   - treated as 1 only when it is numerically 1, otherwise 0
# Each plugin is constructed with an empty first argument list and its
# combined options, then init() and set_incremental() are called on it.
# Returns a reference to the list of plugin objects, in plugin-list order.
# Dies if a plugin cannot be found or its constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental) = @_; # globals
    my @plugin_objects = ();
    $incremental = 0 unless (defined $incremental && $incremental == 1);
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Private, stringified copy of the global options.  The caller's array is
    # left untouched, so the same list can safely be passed in again.
    my @global_opts = defined $globaloptions
	? map { defined $_ ? "$_" : '' } @$globaloptions
	: ();

    foreach my $pluginoptions (@$plugin_list) {
	my $pluginname = shift @$pluginoptions;
	next unless defined $pluginname;
	$pluginname = &get_valid_pluginname($pluginname);
	load_plugin_require($pluginname);

	# plugin-specific options first, then the global ones
	my @options = ((map { defined $_ ? "$_" : '' } @$pluginoptions),
		       @global_opts);

	# Create the plugin object.  The options are handed over as a real list
	# rather than being spliced into Perl source text for a string eval, so
	# option values containing quotes, backslashes, '$' or '@' reach the
	# plugin exactly as they were given.
	my $plugobj = eval { $pluginname->new([], \@options) };
	die "$@" if $@;

	# initialize plugin
	$plugobj->init($verbosity, $outhandle, $failhandle);

	$plugobj->set_incremental($incremental);

	# add this object to the list
	push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
193
[835]194
# Start a processing pass: record the gli flag on every plugin object, then
# call begin() on each plugin in list order.
#   $pluginfo  - reference to the list of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin's begin()
#   $gli       - stored in each plugin object under the 'gli' key
# Returns whatever the plugins' begin() calls returned, in plugin order.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # every plugin gets the flag before any begin() is called
    foreach my $plugobj (@{$pluginfo}) {
	$plugobj->{'gli'} = $gli;
    }

    return map { $_->begin($pluginfo, $base_dir, $processor, $maxdocs) } @{$pluginfo};
}
201
# Offer a file to every plugin's file_block_read() method.
#   $pluginfo - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to the plugins
#   $gli      - when true, GLI progress tags are written to STDERR
#               (defaults to 0)
# Every plugin is called; the loop does not stop early and the plugins'
# results are not returned to the caller.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    $gli = 0 if !defined $gli;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
	print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $killfile = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $killfile) {
	gsprintf($outhandle, "{plugin.kill_file}\n");
	die "\n";
    }

    my $status = 0;
    foreach my $plugobj (@$pluginfo) {
	# no early exit: every plugin gets to see the file
	$status = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }

}
230
231
# Pass a file to each plugin's metadata_read() in turn until one of them
# deals with it.
#   $pluginfo - reference to the list of plugin objects
#   $maxdocs  - replaced by -1 when undefined or containing no digit
#   $gli      - when true, GLI tags are written to STDERR (defaults to 0)
#   all other arguments are passed through to the plugins unchanged
# A plugin's metadata_read() must return:
#   undef         - could not recognise the file
#   -1            - tried but hit an error
#   0             - blocked
#   anything else - successful processing
# Returns the first result that is neither undef nor -1, or 0 when no plugin
# produced such a result.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
	$maxdocs = -1;
    }
    $gli = 0 if !defined $gli;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
	print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $killfile = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $killfile) {
	gsprintf($outhandle, "{plugin.kill_file}\n");
	die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
	my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
					 $extrametakeys, $extrametadata, $processor, $maxdocs, $gli, $aux);

	# undefined - was not recognised by the plugin, try the next one
	next unless defined $rv;

	# blocked or successfully processed: we are done with this file
	return $rv unless $rv == -1;

	# an error has occurred; report it and carry on with the next plugin
	print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
280
# Pass a file to each plugin's read() in turn until one of them deals with it.
#   $pluginfo    - reference to the list of plugin objects
#   $maxdocs     - replaced by -1 when undefined or containing no digit
#   $total_count - replaced by 0 when undefined or containing no digit
#   $gli         - when true, GLI tags are written to STDERR (defaults to 0)
#   all other arguments are passed through to the plugins unchanged
# A plugin's read() must return:
#   undef         - could not recognise the file
#   -1            - tried but hit an error
#   0             - blocked
#   anything else - successful processing
# Returns the first result that is neither undef nor -1.  When no plugin
# produces one, the file is reported as either not processed (at least one
# plugin returned -1) or not recognised, the matching counter in $stats is
# incremented, and 0 is returned.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
	$maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
	$total_count = 0;
    }
    $gli = 0 if !defined $gli;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
	print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $killfile = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $killfile) {
	gsprintf($outhandle, "{plugin.kill_file}\n");
	die "\n";
    }

    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
	my $rv = $plugobj->read($pluginfo, $base_dir, $file,
				$block_hash, $metadata, $processor, $maxdocs,
				$total_count, $gli, $aux);

	# undefined - was not recognised by the plugin, try the next one
	next unless defined $rv;

	# blocked or successfully processed: we are done with this file
	return $rv unless $rv == -1;

	# an error has occurred; remember it and try the remaining plugins
	$had_error = 1;
    }

    # No plugin handled the file.  Choose the messages, GLI tag and counter
    # for "recognised but couldn't be processed" versus "not recognised".
    my ($out_msg, $gli_tag, $fail_msg, $stat_key) = $had_error
	? ("{plugin.no_plugin_could_process}\n",
	   'NonProcessedFile',
	   "$file: {plugin.no_plugin_could_process_this_file}\n",
	   'num_not_processed')
	: ("{plugin.no_plugin_could_recognise}\n",
	   'NonRecognisedFile',
	   "$file: {plugin.no_plugin_could_recognise_this_file}\n",
	   'num_not_recognised');

    if ($verbosity >= 2) {
	gsprintf($outhandle, $out_msg, $file);
    }

    # tell the GLI that it was not processed
    print STDERR "<$gli_tag n='$glifile'>\n" if $gli;

    gsprintf($failhandle, $fail_msg);
    $stats->{$stat_key} ++;

    return 0;
}
351
# Write out the general statistics that the plugins have compiled.  Note that
# the buildcol.pl process doesn't currently call this function, so the stats
# are only output after import.pl.
#   $pluginfo    - reference to the list of plugin objects; compile_stats()
#                  is called on each with the shared $stats hash
#   $statshandle - handle the summary lines are written to via gsprintf
#   $faillog     - passed to the {plugin.see_faillog} message
#   $gli         - when true, an <ImportComplete ...> tag is written to
#                  STDERR (defaults to 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 if !defined $gli;

    # let every plugin add its own numbers before anything is reported
    foreach my $plugobj (@$pluginfo) {
	$plugobj->compile_stats($stats);
    }

    my ($processed, $blocked, $rejected, $unrecognised, $archives)
	= @{$stats}{'num_processed', 'num_blocked', 'num_not_processed',
		    'num_not_recognised', 'num_archives'};
    my $total = $processed + $blocked + $rejected + $unrecognised;

    if ($gli) {
	print STDERR "<ImportComplete considered='$total' processed='$processed' blocked='$blocked' ignored='$unrecognised' failed='$rejected'>\n";
    }

    # documents considered
    if ($total == 1) {
	gsprintf($statshandle, "* {plugin.one_considered}\n");
    }
    else {
	gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }

    # archives among them (only mentioned when there are any)
    if ($archives == 1) {
	gsprintf($statshandle, " ({plugin.including_archive})\n");
    }
    elsif ($archives) {
	gsprintf($statshandle, " ({plugin.including_archives})\n",
		 $archives);
    }

    # documents included
    if ($processed == 1) {
	gsprintf($statshandle, "* {plugin.one_included}\n");
    }
    else {
	gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    }

    # documents no plugin recognised (only mentioned when there are any)
    if ($unrecognised == 1) {
	gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
    }
    elsif ($unrecognised) {
	gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
		 $unrecognised);
    }

    # documents recognised but rejected (only mentioned when there are any)
    if ($rejected == 1) {
	gsprintf($statshandle, "* {plugin.one_rejected}\n");
    }
    elsif ($rejected) {
	gsprintf($statshandle, "* {plugin.n_rejected}\n",
		 $rejected);
    }

    # point at the fail log whenever something was left out
    if ($rejected || $unrecognised) {
	gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
409
# Finish a processing pass by calling end() on each plugin in list order.
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's end()
# Returns whatever the plugins' end() calls returned, in plugin order.
sub end {
    my ($pluginfo, $processor) = @_;

    return map { $_->end($processor) } @{$pluginfo};
}
[4]414
# Call deinit() on each plugin in list order.
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's deinit()
# Returns whatever the plugins' deinit() calls returned, in plugin order.
sub deinit {
    my ($pluginfo, $processor) = @_;

    return map { $_->deinit($processor) } @{$pluginfo};
}
421
[4]4221;
Note: See TracBrowser for help on using the repository browser.