source: gsdl/trunk/perllib/plugin.pm@ 20105

Last change on this file since 20105 was 19497, checked in by davidb, 15 years ago

Introduction of new extrametafile to track which metadata.xml file a piece of metadata came from

  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[7829]28use strict; # to pick up typos and undeclared variables...
29no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]30no strict 'subs';
[7829]31
[134]32require util;
[7829]33use gsprintf 'gsprintf';
[4]34
# Backwards-compatibility table: legacy "xxPlug" names that may still appear
# in a collection's config file, keyed to the plugin names used today.
# The table can be removed once the old names are no longer supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};

# Module-wide counters.  read() bumps the two "not" counters itself; the
# whole hash is handed to every plugin's compile_stats() by write_stats().
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0,
};

# Module-wide settings, assigned by load_plugins().
# $globaloptions holds any options that should be passed to all plugins.
my ($verbosity, $outhandle, $failhandle, $globaloptions);
[5682]64
# Translate a plugin name as written in a config file into the name of the
# plugin that should actually be loaded.
#
#   $pluginname - name as supplied by the caller
#
# Returns the entry from $plugin_name_map when the name is listed there.
# Otherwise a name ending in "Plug" has that trailing "Plug" turned into
# "Plugin", and any other name is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;
    return $pluginname unless defined $pluginname;

    my $mapped_name = $plugin_name_map->{$pluginname};
    return $mapped_name if defined $mapped_name;

    # Anchor the rewrite to the end of the name: an unanchored substitution
    # would change the first "Plug" it met, so "PlugTestPlug" would become
    # "PluginTestPlug" instead of "PlugTestPlugin".
    my $valid_name = $pluginname;
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
# Find the source file of plugin $pluginname and load it with require.
#
# The file is looked for as perllib/plugins/<pluginname>.pm under each of
# the following roots, and the first one that exists is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION  (only if GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR
#   3. $GSDLHOME/ext/<e> for each colon-separated entry <e> of GSDLEXTS
#      (only if GSDLEXTS is set)
#   4. $GSDLHOME
#
# If none of the candidates exists a message is written to STDERR and the
# sub dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # path of the plugin relative to each root:
    # 'perllib' 'plugins' '$pluginname.pm'
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};
    my $gsdlhome   = $ENV{'GSDLHOME'};

    # candidate files, most specific location first
    my @candidates = ();

    if (defined($ENV{'GSDLCOLLECTION'})) {
        my $custom_candidate
            = &util::filename_cat($collectdir, "custom",
                                  $ENV{'GSDLCOLLECTION'}, $relative_path);
        push(@candidates, $custom_candidate);
    }

    my $collection_candidate
        = &util::filename_cat($collectdir, $relative_path);
    push(@candidates, $collection_candidate);

    if (defined $ENV{'GSDLEXTS'}) {
        my $ext_root = &util::filename_cat($gsdlhome, "ext");

        foreach my $extension (split(/:/, $ENV{'GSDLEXTS'})) {
            my $ext_candidate
                = &util::filename_cat($ext_root, $extension, $relative_path);
            push(@candidates, $ext_candidate);
        }
    }

    my $main_candidate = &util::filename_cat($gsdlhome, $relative_path);
    push(@candidates, $main_candidate);

    # load the first candidate that is present on disk
    my $found = 0;
    foreach my $candidate (@candidates) {
        next unless -e $candidate;

        require $candidate;
        $found = 1;
        last;
    }

    if (!$found) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
[10579]130
# Load a plugin and construct it with the single option "-gsdlinfo".
#
#   $pluginname - plugin name; old-style names are first translated by
#                 get_valid_pluginname()
#
# Returns the new plugin object.  Dies if the plugin file cannot be found
# (see load_plugin_require) or if the constructor throws.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # create a plugin object
    # Nothing in this call is built at run time, so a block eval with an
    # ordinary method call does the same job as the former string eval with
    # indirect-object "new", without re-parsing Perl source on each call.
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}
145
# Load, construct and initialise every plugin named in $plugin_list.
#
#   $plugin_list - reference to an array of array references, each holding
#                  a plugin name followed by that plugin's options.  The
#                  name is shifted off the inner array, so the caller's
#                  array is left holding only the options.
#   remaining arguments, in order:
#     $verbosity     - defaults to 2
#     $outhandle     - defaults to 'STDERR'
#     $failhandle    - defaults to 'STDERR'
#     $globaloptions - reference to an array of options appended to every
#                      plugin's own options
#     $incremental   - treated as 0 unless it is exactly 1
#   The first four are stored in this file's shared lexicals.
#
# Each plugin is constructed as <name>->new([], [options...]), then has
# init() and set_incremental() called on it.
#
# Returns a reference to the array of plugin objects, in list order.
# Dies if a plugin cannot be found or its constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental) = @_; # globals
    my @plugin_objects = ();
    $incremental = 0 unless (defined $incremental && $incremental == 1);
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Wrap an option in double quotes for the eval below.  The quoting is
    # done on copies: rewriting the caller's arrays in place meant that an
    # array passed in a second time had its entries quoted twice.  An option
    # that already starts with a double quote is taken to be quoted already.
    my $quote_option = sub {
        my ($option) = @_;
        return ($option =~ /^"/) ? $option : "\"$option\"";
    };

    my @quoted_globals
        = map { $quote_option->($_) } @{ $globaloptions || [] };

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # plugin-specific options first, then the global ones
        my @quoted_options = map { $quote_option->($_) } @$pluginoptions;
        my $options = join (",", @quoted_options, @quoted_globals);

        # The options are spliced into double-quoted strings inside the
        # eval, so keep "$" and "@" literal there; an unescaped "@" would be
        # interpolated as an array.
        $options =~ s/\$/\\\$/g;
        $options =~ s/\@/\\\@/g;

        # create a plugin object
        my ($plugobj);
        eval ("\$plugobj = \$pluginname->new([],[$options])");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
194
[835]195
# Start a pass over the plugins: record the GLI flag on every plugin object
# and then call each plugin's begin() in list order.
#
#   $pluginfo  - reference to the array of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each begin()
#   $gli       - stored in each plugin object under the key 'gli'
#
# Returns whatever the begin() calls returned.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # every plugin gets its flag before any begin() is invoked
    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    return map {
        $_->begin($pluginfo, $base_dir, $processor, $maxdocs);
    } @$pluginfo;
}
202
# Offer $file to the file_block_read() method of every plugin in turn.
#
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to each plugin
#   $gli      - true to emit GLI progress tags on STDERR (defaults to 0)
#
# Every plugin is called; the loop does not stop at the first plugin that
# reports success.  Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        # the result is recorded but deliberately not used to end the loop
        $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file,
                                        $block_hash, $metadata, $gli);
    }

}
231
232
# Offer $file to the metadata_read() method of each plugin until one of
# them deals with it.
#
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
#   $extrametafile, $processor, $aux - passed through to each plugin
#   $maxdocs  - set to -1 unless it is defined and contains a digit
#   $gli      - true to emit GLI progress tags on STDERR (defaults to 0)
#
# A plugin's metadata_read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first result that is neither undef nor -1, or 0 when no
# plugin produced one.  Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file,
                                         $block_hash, $extrametakeys,
                                         $extrametadata, $extrametafile,
                                         $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: hand the plugin's answer straight back
        return $rv unless $rv == -1;

        # an error has occurred; report it and try the next plugin
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
284
# Offer $file to the read() method of each plugin until one of them deals
# with it, and record a failure when none does.
#
#   $pluginfo    - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata, $processor, $aux
#                - passed through to each plugin
#   $maxdocs     - set to -1 unless it is defined and contains a digit
#   $total_count - set to 0 unless it is defined and contains a digit
#   $gli         - true to emit GLI progress tags on STDERR (defaults to 0)
#
# A plugin's read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first result that is neither undef nor -1.  When no plugin
# produces one, the file is reported on the fail handle (and on the output
# handle at verbosity 2 or more), the matching counter in $stats is
# incremented, and 0 is returned.  Dies if a ".kill" file exists in
# $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
    $total_count = 0 unless defined $total_count && $total_count =~ /\d/;
    $gli = 0 unless defined $gli;

    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it

    # Announce to GLI that we are handling a file
    print STDERR "<File n='$glifile'>\n" if $gli;

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: hand the plugin's answer straight back
        return $rv unless $rv == -1;

        # an error has occurred; remember it and try the next plugin
        $had_error = 1;
    }

    # The file name is passed as a substitution argument, in the same way as
    # the other gsprintf calls in this file, and not spliced into the
    # template.  NOTE(review): this assumes gsprintf formats printf-style,
    # in which case a '%' in a file name would otherwise be read as a format
    # directive -- confirm against gsprintf.pm.
    if ($had_error) {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "%s: {plugin.no_plugin_could_process_this_file}\n", $file);
        $stats->{'num_not_processed'} ++;
    } else {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "%s: {plugin.no_plugin_could_recognise_this_file}\n", $file);
        $stats->{'num_not_recognised'} ++;
    }
    return 0;
}
355
# Write a summary of the statistics gathered by the plugins.  Note that the
# buildcol.pl process doesn't currently call this, so the stats are only
# output after import.pl.
#
#   $pluginfo    - reference to the array of plugin objects; each one has
#                  compile_stats() called on it with the shared $stats hash
#   $statshandle - handle the summary lines are written to
#   $faillog     - value substituted into the "see fail log" line
#   $gli         - true to also print an <ImportComplete> tag on STDERR
#                  (defaults to 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # let every plugin add its own figures to the shared counters
    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $processed    = $stats->{'num_processed'};
    my $blocked      = $stats->{'num_blocked'};
    my $rejected     = $stats->{'num_not_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $archives     = $stats->{'num_archives'};

    my $total = $processed + $blocked + $rejected + $unrecognised;

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$processed' blocked='$blocked' ignored='$unrecognised' failed='$rejected'>\n";
    }

    # documents considered
    if ($total != 1) {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    } else {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }

    # archives among them, mentioned only when there are any
    if ($archives) {
        if ($archives != 1) {
            gsprintf($statshandle, " ({plugin.including_archives})\n",
                     $archives);
        } else {
            gsprintf($statshandle, " ({plugin.including_archive})\n");
        }
    }

    # documents included
    if ($processed != 1) {
        gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    } else {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }

    # documents no plugin recognised
    if ($unrecognised) {
        if ($unrecognised != 1) {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $unrecognised);
        } else {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
    }

    # documents recognised but not processed
    if ($rejected) {
        if ($rejected != 1) {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $rejected);
        } else {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
    }

    # point at the fail log whenever something was left out
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
413
# Call end($processor) on every plugin, in list order.
#
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through to each plugin's end()
#
# Returns whatever the end() calls returned.
sub end {
    my ($pluginfo, $processor) = @_;

    return map {
        $_->end($processor);
    } @$pluginfo;
}
[4]418
# Call deinit($processor) on every plugin, in list order.
#
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through to each plugin's deinit()
#
# Returns whatever the deinit() calls returned.
sub deinit {
    my ($pluginfo, $processor) = @_;

    return map {
        $_->deinit($processor);
    } @$pluginfo;
}
425
[4]4261;
Note: See TracBrowser for help on using the repository browser.