source: gsdl/trunk/perllib/plugin.pm@ 16841

Last change on this file since 16841 was 16381, checked in by kjdon, 16 years ago

global block pass: added plugin::file_block_read, which is the global blocking pass. read and metadata_read were modified to take an extra argument, block_hash, which contains blocking info - this is really only used by DirectoryPlugin, which does the actual blocking

  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[7829]28use strict; # to pick up typos and undeclared variables...
29no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]30no strict 'subs';
[7829]31
[134]32require util;
[7829]33use gsprintf 'gsprintf';
[4]34
[15873]35# Mapping from old plugin names to new ones, for backwards compatibility.
36# It can be removed at some time in the future, once the old xxPlug plugin
# names no longer need to be supported.
# get_valid_pluginname() consults this table before falling back to its
# generic "Plug" -> "Plugin" rule.
37my $plugin_name_map = {
38 'ArcPlug' => 'ArchivesInfPlugin',
39 'RecPlug' => 'DirectoryPlugin',
40 'TEXTPlug' => 'TextPlugin',
41 'EMAILPlug' => 'EmailPlugin',
42 'SRCPlug' => 'SourceCodePlugin',
43 'NULPlug' => 'NulPlugin',
44 'W3ImgPlug' => 'W3ImagePlugin',
45 'PagedImgPlug' => 'PagedImagePlugin'
46 };
47
[7829]48# Global variables.
# $stats holds the counters reported by write_stats(): read() increments
# 'num_not_processed' and 'num_not_recognised', and write_stats() hands the
# hash to every plugin's compile_stats() before printing the totals.
[2785]49my $stats = {'num_processed' => 0,
50 'num_blocked' => 0,
51 'num_not_processed' => 0,
[7363]52 'num_not_recognised' => 0,
[2785]53 'num_archives' => 0
54 };
55
[7829]56# $globaloptions contains any options that should be passed to all plugins.
# All four variables below are assigned by load_plugins().
57my ($verbosity, $outhandle, $failhandle, $globaloptions);
[5682]58
# Translate a (possibly old-style) plugin name into the name to load.
#
# Names listed in $plugin_name_map are mapped explicitly.  Any other name
# that ends in "Plug" has that trailing "Plug" expanded to "Plugin".
# Everything else is returned unchanged.
#
#   $pluginname - the plugin name as supplied by the caller
#
# Returns the (possibly rewritten) plugin name.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    my $mapped_name = $plugin_name_map->{$pluginname};
    return $mapped_name if defined $mapped_name;

    my $valid_name = $pluginname;
    # The substitution is anchored so that only the trailing "Plug" is
    # rewritten.  An unanchored s/Plug/Plugin/ would change the first
    # occurrence instead (e.g. "PlugTestPlug" -> "PluginTestPlug").
    # Names that do not end in "Plug" are left untouched.
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
# Find the source file of a plugin and load it with require.
#
# The relative name perllib/plugins/$pluginname.pm (built with
# util::filename_cat) is looked for under the following roots, in this
# order, and the first file that exists is loaded:
#   1. GSDLCOLLECTDIR, "custom", GSDLCOLLECTION  (only if GSDLCOLLECTION is set)
#   2. GSDLCOLLECTDIR
#   3. GSDLHOME, "ext", <extension>  for each entry of the colon-separated
#      GSDLEXTS list (only if GSDLEXTS is set)
#   4. GSDLHOME
#
#   $pluginname - name of the plugin to load
#
# Reports {plugin.could_not_find_plugin} on STDERR and dies if none of the
# candidate files exists.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # shorthand for 'perllib' 'plugins' '$pluginname.pm'
    my $relative_name
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    # candidate files, most specific location first
    my @candidates = ();

    if (defined($ENV{'GSDLCOLLECTION'})) {
        my $custom_candidate
            = &util::filename_cat($collectdir, "custom",
                                  $ENV{'GSDLCOLLECTION'}, $relative_name);
        push(@candidates, $custom_candidate);
    }

    my $collection_candidate
        = &util::filename_cat($collectdir, $relative_name);
    push(@candidates, $collection_candidate);

    if (defined $ENV{'GSDLEXTS'}) {
        my $ext_root = &util::filename_cat($ENV{'GSDLHOME'}, "ext");

        foreach my $extension (split(/:/, $ENV{'GSDLEXTS'})) {
            my $ext_candidate
                = &util::filename_cat($ext_root, $extension, $relative_name);
            push(@candidates, $ext_candidate);
        }
    }

    my $main_candidate
        = &util::filename_cat($ENV{'GSDLHOME'}, $relative_name);
    push(@candidates, $main_candidate);

    # load the first candidate that exists and stop looking
    foreach my $candidate (@candidates) {
        next unless -e $candidate;
        require $candidate;
        return;
    }

    # none of the candidates exists
    &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
              $pluginname);
    die "\n";
}
[10579]124
# Load a plugin and construct it in "information" mode.
#
# The name is first passed through get_valid_pluginname(), the plugin file
# is loaded with load_plugin_require(), and the plugin is then constructed
# with an empty first argument list and the single option "-gsdlinfo".
#
#   $pluginname - name of the plugin (old xxPlug names are accepted)
#
# Returns the new plugin object.  Dies with the constructor's error message
# if construction fails (load_plugin_require dies if the plugin file cannot
# be found).
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Create the plugin object with an ordinary method call on the class
    # name; no source code needs to be generated and eval'd as a string.
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}
139
# Load, construct and initialise every plugin in a plugin list.
#
#   $plugin_list   - reference to a list of array references; each inner
#                    array is the plugin name followed by its options.  The
#                    plugin name is shifted off the inner array.
#   $verbosity     - stored globally; defaults to 2
#   $outhandle     - stored globally; defaults to 'STDERR'
#   $failhandle    - stored globally; defaults to 'STDERR'
#   $globaloptions - reference to a list of options appended to the options
#                    of every plugin (may be undef)
#   $incremental   - passed to each plugin's set_incremental(); any value
#                    other than 1 is treated as 0
#
# Returns a reference to the list of plugin objects, in plugin-list order.
# Dies if a plugin cannot be found or its constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental) = @_; # globals
    my @plugin_objects = ();
    $incremental = 0 unless (defined $incremental && $incremental == 1);
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Work on a copy so the caller's list is never modified (and so a
    # second call with the same list sees the same option values).
    my @global_options = defined $globaloptions ? @$globaloptions : ();

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # Options are handed to the constructor as real Perl values rather
        # than being spliced into generated source code.  Characters such
        # as @, \ and " inside an option (regular expressions, e-mail
        # addresses, Windows paths) therefore reach the plugin unchanged
        # and can neither be interpolated nor break the call.
        my @options = map { defined $_ ? $_ : '' }
                          (@$pluginoptions, @global_options);

        # A backslash-escaped double quote is still reduced to a plain
        # double quote, which is what a double-quoted string would yield.
        # NOTE(review): this assumes some callers pre-escape quotes as \" -
        # confirm against the code that builds the plugin list.
        s/\\"/"/g foreach @options;

        # create a plugin object
        my $plugobj = eval { $pluginname->new([], \@options) };
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
188
[835]189
# Announce the start of a pass to every plugin.
#
# Every plugin object first has its 'gli' field set to $gli.  Only once all
# of them have been updated is begin() called on each plugin, in list order,
# with ($pluginfo, $base_dir, $processor, $maxdocs).
#
# Returns the values returned by the plugins' begin() methods.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results,
             $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
196
# Global blocking pass: offer a file to every plugin's file_block_read().
#
# Unlike read() and metadata_read(), the loop never stops early and the
# plugins' return values are not used; every plugin sees every file.
#
#   $pluginfo   - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to the plugins
#   $gli        - if true, a <File n='...'> element is printed to STDERR
#
# Dies if a ".kill" file exists in GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    $gli = 0 unless defined $gli;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        # the result is deliberately ignored - there is no early exit here
        my $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file,
                                           $block_hash, $metadata, $gli);
    }
}
225
226
# Pass a file by each plugin's metadata_read() in turn until one of them
# handles it.
#
# A plugin's metadata_read must return:
#    undef - could not recognise
#    -1    - tried but error
#    0     - blocked
#    anything else for successful processing
#
#   $pluginfo - reference to the list of plugin objects
#   $maxdocs  - replaced by -1 unless it is defined and contains a digit
#   $gli      - if true, GLI progress elements are printed to STDERR
#   all remaining arguments are passed through to the plugins unchanged
#
# Returns the first plugin result that is defined and not -1, or 0 if no
# plugin produced one.  Dies if a ".kill" file exists in GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    $gli = 0 unless defined $gli;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file,
                                         $block_hash, $metadata,
                                         $extrametakeys, $extrametadata,
                                         $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by this plugin, try the next one
        next unless defined $rv;

        # blocked or processed - this file is finished
        return $rv unless $rv == -1;

        # an error has occurred; report it and let the next plugin try
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
275
# Pass a file by each plugin's read() in turn until one of them handles it,
# and record a failure if none does.
#
# A plugin's read must return:
#    undef - could not recognise
#    -1    - tried but error
#    0     - blocked
#    anything else for successful processing
#
#   $pluginfo    - reference to the list of plugin objects
#   $maxdocs     - replaced by -1 unless it is defined and contains a digit
#   $total_count - replaced by 0 unless it is defined and contains a digit
#   $gli         - if true, GLI progress elements are printed to STDERR
#   all remaining arguments are passed through to the plugins unchanged
#
# Returns the first plugin result that is defined and not -1.  Otherwise
# the file is reported on $outhandle (at verbosity 2 or more) and on
# $failhandle, the matching failure counter in $stats is incremented, and
# 0 is returned.  Dies if a ".kill" file exists in GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
    $total_count = 0 unless defined $total_count && $total_count =~ /\d/;
    $gli = 0 unless defined $gli;

    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it

    # Announce to GLI that we are handling a file
    print STDERR "<File n='$glifile'>\n" if $gli;

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # set once any plugin has recognised the file but failed to process it
    my $had_error = 0;

    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by this plugin, try the next one
        next unless defined $rv;

        # blocked or processed - this file is finished
        return $rv unless $rv == -1;

        # an error has occurred; remember it and let the next plugin try
        $had_error = 1;
    }

    # The file name is passed to gsprintf as a %s argument rather than
    # being embedded in the format string, so that a name containing "%"
    # or "{...}" is written literally.
    # NOTE(review): assumes gsprintf applies printf-style formatting to its
    # trailing arguments, as its other calls in this file suggest - confirm.
    if ($had_error) {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "%s: {plugin.no_plugin_could_process_this_file}\n", $file);
        $stats->{'num_not_processed'} ++;
    } else {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "%s: {plugin.no_plugin_could_recognise_this_file}\n", $file);
        $stats->{'num_not_recognised'} ++;
    }
    return 0;
}
346
# Write out some general statistics that the plugins have compiled.  Note
# that the buildcol.pl process doesn't currently call this, so the
# statistics are only output after import.pl.
#
#   $pluginfo    - reference to the list of plugin objects; each one is
#                  handed the shared $stats hash through compile_stats()
#   $statshandle - handle the summary lines are written to with gsprintf
#   $faillog     - argument for the {plugin.see_faillog} message
#   $gli         - if true, an <ImportComplete ...> element is printed to
#                  STDERR
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 unless defined $gli;

    for my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $processed    = $stats->{'num_processed'};
    my $blocked      = $stats->{'num_blocked'};
    my $rejected     = $stats->{'num_not_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $archives     = $stats->{'num_archives'};

    my $total = $processed + $blocked + $rejected + $unrecognised;

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$processed' blocked='$blocked' ignored='$unrecognised' failed='$rejected'>\n";
    }

    # documents considered
    if ($total == 1) {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }
    else {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }

    # archives among them - nothing is printed when there are none
    if ($archives == 1) {
        gsprintf($statshandle, " ({plugin.including_archive})\n");
    }
    elsif ($archives) {
        gsprintf($statshandle, " ({plugin.including_archives})\n",
                 $archives);
    }

    # documents included
    if ($processed == 1) {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }
    else {
        gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    }

    # documents no plugin recognised - nothing is printed when there are none
    if ($unrecognised == 1) {
        gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
    }
    elsif ($unrecognised) {
        gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                 $unrecognised);
    }

    # documents that were recognised but could not be processed
    if ($rejected == 1) {
        gsprintf($statshandle, "* {plugin.one_rejected}\n");
    }
    elsif ($rejected) {
        gsprintf($statshandle, "* {plugin.n_rejected}\n",
                 $rejected);
    }

    # point at the fail log whenever something was left out
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
404
# Call end($processor) on every plugin, in list order.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's end() method
#
# Returns the values returned by the plugins' end() methods.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
[4]409
# Call deinit($processor) on every plugin, in list order.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed through to each plugin's deinit() method
#
# Returns the values returned by the plugins' deinit() methods.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
416
[4]4171;
Note: See TracBrowser for help on using the repository browser.