source: main/trunk/greenstone2/perllib/plugin.pm@ 32539

Last change on this file since 32539 was 32539, checked in by ak19, 5 years ago

New plugin parameter site_name (only set for GS3) that is passed to plugin::load_plugins() (but not to plugin::load_plugin_for_info()/gsdlinfo mode) by inexport.pm during import.pl and by basebuilder.pm during buildcol.pl. Like the gs_version parameter, it is parsed by plugins/PrintInfo.pm and will appear before gs_version (to preserve the way things were being parsed until now)

  • Property svn:keywords set to Author Date Id Revision
File size: 15.1 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[21307]28
[7829]29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]31no strict 'subs';
[7829]32
[134]33require util;
[27303]34use FileUtils;
[7829]35use gsprintf 'gsprintf';
[4]36
# Backwards-compatibility table: old-style plugin names (as they may still
# appear in a collection's config file) => the plugin name used today.
# This can be removed once old xxPlug names no longer need to be supported.
my $plugin_name_map = {
    ArcPlug      => 'ArchivesInfPlugin',
    DBPlug       => 'DatabasePlugin',
    EMAILPlug    => 'EmailPlugin',
    GAPlug       => 'GreenstoneXMLPlugin',
    METSPlug     => 'GreenstoneMETSPlugin',
    NULPlug      => 'NulPlugin',
    PPTPlug      => 'PowerPointPlugin',
    PSPlug       => 'PostScriptPlugin',
    PagedImgPlug => 'PagedImagePlugin',
    RecPlug      => 'DirectoryPlugin',
    SRCPlug      => 'SourceCodePlugin',
    TEXTPlug     => 'TextPlugin',
    W3ImgPlug    => 'HTMLImagePlugin',
    XMLPlug      => 'ReadXMLFile',
};
55
# Module-wide state.

# Counters reported by write_stats(); read() bumps the two "not_*" counters
# and each plugin is handed this hash through compile_stats().
my $stats = {
    num_processed      => 0,
    num_blocked        => 0,
    num_not_processed  => 0,
    num_not_recognised => 0,
    num_archives       => 0,
};

# Set by load_plugins() and used by the other functions in this module.
my $verbosity;
my $outhandle;
my $failhandle;
# globaloptions contains any options that should be passed to all plugins
my $globaloptions;
[5682]66
# Translate a plugin name (possibly an old-style one) into the name of the
# plugin that should actually be loaded.
#
#   $pluginname - plugin name as given by the caller
#
# Returns the entry from $plugin_name_map if there is one; otherwise a name
# ending in "Plug" has that suffix expanded to "Plugin"; any other name is
# returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    return $plugin_name_map->{$pluginname}
        if defined $plugin_name_map->{$pluginname};

    my $valid_name = $pluginname;
    # Anchored on purpose: only the trailing "Plug" is an old-style suffix.
    # An unanchored s/Plug/Plugin/ would rewrite the first "Plug" found, so
    # a name such as "PlugInPlug" came out as "PluginInPlug".
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
[21290]78
# Find the file implementing plugin $pluginname and require() it.
#
# The candidate locations are tried in this order and the first file that
# exists wins:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/perllib/plugins
#      (only when GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/perllib/plugins
#   3. $GSDLHOME/ext/<ext>/perllib/plugins for each entry of GSDLEXTS
#   4. $GSDL3SRCHOME/ext/<ext>/perllib/plugins for each entry of GSDL3EXTS
#   5. $GSDLHOME/perllib/plugins
#
# The perllib folder of the chosen file is put at the front of @INC unless
# it is already listed there.  If no candidate exists, the
# plugin.could_not_find_plugin message is printed to STDERR and the sub dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # location of the plugin file relative to whichever root is being tried
    my $relative_plugin_path
        = &FileUtils::filenameConcatenate('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates = ();

    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &FileUtils::filenameConcatenate($collectdir, "custom", $ENV{'GSDLCOLLECTION'},
                                             $relative_plugin_path));
    }

    push(@candidates, &FileUtils::filenameConcatenate($collectdir, $relative_plugin_path));

    # extension folders: [ variable listing the extensions, variable naming their home ]
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$ext_source;
        next unless defined $ENV{$exts_var};

        my $ext_prefix = &FileUtils::filenameConcatenate($ENV{$home_var}, "ext");
        foreach my $ext (split(/:/, $ENV{$exts_var})) {
            push(@candidates,
                 &FileUtils::filenameConcatenate($ext_prefix, $ext, $relative_plugin_path));
        }
    }

    push(@candidates, &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, $relative_plugin_path));

    my $success = 0;
    foreach my $candidate (@candidates) {
        next unless &FileUtils::fileExists($candidate);

        # make the perllib folder that holds this plugin available through @INC
        my ($perllibfolder) = $candidate =~ /^(.*[\/\\]perllib)[\/\\]plugins/;
        if (&FileUtils::directoryExists($perllibfolder)) {
            my $already_listed = grep { $_ eq $perllibfolder } @INC;
            unshift(@INC, $perllibfolder) if (!$already_listed);
        }

        require $candidate;
        $success = 1;
        last;
    }

    if (!$success) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
[10579]163
# Load a single plugin and construct it in "-gsdlinfo" mode.
#
#   $pluginname - plugin name (old-style names are mapped by
#                 get_valid_pluginname)
#   $gs_version - passed to the plugin as the value of -gs_version
#
# Returns the new plugin object.  Dies if the plugin file cannot be loaded
# (see load_plugin_require) or if the constructor throws.
sub load_plugin_for_info {
    my ($pluginname, $gs_version) = (@_);
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # The option list used to be pasted into a string eval.  That evaluated
    # the version as Perl source, so "3.10" reached the plugin as the number
    # 3.1 and a non-numeric version was a bareword or a syntax error.
    # Building the list directly passes the value through untouched.
    my @options = ('-gsdlinfo', '-gs_version');
    # an undefined or empty version contributed no list element before either
    push(@options, $gs_version) if (defined $gs_version && $gs_version ne '');

    # create a plugin object
    my $plugobj = eval { $pluginname->new([], \@options) };
    die "$@" if $@;

    return $plugobj;
}
178
# Load, construct and initialise every plugin named in $plugin_list.
#
#   $plugin_list      - array ref; each element is an array ref of the form
#                       [ plugin name, option, option, ... ]
#   $verbosity        - stored in the module global (default 2)
#   $outhandle        - stored in the module global (default 'STDERR')
#   $failhandle       - stored in the module global (default 'STDERR')
#   $globaloptions    - array ref of options appended to every plugin's options
#   $incremental_mode - handed to each plugin's set_incremental()
#   $gs_version       - passed to each plugin as the value of -gs_version
#   $site             - if true, passed to each plugin as -site_name
#
# Returns an array ref of the plugin objects, in list order.  Dies if a plugin
# cannot be found or its constructor fails.
#
# The arrays handed in by the caller are no longer modified.  Previously the
# plugin name was shifted off each entry and the options were quoted in place,
# so calling this twice with the same data took the first option for the
# plugin name and wrapped the global options in a second layer of quotes.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my ($incremental_mode, $gs_version, $site);
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode, $gs_version, $site) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # augmentINC is used so the collection's perl and plugin directories
    # are only added to INC if they aren't already there
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib','plugins'));

    # quote a copy of each global option, unless it is already quoted
    # (same rule as for the per-plugin options below)
    my @quoted_globals = map { ($_ =~ m/^\s*\".*\"\s*$/) ? $_ : "\"$_\"" }
                         @{ $globaloptions || [] };
    my $globals = join (",", @quoted_globals);

    foreach my $pluginoptions (@$plugin_list) {
        # work on a copy so the caller's entry keeps its name and raw options
        my ($pluginname, @plugin_opts) = @{ $pluginoptions || [] };
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # create a plugin object
        my ($plugobj);
        # put quotes around each option to the plugin, unless the option is already quoted
        my @quoted_opts = map { ($_ =~ m/^\s*\".*\"\s*$/) ? $_ : "\"$_\"" } @plugin_opts;
        my $site_option = $site ? "\"-site_name\",\"$site\"," : "";
        my $options = "$site_option"."-gs_version,$gs_version,".join (",", @quoted_opts);
        if ($globals) {
            if (@quoted_opts) {
                $options .= ",";
            }
            $options .= "$globals";
        }
        # need to escape backslash before putting in to the eval
        # but watch out for any \" (which shouldn't be further escaped)
        $options =~ s/\\([^"])/\\\\$1/g; #"
        $options =~ s/\$/\\\$/g;
        # '@' must be escaped as well: inside the double-quoted strings built
        # above, "me@example.org" would be interpolated as the array @example
        # and the eval would fail under strict
        $options =~ s/\@/\\\@/g;

        # NOTE(review): the options are evaluated as Perl source.  This is
        # kept because already-quoted options depend on it -- presumably they
        # come from a trusted collection configuration; confirm before
        # feeding in untrusted input.
        eval ("\$plugobj = new \$pluginname([],[$options])");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
233
[835]234
# Tell every plugin that processing is about to start.
#
# Each plugin object first has its 'gli' field set to $gli; only then is
# begin($pluginfo, $base_dir, $processor, $maxdocs) called on each plugin in
# list order.  Returns the values returned by those begin() calls.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
241
# Call remove_all($pluginfo, $base_dir, $processor, $maxdocs) on every plugin
# in list order.  Returns the values returned by those calls.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
247
# Ask the plugins to remove each file listed in $deleted_files.
#
#   $pluginfo      - array ref of plugin objects
#   $infodbtype    - info database type ("gdbm-txtgz" is treated as "gdbm")
#   $archivedir    - archives directory, passed on to the plugins
#   $deleted_files - array ref of file names
#
# For each file the 'archiveinf-src' info database is consulted for the
# file's 'oid' entry, then remove_one($file, $oids, $archivedir) is tried on
# each plugin in turn until one returns a defined value other than -1.
#
# Returns nothing if $deleted_files is empty.  Otherwise returns 0 if any
# file was not handled by any plugin, else the value returned by the plugin
# that handled the last file.  For a single file this is what the previous
# implementation returned.
#
# The previous implementation returned from inside the per-file loop, so only
# the first file in $deleted_files was ever processed.
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
    return if (scalar(@$deleted_files)==0);
    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $all_handled = 1;
    my $last_rv = 0;

    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $file_with_placeholders = &util::abspath_to_placeholders($file);
        my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file_with_placeholders);
        my $oids = $src_rec->{'oid'};

        my $handled = 0;
        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            if (defined $rv && $rv != -1) {
                $last_rv = $rv;
                $handled = 1;
                last; # this file is dealt with, move on to the next one
            } # else undefined (was not recognised by the plugin) or there was an error, try the next one
        }
        $all_handled = 0 unless $handled;
    }

    return $all_handled ? $last_rv : 0;
}
# Give every plugin the chance to run its file_block_read() on $file.
#
# When $gli is true a <File n='...'> line is printed to STDERR first.  Dies
# if a ".kill" file exists in $GSDLCOLLECTDIR.  Unlike read(), every plugin
# is called; the values they return are not used.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    $gli = 0 unless defined $gli;

    # file sometimes starts with a / so get rid of it
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        # no early exit here: every plugin gets to see the file
        my $result = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }
}
301
302
# Pass $file by each plugin's metadata_read() in turn until one deals with it.
#
# A plugin's metadata_read() must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first value that is neither undef nor -1, or 0 if no plugin
# produced one.  When $gli is true a <File n='...'> line is printed to STDERR
# first and a <ProcessingError n='...'> line for each plugin that returned -1.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    $gli = 0 unless defined $gli;

    # file sometimes starts with a / so get rid of it
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                         $extrametakeys, $extrametadata, $extrametafile,
                                         $processor, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: we are done with this file
        return $rv if ($rv != -1);

        # an error has occurred
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
353
# Pass $file by each plugin's read() in turn until one deals with it.
#
# A plugin's read() must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first value that is neither undef nor -1.  If no plugin produces
# one, the failure is reported (to $outhandle when verbosity >= 2, to STDERR
# for GLI when $gli is true, and to $failhandle), the matching counter in
# $stats is incremented and 0 is returned.  $maxdocs defaults to -1 and
# $total_count to 0 unless they contain a digit.  Dies if a ".kill" file
# exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
    $total_count = 0 unless defined $total_count && $total_count =~ /\d/;
    $gli = 0 unless defined $gli;

    # file sometimes starts with a / so get rid of it
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: we are done with this file
        return $rv if ($rv != -1);

        # an error has occurred
        $had_error = 1;
    }

    if (!$had_error) {
        # was not recognised
        gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file) if ($verbosity >= 2);

        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
        return 0;
    }

    # was recognised but couldn't be processed
    gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file) if ($verbosity >= 2);

    # tell the GLI that it was not processed
    print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

    gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
    $stats->{'num_not_processed'} ++;
    return 0;
}
424
# Write out the general statistics gathered during the run.
#
# Every plugin is first handed the shared $stats hash through
# compile_stats(); the summary lines are then written to $statshandle with
# gsprintf.  When $gli is true an <ImportComplete ...> line is also printed
# to STDERR.  $faillog is passed to the "see faillog" message, which is only
# written when some file was rejected or not recognised.
#
# Note that the buildcol.pl process doesn't currently call this, so the
# stats are only output after import.pl.
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 unless defined $gli;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $num_processed    = $stats->{'num_processed'};
    my $num_unrecognised = $stats->{'num_not_recognised'};
    my $num_rejected     = $stats->{'num_not_processed'};
    my $num_archives     = $stats->{'num_archives'};

    my $total = $num_processed + $stats->{'num_blocked'} +
        $num_rejected + $num_unrecognised;

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # documents considered
    if ($total == 1) {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }
    else {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }

    # ... of which archives
    if ($num_archives) {
        if ($num_archives == 1) {
            gsprintf($statshandle, "   ({plugin.including_archive})\n");
        }
        else {
            gsprintf($statshandle, "   ({plugin.including_archives})\n",
                     $num_archives);
        }
    }

    # documents included
    if ($num_processed == 1) {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }
    else {
        gsprintf($statshandle, "* {plugin.n_included}\n", $num_processed);
    }

    # documents no plugin recognised
    if ($num_unrecognised) {
        if ($num_unrecognised == 1) {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
        else {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $num_unrecognised);
        }
    }

    # documents recognised but not processed
    if ($num_rejected) {
        if ($num_rejected == 1) {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
        else {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $num_rejected);
        }
    }

    if ($num_rejected || $num_unrecognised) {
        gsprintf($statshandle, "   {plugin.see_faillog}\n", $faillog);
    }
}
482
# Call end($processor) on every plugin in list order.
# Returns the values returned by those calls.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
[4]487
# Call deinit($processor) on every plugin in list order.
# Returns the values returned by those calls.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
494
[4]4951;
Note: See TracBrowser for help on using the repository browser.