source: main/trunk/greenstone2/perllib/plugin.pm@ 21307

Last change on this file since 21307 was 21307, checked in by kjdon, 14 years ago

removeold renamed to remove_all; added remove_some, which takes a list of files to be deleted (or reindexed)

  • Property svn:keywords set to Author Date Id Revision
File size: 13.4 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[21307]28use inexport;
29
[7829]30use strict; # to pick up typos and undeclared variables...
31no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]32no strict 'subs';
[7829]33
[134]34require util;
[7829]35use gsprintf 'gsprintf';
[4]36
# Backwards-compatibility table: legacy "xxPlug" names that may still appear
# in a collection's config file, keyed to the plugin names used today.
# get_valid_pluginname consults this table first. The table can be dropped
# once the old xxPlug names no longer need to be supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};
55
# Module-wide state.

# Counters shared by read() and write_stats(); each plugin is also handed
# this hash through compile_stats() when the statistics are written out.
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0,
};

# Assigned by load_plugins() from its arguments.
my $verbosity;
my $outhandle;
my $failhandle;
my $globaloptions;    # any options that should be passed to all plugins
[5682]66
# Translate a (possibly legacy) plugin name into the name used today.
#
#   $pluginname - plugin name as written in the collection configuration
#
# Returns the entry from $plugin_name_map when one exists; otherwise a name
# ending in "Plug" has that trailing "Plug" expanded to "Plugin"; any other
# name is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    # Explicit renames take priority over the generic suffix rule.
    return $plugin_name_map->{$pluginname}
        if defined $plugin_name_map->{$pluginname};

    my $valid_name = $pluginname;

    # Anchor the substitution so that only the trailing "Plug" is rewritten;
    # an unanchored s/Plug/Plugin/ would change the first "Plug" found
    # anywhere in the name (e.g. "PlugInfoPlug" -> "PluginInfoPlug").
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
[21290]78
# Locate the file implementing $pluginname and 'require' it.
#
# Candidate locations are tried in this order and the first one that exists
# is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/perllib/plugins  (only when
#      GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/perllib/plugins
#   3. $GSDLHOME/ext/<ext>/perllib/plugins for each <ext> in the
#      colon-separated GSDLEXTS list
#   4. $GSDL3SRCHOME/ext/<ext>/perllib/plugins for each <ext> in the
#      colon-separated GSDL3EXTS list
#   5. $GSDLHOME/perllib/plugins
#
# Prints the {plugin.could_not_find_plugin} message to STDERR and dies when
# none of the candidates exists.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # location of the plugin relative to each root:
    # 'perllib' 'plugins' '$pluginname.pm'
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates = ();

    # collection-specific customisation comes first
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &util::filename_cat($collectdir, "custom",
                                 $ENV{'GSDLCOLLECTION'}, $relative_path));
    }

    # then the collection itself
    push(@candidates, &util::filename_cat($collectdir, $relative_path));

    # then the extensions: [ variable listing them, variable naming their home ]
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'],
                            ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($list_var, $home_var) = @$ext_source;
        next unless defined $ENV{$list_var};

        my $ext_root = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $ext_name (split(/:/, $ENV{$list_var})) {
            push(@candidates,
                 &util::filename_cat($ext_root, $ext_name, $relative_path));
        }
    }

    # and finally the main installation
    push(@candidates, &util::filename_cat($ENV{'GSDLHOME'}, $relative_path));

    my $loaded = 0;
    foreach my $candidate (@candidates) {
        next unless -e $candidate;
        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
[10579]144
# Load a plugin and construct it with the single option "-gsdlinfo".
#
#   $pluginname - plugin name; legacy names are translated through
#                 get_valid_pluginname before loading
#
# Returns the newly constructed plugin object.
# Dies if the plugin file cannot be found (see load_plugin_require) or if the
# constructor dies; in the latter case the constructor's error is re-thrown.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Call the constructor directly instead of assembling Perl source for a
    # string eval; the class name is still resolved at run time.
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}
159
# Load and initialise every plugin named in $plugin_list.
#
#   $plugin_list      - array ref; each element is an array ref of the form
#                       [ pluginname, option, option, ... ]
#   $verbosity        - stored in the module-wide $verbosity (default 2)
#   $outhandle        - stored in the module-wide $outhandle (default 'STDERR')
#   $failhandle       - stored in the module-wide $failhandle (default 'STDERR')
#   $globaloptions    - array ref of options appended to every plugin's own
#                       options (treated as empty when undefined)
#   $incremental_mode - passed to each plugin's set_incremental()
#
# Returns an array ref of the plugin objects, in the order they were listed.
# Entries with no plugin name are skipped.
# Dies if a plugin cannot be found (see load_plugin_require) or if its
# constructor dies.
#
# The caller's $plugin_list and $globaloptions arrays are left unmodified.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental_mode;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;
    $globaloptions = [] unless defined $globaloptions;

    # make plugins shipped with the collection visible to 'require'/'use'
    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Options are handed to the constructor as plain strings.
    my @global_args = map { defined $_ ? "$_" : "" } @$globaloptions;

    my @plugin_objects = ();
    foreach my $pluginoptions (@$plugin_list) {
        # Work on a copy so the caller's list can be reused afterwards.
        my ($pluginname, @plugin_args) = @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        @plugin_args = map { defined $_ ? "$_" : "" } @plugin_args;

        # create a plugin object
        # The arguments are passed as data rather than spliced into source
        # text for a string eval, so option values containing quotes, '@',
        # '$' or backslashes reach the plugin exactly as given.
        my $plugobj = eval {
            $pluginname->new([], [@plugin_args, @global_args]);
        };
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
208
[835]209
# Record the GLI flag on every plugin object and then call begin() on each.
#
#   $pluginfo  - array ref of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin's begin()
#   $gli       - stored in each plugin object's 'gli' field
#
# No meaningful value is returned.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # Two separate passes: every plugin has its 'gli' field set before the
    # first begin() runs, because begin() receives the whole plugin list.
    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }
    foreach my $plugobj (@$pluginfo) {
        $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs);
    }

    return;
}
216
# Call remove_all() on every plugin object in turn.
#
#   $pluginfo - array ref of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin
#
# No meaningful value is returned.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs);
    }

    return;
}
222
# Tell every plugin about each file that has been deleted (or is to be
# reindexed).
#
#   $pluginfo      - array ref of plugin objects
#   $archivedir    - archives directory; handed to inexport::src_db_file to
#                    obtain the name of the 'archiveinf-src' database
#   $deleted_files - array ref of file names
#
# For each file the matching database record is fetched and its 'oid' field
# is passed, together with the file name, to each plugin's remove_one().
# Does nothing when the list is undefined or empty.
sub remove_some {
    my ($pluginfo, $archivedir, $deleted_files) = @_;

    return unless defined $deleted_files && scalar(@$deleted_files);

    my $arcinfo_src_filename = &inexport::src_db_file($archivedir);

    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' GDBM file to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file);
        my $oids = $src_rec->{'oid'};

        foreach my $plugobj (@$pluginfo) {
            $plugobj->remove_one($file, $oids);
        }
    }

    return;
}
# Offer $file to the file_block_read() method of every plugin.
#
#   $pluginfo - array ref of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to each plugin
#   $gli      - when true, progress tags are printed to STDERR (default 0)
#
# Every plugin is called; the loop does not stop at the first plugin that
# handles the file. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $status = 0;
    for my $plugin (@$pluginfo) {
        $status = $plugin->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
        #last if (defined $status && $status==1); # stop this file once we have found something to 'process' it
    }

}
267
268
# Offer $file to the metadata_read() method of each plugin in turn until one
# of them deals with it.
#
#   $pluginfo - array ref of plugin objects
#   $maxdocs  - defaults to -1 when undefined or containing no digit
#   $gli      - when true, progress tags are printed to STDERR (default 0)
#   all other arguments are passed through to the plugins unchanged
#
# A plugin's metadata_read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result other than -1; returns 0 when no plugin
# produced one. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli, $aux) = @_;

    unless (defined $maxdocs && $maxdocs =~ /\d/) {
        $maxdocs = -1;
    }
    unless (defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    for my $plugin (@$pluginfo) {
        my $status = $plugin->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                            $extrametakeys, $extrametadata, $extrametafile,
                                            $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by this plugin, try the next one
        next unless defined $status;

        # blocked or processed: this file is done
        return $status unless $status == -1;

        # an error has occurred; report it and keep trying the other plugins
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
320
# Offer $file to the read() method of each plugin in turn until one of them
# deals with it, and record the outcome when none does.
#
#   $pluginfo    - array ref of plugin objects
#   $maxdocs     - defaults to -1 when undefined or containing no digit
#   $total_count - defaults to 0 when undefined or containing no digit
#   $gli         - when true, progress tags are printed to STDERR (default 0)
#   all other arguments are passed through to the plugins unchanged
#
# A plugin's read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result other than -1. When no plugin produces
# one, the file is reported on $outhandle (at verbosity 2 or more) and on
# $failhandle, the matching counter in $stats ('num_not_processed' if any
# plugin returned -1, otherwise 'num_not_recognised') is incremented, and 0
# is returned. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    unless (defined $maxdocs && $maxdocs =~ /\d/) {
        $maxdocs = -1;
    }
    unless (defined $total_count && $total_count =~ /\d/) {
        $total_count = 0;
    }
    unless (defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    for my $plugin (@$pluginfo) {
        my $status = $plugin->read($pluginfo, $base_dir, $file,
                                   $block_hash, $metadata, $processor, $maxdocs,
                                   $total_count, $gli, $aux);

        # undefined - was not recognised by this plugin
        next unless defined $status;

        # blocked or processed: this file is done
        return $status unless $status == -1;

        # an error has occurred
        $had_error = 1;
    }

    if (!$had_error) {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    } else {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    }
    return 0;
}
391
# Write out the general statistics that the plugins have compiled. Note that
# the buildcol.pl process doesn't currently call this, so the stats are only
# output after import.pl.
#
#   $pluginfo    - array ref of plugin objects; each has compile_stats()
#                  called with the shared $stats hash first
#   $statshandle - handle the summary is written to
#   $faillog     - passed as the argument of the {plugin.see_faillog} message
#   $gli         - when true, an <ImportComplete ...> tag is printed to
#                  STDERR (default 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    unless (defined $gli) {
        $gli = 0;
    }

    for my $plugin (@$pluginfo) {
        $plugin->compile_stats($stats);
    }

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
        $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # Write the singular message when $count is exactly 1, otherwise the
    # plural message with $count as its argument.
    my $report = sub {
        my ($count, $singular, $plural) = @_;
        if ($count == 1) {
            gsprintf($statshandle, $singular);
        } else {
            gsprintf($statshandle, $plural, $count);
        }
    };

    # always reported
    $report->($total,
              "* {plugin.one_considered}\n",
              "* {plugin.n_considered}\n");

    # only reported when non-zero
    if ($stats->{'num_archives'}) {
        $report->($stats->{'num_archives'},
                  " ({plugin.including_archive})\n",
                  " ({plugin.including_archives})\n");
    }

    # always reported
    $report->($stats->{'num_processed'},
              "* {plugin.one_included}\n",
              "* {plugin.n_included}\n");

    # only reported when non-zero
    if ($stats->{'num_not_recognised'}) {
        $report->($stats->{'num_not_recognised'},
                  "* {plugin.one_unrecognised}\n",
                  "* {plugin.n_unrecognised}\n");
    }
    if ($stats->{'num_not_processed'}) {
        $report->($stats->{'num_not_processed'},
                  "* {plugin.one_rejected}\n",
                  "* {plugin.n_rejected}\n");
    }

    # point at the fail log whenever any file was rejected or unrecognised
    if ($stats->{'num_not_processed'} || $stats->{'num_not_recognised'}) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
449
# Call end() on every plugin object in turn.
#
#   $pluginfo  - array ref of plugin objects
#   $processor - passed through to each plugin's end()
#
# No meaningful value is returned.
sub end {
    my ($pluginfo, $processor) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->end($processor);
    }

    return;
}
[4]454
# Call deinit() on every plugin object in turn.
#
#   $pluginfo  - array ref of plugin objects
#   $processor - passed through to each plugin's deinit()
#
# No meaningful value is returned.
sub deinit {
    my ($pluginfo, $processor) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->deinit($processor);
    }

    return;
}
461
[4]4621;
Note: See TracBrowser for help on using the repository browser.