source: main/trunk/greenstone2/perllib/plugin.pm@ 32571

Last change on this file since 32571 was 32566, checked in by ak19, 5 years ago

In some ways, it may be better if plugin.pm::remove_some() didn't return after the first failure (its return value is never checked or acted upon anyway), but instead continued attempting to process the remainder of its list of files to be deleted/reindexed.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.9 KB
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28
29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
31no strict 'subs';
32
33require util;
34use FileUtils;
35use gsprintf 'gsprintf';
36
# Backwards-compatibility table: old "xxPlug" names that may still appear in
# a collection's config file, mapped to the plugin names used now.
# It can be removed once the old names no longer need to be supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};

# Counters shared by the whole package; write_stats() hands them to each
# plugin's compile_stats() and then reports them.
my $stats = {
    'num_archives'       => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_processed'      => 0,
};

# Package-wide settings, assigned by load_plugins().
# $globaloptions contains any options that should be passed to all plugins.
my ($verbosity, $outhandle, $failhandle, $globaloptions);
66
# Translate a plugin name, as written in a config file, into the name of the
# plugin that should actually be loaded.
#   $pluginname - the plugin name to translate
# Returns:
#   - the replacement listed in $plugin_name_map, if the name has an entry;
#   - otherwise, for a name ending in "Plug", the name with that trailing
#     "Plug" expanded to "Plugin";
#   - otherwise the name unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    if (defined $plugin_name_map->{$pluginname}) {
        return $plugin_name_map->{$pluginname};
    }

    my $valid_name = $pluginname;
    # The substitution is anchored to the end of the name on purpose: only the
    # trailing "Plug" is the old-style suffix. An unanchored s/Plug/Plugin/
    # would rewrite the first "Plug" found anywhere in the name instead.
    $valid_name =~ s/Plug$/Plugin/;
    return $valid_name;
}
78
# Find the file that implements plugin $pluginname and load it with require.
#
# Candidate locations are tried in this order, and the first file that exists
# is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/perllib/plugins  (only when
#      GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/perllib/plugins
#   3. $GSDLHOME/ext/<e>/perllib/plugins for each <e> in the colon-separated
#      GSDLEXTS (only when GSDLEXTS is set)
#   4. $GSDL3SRCHOME/ext/<e>/perllib/plugins for each <e> in the
#      colon-separated GSDL3EXTS (only when GSDL3EXTS is set)
#   5. $GSDLHOME/perllib/plugins
# Before the file is loaded, the perllib folder it lives under is put at the
# front of @INC if it is a directory and not already listed there.
#
# If no candidate exists, an error is reported through gsprintf and the
# function dies with "\n".
sub load_plugin_require
{
    my ($pluginname) = @_;

    # pp_plugname is shorthand for 'perllib' 'plugins' '$pluginname.pm'
    my $pp_plugname
        = &FileUtils::filenameConcatenate('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    # build the list of places to look, most specific first
    my @candidates = ();

    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &FileUtils::filenameConcatenate($collectdir, "custom", $ENV{'GSDLCOLLECTION'},
                                             $pp_plugname));
    }

    push(@candidates, &FileUtils::filenameConcatenate($collectdir, $pp_plugname));

    # GS2 extensions live under GSDLHOME/ext, GS3 ones under GSDL3SRCHOME/ext
    foreach my $ext_setting (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($list_var, $home_var) = @$ext_setting;
        next unless defined $ENV{$list_var};

        my $ext_root = &FileUtils::filenameConcatenate($ENV{$home_var}, "ext");
        foreach my $ext_name (split(/:/, $ENV{$list_var})) {
            push(@candidates,
                 &FileUtils::filenameConcatenate($ext_root, $ext_name, $pp_plugname));
        }
    }

    push(@candidates, &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, $pp_plugname));

    # load the first candidate that exists
    my $loaded = 0;
    foreach my $candidate (@candidates) {
        next unless &FileUtils::fileExists($candidate);

        # make sure the perllib folder holding this plugin is on @INC,
        # without adding it a second time [jmt12]
        my ($perllib_dir) = $candidate =~ /^(.*[\/\\]perllib)[\/\\]plugins/;
        if (&FileUtils::directoryExists($perllib_dir)) {
            my $already_listed = grep { $_ eq $perllib_dir } @INC;
            unshift (@INC, $perllib_dir) unless $already_listed;
        }

        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
163
# Load the named plugin and construct an instance of it with the options
# -gsdlinfo and -gs_version.
#   $pluginname - plugin name; old-style names are translated through
#                 get_valid_pluginname() first
#   $gs_version - value given to the -gs_version option
# Returns the new plugin object. Dies with the eval error if the plugin
# cannot be constructed.
sub load_plugin_for_info {
    my ($pluginname, $gs_version) = (@_);

    # the eval below refers to $pluginname by name, so the translated name
    # has to be stored back into that variable
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # the option list is spliced into the eval'd code as text
    my $options = join(",", "-gsdlinfo", "-gs_version", $gs_version);

    # create a plugin object
    my $plugobj;
    eval ("\$plugobj = new \$pluginname([],[$options])");
    if ($@) {
        die "$@";
    }

    return $plugobj;
}
178
# Load and initialise every plugin named in $plugin_list.
#
#   $plugin_list - reference to an array of array references; each inner
#                  array is a plugin name followed by that plugin's options.
#                  The inner arrays are modified: the name is shifted off and
#                  the remaining options are wrapped in double quotes.
# The other arguments, in order, are stored in the package-wide variables
# $verbosity, $outhandle, $failhandle and $globaloptions, followed by
# $incremental_mode, $gs_version and $site. Verbosity defaults to 2 and both
# handles default to 'STDERR' when undefined. The entries of $globaloptions
# are wrapped in double quotes in place.
#
# Returns a reference to the array of plugin objects, in list order.
# Dies with the eval error if a plugin cannot be constructed.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my ($incremental_mode, $gs_version, $site);
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode, $gs_version, $site) = @_; # globals

    if (!defined $verbosity) { $verbosity = 2; }
    if (!defined $outhandle) { $outhandle = 'STDERR'; }
    if (!defined $failhandle) { $failhandle = 'STDERR'; }

    # put the collection's perl and plugin directories onto INC; augmentINC
    # is used so that they are not added if already there [jmt12]
    my $collection_perllib = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib');
    &util::augmentINC($collection_perllib);
    my $collection_plugins = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib','plugins');
    &util::augmentINC($collection_plugins);

    # quote every global option (in place) and join them into one string
    foreach my $global_opt (@$globaloptions) {
        $global_opt = "\"$global_opt\"";
    }
    my $globals = join (",", @$globaloptions);

    my @plugin_objects = ();
    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # put quotes around each option to the plugin, unless the option is
        # already quoted
        foreach my $opt (@$pluginoptions) {
            next if ($opt =~ m/^\s*\".*\"\s*$/);
            $opt = "\"$opt\"";
        }

        # assemble the text of the option list that is spliced into the eval
        my $site_option = "";
        if ($site) {
            $site_option = "\"-site_name\",\"$site\",";
        }
        my $options = $site_option . "-gs_version,$gs_version," . join (",", @$pluginoptions);
        if ($globals) {
            $options .= (@$pluginoptions ? "," : "") . "$globals";
        }

        # need to escape backslash before putting in to the eval
        # but watch out for any \" (which shouldn't be further escaped)
        $options =~ s/\\([^"])/\\\\$1/g; #"
        $options =~ s/\$/\\\$/g;

        # create a plugin object; the eval'd code refers to $plugobj and
        # $pluginname by name
        my ($plugobj);
        eval ("\$plugobj = new \$pluginname([],[$options])");
        if ($@) {
            die "$@";
        }

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);
        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
233
234
# Record the GLI flag on every plugin object, then call begin() on each one.
#   $pluginfo  - reference to the array of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin's begin()
#   $gli       - value stored in each plugin object's 'gli' field
# All plugins get their 'gli' field set before the first begin() is called.
# Returns the values returned by the plugins' begin() calls.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
241
# Call remove_all() on every plugin object in turn.
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $processor, $maxdocs - passed through to each plugin
# Returns the values returned by the plugins' remove_all() calls.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
247
# Ask the plugins to remove each of the given files.
#
#   $pluginfo      - reference to the array of plugin objects
#   $infodbtype    - info database type; "gdbm-txtgz" is treated as "gdbm"
#   $archivedir    - archives directory, used to locate the 'archiveinf-src'
#                    info database and passed on to the plugins
#   $deleted_files - reference to the array of files to remove
#
# For each file, the OIDs recorded for it in the 'archiveinf-src' database
# are looked up, and the plugins are tried in order until one returns a
# defined value other than -1 from remove_one(). A file that no plugin
# handles produces a warning on STDERR; the remaining files are still
# processed.
#
# Returns nothing if $deleted_files is empty. Otherwise returns 1 if every
# file was handled by some plugin, and 0 if any file was not.
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
    return if (scalar(@$deleted_files)==0);

    # This file does not load dbutil at the top, so load it here rather than
    # relying on the caller having done so already.
    require dbutil;

    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $all_files_processed_successfully = 1;

    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $file_with_placeholders = &util::abspath_to_placeholders($file);
        my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file_with_placeholders);
        my $oids = $src_rec->{'oid'};

        my $processed_file = 0; # set to 1 if a plugin could process the file and did so successfully
        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            if (defined $rv && $rv != -1) {
                $processed_file = 1;
                last; # this file is dealt with, move on to the next deleted file
            } # else undefined (was not recognised by the plugin) or there was an error, try the next one
        }

        if (!$processed_file) {
            # No plugin could deal with the file. The lookup above may have
            # found no OIDs for it, so only dereference $oids when it really
            # is an array reference.
            my $oid_list = (ref($oids) eq 'ARRAY') ? join(",", @$oids) : "";
            print STDERR "WARNING: plugin::remove_some() failed to process $file with oid(s) ". $oid_list . "\n";
            $all_files_processed_successfully = 0;
            # carry on with the remaining deleted files
        }
    }

    return $all_files_processed_successfully; # callers don't seem to do anything with return val
}
288
# Pass a file to the file_block_read() method of every plugin.
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata - passed through to each plugin
#   $gli      - true when output for GLI should be printed (defaults to 0)
# Every plugin is called; the loop does not stop at the first plugin that
# deals with the file.
# Dies with "\n" if a ".kill" file exists in $GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
        #last if (defined $rv && $rv==1); # stop this file once we have found something to 'process' it
    }

}
317
318
# Offer a file to each plugin's metadata_read() in turn, until one of them
# returns a defined value other than -1.
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
#   $extrametafile, $processor, $aux - passed through to each plugin
#   $gli      - true when output for GLI should be printed (defaults to 0)
# Returns the first defined value other than -1 that a plugin returns, or 0
# if no plugin returns one.
# Dies with "\n" if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;
    # pass this file by each of the plugins in turn until one
    # is found which will process it
    # read must return:
    # undef - could not recognise
    # -1 - tried but error
    # 0 - blocked
    # anything else for successful processing
    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                      $extrametakeys, $extrametadata, $extrametafile,
                                      $processor, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: hand the plugin's answer straight back
        return $rv unless ($rv == -1);

        # an error has occurred
        $had_error = 1;
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
369
# Offer a file to each plugin's read() in turn, until one of them returns a
# defined value other than -1.
#   $pluginfo    - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata, $processor, $aux - passed
#                  through to each plugin
#   $maxdocs     - passed through; set to -1 if undefined or without a digit
#   $total_count - passed through; set to 0 if undefined or without a digit
#   $gli         - true when output for GLI should be printed (defaults to 0)
# Returns the first defined value other than -1 that a plugin returns.
# If no plugin returns one, the file is reported as "not processed" (some
# plugin returned -1) or "not recognised" (every plugin returned undef), the
# matching counter in $stats is incremented, and 0 is returned.
# Dies with "\n" if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
        $total_count = 0;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;
    # pass this file by each of the plugins in turn until one
    # is found which will process it
    # read must return:
    # undef - could not recognise
    # -1 - tried but error
    # 0 - blocked
    # anything else for successful processing
    foreach my $plugobj (@$pluginfo) {
        $rv = $plugobj->read($pluginfo, $base_dir, $file,
                             $block_hash, $metadata, $processor, $maxdocs,
                             $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # blocked or processed: hand the plugin's answer straight back
        return $rv unless ($rv == -1);

        # an error has occurred
        $had_error = 1;
    }

    if (!$had_error) {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    } else {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    }
    return 0;
}
440
# Write out the general statistics that the plugins have compiled.
# Note that the buildcol.pl process doesn't currently call this, so the
# stats are only output after import.pl.
#   $pluginfo    - reference to the array of plugin objects; each one has
#                  compile_stats() called on the shared $stats
#   $statshandle - handle given to gsprintf for the report lines
#   $faillog     - value reported in the "see faillog" line
#   $gli         - true when output for GLI should be printed (defaults to 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # let every plugin add its own counts to the shared totals first
    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $included     = $stats->{'num_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $rejected     = $stats->{'num_not_processed'};
    my $archives     = $stats->{'num_archives'};

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
        $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # number of files considered
    if ($total != 1) {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    } else {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }

    # ...of which archives, if any
    if ($archives) {
        if ($archives != 1) {
            gsprintf($statshandle, " ({plugin.including_archives})\n",
                     $stats->{'num_archives'});
        } else {
            gsprintf($statshandle, " ({plugin.including_archive})\n");
        }
    }

    # number of files included
    if ($included != 1) {
        gsprintf($statshandle, "* {plugin.n_included}\n", $stats->{'num_processed'});
    } else {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }

    # number of files no plugin recognised, if any
    if ($unrecognised) {
        if ($unrecognised != 1) {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $stats->{'num_not_recognised'});
        } else {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
    }

    # number of files that were recognised but could not be processed, if any
    if ($rejected) {
        if ($rejected != 1) {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $stats->{'num_not_processed'});
        } else {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
    }

    # point at the fail log whenever something went wrong
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
498
# Call end() on every plugin object in turn.
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through to each plugin's end()
# Returns the values returned by the plugins' end() calls.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
503
# Call deinit() on every plugin object in turn.
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through to each plugin's deinit()
# Returns the values returned by the plugins' deinit() calls.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
510
5111;
Note: See TracBrowser for help on using the repository browser.