root/main/trunk/greenstone2/perllib/plugin.pm @ 27303

Revision 27303, 14.8 KB (checked in by jmt12, 7 years ago)

Replacing hardcoded additions to INC and PATH environment variables with conditional ones - this allows us to use the order of values in these variables for precedence, thus allows better support for extensions that override classifiers, plugins etc. ENV and PATH functions already exists in util, but augmentINC() is a new function

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28
29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
31no strict 'subs';
32
33require util;
34use FileUtils;
35use gsprintf 'gsprintf';
36
37# mapping from old plugin names to new ones for backwards compatibility
38# can remove at sometime in future when we no longer want to support old xxPlug names in the config file
39my $plugin_name_map = {
40    'GAPlug' => 'GreenstoneXMLPlugin',
41    'ArcPlug' => 'ArchivesInfPlugin',
42    'RecPlug' => 'DirectoryPlugin',
43    'TEXTPlug' => 'TextPlugin',
44    'XMLPlug' => 'ReadXMLFile',
45    'EMAILPlug' => 'EmailPlugin',
46    'SRCPlug' => 'SourceCodePlugin',
47    'NULPlug' => 'NulPlugin',
48    'W3ImgPlug' => 'HTMLImagePlugin',
49    'PagedImgPlug' => 'PagedImagePlugin',
50    'METSPlug' => 'GreenstoneMETSPlugin',
51    'PPTPlug' => 'PowerPointPlugin',
52    'PSPlug' => 'PostScriptPlugin',
53    'DBPlug' => 'DatabasePlugin'
54    };
55
56# global variables
57my $stats = {'num_processed' => 0,
58         'num_blocked' => 0,
59         'num_not_processed' => 0,
60         'num_not_recognised' => 0,
61         'num_archives' => 0
62         };
63
64#globaloptions contains any options that should be passed to all plugins
65my ($verbosity, $outhandle, $failhandle, $globaloptions);
66
67sub get_valid_pluginname {
68    my ($pluginname) = @_;
69    my $valid_name = $pluginname;
70    if (defined $plugin_name_map->{$pluginname}) {
71    $valid_name = $plugin_name_map->{$pluginname};
72    } elsif ($pluginname =~ /Plug$/) {
73    $valid_name =~ s/Plug/Plugin/;
74   
75    }
76    return $valid_name;
77}
78
79sub load_plugin_require
80{
81    my ($pluginname) = @_;
82
83    my @check_list = ();
84
85    # pp_plugname shorthand for 'perllib' 'plugin' '$pluginname.pm'
86    my $pp_plugname
87    = &FileUtils::filenameConcatenate('perllib', 'plugins', "${pluginname}.pm");
88    my $collectdir = $ENV{'GSDLCOLLECTDIR'};
89
90    # find the plugin
91    if (defined($ENV{'GSDLCOLLECTION'}))
92    {
93    my $customplugname
94        = &FileUtils::filenameConcatenate($collectdir, "custom",$ENV{'GSDLCOLLECTION'},
95                  $pp_plugname);
96    push(@check_list,$customplugname);
97    }
98
99    my $colplugname = &FileUtils::filenameConcatenate($collectdir, $pp_plugname);
100    push(@check_list,$colplugname);
101
102    if (defined $ENV{'GSDLEXTS'}) {
103
104    my $ext_prefix = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "ext");
105
106    my @extensions = split(/:/,$ENV{'GSDLEXTS'});
107    foreach my $e (@extensions) {
108        my $extplugname = &FileUtils::filenameConcatenate($ext_prefix, $e, $pp_plugname);
109        push(@check_list,$extplugname);
110
111    }
112    }
113    if (defined $ENV{'GSDL3EXTS'}) {
114
115    my $ext_prefix = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "ext");
116
117    my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
118    foreach my $e (@extensions) {
119        my $extplugname = &FileUtils::filenameConcatenate($ext_prefix, $e, $pp_plugname);
120        push(@check_list,$extplugname);
121
122    }
123    }
124
125
126    my $mainplugname = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, $pp_plugname);
127    push(@check_list,$mainplugname);
128
129    my $success=0;
130    foreach my $plugname (@check_list) {
131    if (-e $plugname) {
132        # lets add perllib folder to INC
133          # check it isn't already there first [jmt12]
134        my ($perllibfolder) = $plugname =~ /^(.*[\/\\]perllib)[\/\\]plugins/;
135        if (-d $perllibfolder)
136            {
137              my $found_perllibfolder = 0;
138              foreach my $path (@INC)
139              {
140                if ($path eq $perllibfolder)
141                {
142                  $found_perllibfolder = 1;
143                  last;
144                }
145              }
146              if (!$found_perllibfolder)
147              {
148        unshift (@INC, $perllibfolder);
149              }
150        }
151        require $plugname;
152        $success=1;
153        last;
154    }
155    }
156
157    if (!$success) {
158    &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
159          $pluginname);
160    die "\n";
161    }
162}
163
164sub load_plugin_for_info {
165    my ($pluginname, $gs_version) = (@_);
166    $pluginname = &get_valid_pluginname($pluginname);
167    load_plugin_require($pluginname);
168
169    # create a plugin object
170    my ($plugobj);
171    my $options = "-gsdlinfo,-gs_version,$gs_version";
172   
173    eval ("\$plugobj = new \$pluginname([],[$options])");
174    die "$@" if $@;
175
176    return $plugobj;
177}
178
179sub load_plugins {
180    my ($plugin_list) = shift @_;
181    my ($incremental_mode, $gs_version);
182    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode, $gs_version) = @_; # globals
183    my @plugin_objects = ();
184    $verbosity = 2 unless defined $verbosity;
185    $outhandle = 'STDERR' unless defined $outhandle;
186    $failhandle = 'STDERR' unless defined $failhandle;
187
188    # before pushing collection perl and plugin directories onto INC, test that
189    # they aren't already there [jmt12]
190    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib'));
191    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib','plugins'));
192
193    map { $_ = "\"$_\""; } @$globaloptions;
194    my $globals = join (",", @$globaloptions);
195
196    foreach my $pluginoptions (@$plugin_list) {
197    my $pluginname = shift @$pluginoptions;
198    next unless defined $pluginname;
199    $pluginname = &get_valid_pluginname($pluginname);
200    load_plugin_require($pluginname);
201
202    # create a plugin object
203    my ($plugobj);
204    # put quotes around each option to the plugin, unless the option is already quoted
205    map { $_ = "\"$_\"" unless ($_ =~ m/^\s*\".*\"\s*$/) ; } @$pluginoptions;
206    my $options = "-gs_version,$gs_version,".join (",", @$pluginoptions);
207    if ($globals) {
208        if (@$pluginoptions) {
209        $options .= ",";
210        }
211        $options .= "$globals";
212    }
213    # need to escape backslash before putting in to the eval
214    # but watch out for any \" (which shouldn't be further escaped)
215    $options =~ s/\\([^"])/\\\\$1/g; #"
216    $options =~ s/\$/\\\$/g;
217
218    eval ("\$plugobj = new \$pluginname([],[$options])");
219    die "$@" if $@;
220   
221    # initialize plugin
222    $plugobj->init($verbosity, $outhandle, $failhandle);
223   
224    $plugobj->set_incremental($incremental_mode);
225
226    # add this object to the list
227    push (@plugin_objects, $plugobj);
228    }
229
230    return \@plugin_objects;
231}
232
233
234sub begin {
235    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;
236
237    map { $_->{'gli'} = $gli; } @$pluginfo;
238    map { $_->begin($pluginfo, $base_dir, $processor, $maxdocs); } @$pluginfo;
239}
240
241 sub remove_all {
242    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
243
244    map { $_->remove_all($pluginfo, $base_dir, $processor, $maxdocs); } @$pluginfo;
245}
246 
247sub remove_some {
248    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
249    return if (scalar(@$deleted_files)==0);
250    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
251    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);
252
253    foreach my $file (@$deleted_files) {
254    # use 'archiveinf-src' info database to look up all the OIDs
255    # that this file is used in (note in most cases, it's just one OID)
256   
257    my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file);
258    my $oids = $src_rec->{'oid'};
259    my $rv;
260    foreach my $plugobj (@$pluginfo) {
261
262        $rv = $plugobj->remove_one($file, $oids, $archivedir);
263        if (defined $rv && $rv != -1) {
264        return $rv;
265        } # else undefined (was not recognised by the plugin) or there was an error, try the next one
266    }
267    return 0;
268    }
269
270}
271sub file_block_read {
272    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
273
274
275    $gli = 0 unless defined $gli;
276
277    my $rv = 0;
278    my $glifile = $file;
279   
280    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it
281   
282    # Announce to GLI that we are handling a file
283    print STDERR "<File n='$glifile'>\n" if $gli;
284   
285    # the .kill file is a handy (if not very elegant) way of aborting
286    # an import.pl or buildcol.pl process
287    if (-e &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
288    gsprintf($outhandle, "{plugin.kill_file}\n");
289    die "\n";
290    }
291   
292    foreach my $plugobj (@$pluginfo) {
293
294        $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
295    #last if (defined $rv && $rv==1); # stop this file once we have found something to 'process' it
296    }
297   
298}
299
300
301sub metadata_read {
302    my ($pluginfo, $base_dir, $file, $block_hash,
303    $extrametakeys, $extrametadata, $extrametafile,
304    $processor, $gli, $aux) = @_;
305
306    $gli = 0 unless defined $gli;
307
308    my $rv = 0;
309    my $glifile = $file;
310   
311    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it
312   
313    # Announce to GLI that we are handling a file
314    print STDERR "<File n='$glifile'>\n" if $gli;
315   
316    # the .kill file is a handy (if not very elegant) way of aborting
317    # an import.pl or buildcol.pl process
318    if (-e &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
319    gsprintf($outhandle, "{plugin.kill_file}\n");
320    die "\n";
321    }
322
323    my $had_error = 0;
324    # pass this file by each of the plugins in turn until one
325    # is found which will process it
326    # read must return:
327    # undef - could not recognise
328    # -1 - tried but error
329    # 0 - blocked
330    # anything else for successful processing
331   
332    foreach my $plugobj (@$pluginfo) {
333
334    $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
335                 $extrametakeys, $extrametadata, $extrametafile,
336                 $processor, $gli, $aux);
337
338    if (defined $rv) {
339        if ($rv == -1) {
340            # an error has occurred
341        $had_error = 1;
342        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
343        } else {
344            return $rv;
345        }
346    } # else undefined - was not recognised by the plugin
347    }
348
349    return 0;
350}
351
352sub read {
353    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;
354
355    $maxdocs = -1 unless defined $maxdocs && $maxdocs =~ /\d/;
356    $total_count = 0 unless defined $total_count && $total_count =~ /\d/;
357    $gli = 0 unless defined $gli;
358
359    my $rv = 0;
360    my $glifile = $file;
361   
362    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it
363   
364    # Announce to GLI that we are handling a file
365    print STDERR "<File n='$glifile'>\n" if $gli;
366   
367    # the .kill file is a handy (if not very elegant) way of aborting
368    # an import.pl or buildcol.pl process
369    if (-e &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
370    gsprintf($outhandle, "{plugin.kill_file}\n");
371    die "\n";
372    }
373
374    my $had_error = 0;
375    # pass this file by each of the plugins in turn until one
376    # is found which will process it
377    # read must return:
378    # undef - could not recognise
379    # -1 - tried but error
380    # 0 - blocked
381    # anything else for successful processing
382   
383    foreach my $plugobj (@$pluginfo) {
384
385        $rv = $plugobj->read($pluginfo, $base_dir, $file,
386                 $block_hash, $metadata, $processor, $maxdocs,
387                 $total_count, $gli, $aux);
388
389    if (defined $rv) {
390        if ($rv == -1) {
391            # an error has occurred
392        $had_error = 1;
393        } else {
394            return $rv;
395        }
396    } # else undefined - was not recognised by the plugin
397    }
398
399    if ($had_error) {
400    # was recognised but couldn't be processed
401    if ($verbosity >= 2) {
402        gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
403    }
404    # tell the GLI that it was not processed
405    print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;
406     
407    gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
408    $stats->{'num_not_processed'} ++;
409    } else {
410    # was not recognised
411    if ($verbosity >= 2) {
412        gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
413    }
414    # tell the GLI that it was not processed
415    print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;
416   
417    gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
418    $stats->{'num_not_recognised'} ++;
419    }
420    return 0;
421}
422
423# write out some general stats that the plugins have compiled - note that
424# the buildcol.pl process doesn't currently call this process so the stats
425# are only output after import.pl -
426sub write_stats {
427    my ($pluginfo, $statshandle, $faillog, $gli) = @_;
428
429    $gli = 0 unless defined $gli;
430
431    foreach my $plugobj (@$pluginfo) {
432    $plugobj->compile_stats($stats);
433    }
434
435    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
436    $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};
437
438    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;
439
440    if ($total == 1) {
441    gsprintf($statshandle, "* {plugin.one_considered}\n");
442    } else {
443    gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
444    }
445    if ($stats->{'num_archives'}) {
446    if ($stats->{'num_archives'} == 1) {
447        gsprintf($statshandle, "   ({plugin.including_archive})\n");
448    }
449    else {
450        gsprintf($statshandle, "   ({plugin.including_archives})\n",
451             $stats->{'num_archives'});
452    }
453    }
454    if ($stats->{'num_processed'} == 1) {
455    gsprintf($statshandle, "* {plugin.one_included}\n");
456    } else {
457    gsprintf($statshandle, "* {plugin.n_included}\n", $stats->{'num_processed'});
458    }
459    if ($stats->{'num_not_recognised'}) {
460    if ($stats->{'num_not_recognised'} == 1) {
461        gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
462    } else {
463        gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
464             $stats->{'num_not_recognised'});
465    }
466
467    }
468    if ($stats->{'num_not_processed'}) {
469    if ($stats->{'num_not_processed'} == 1) {
470        gsprintf($statshandle, "* {plugin.one_rejected}\n");
471    } else {
472        gsprintf($statshandle, "* {plugin.n_rejected}\n",
473             $stats->{'num_not_processed'});
474    }
475    }
476    if ($stats->{'num_not_processed'} || $stats->{'num_not_recognised'}) {
477    gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
478    }
479}
480
481sub end {
482    my ($pluginfo, $processor) = @_;
483    map { $_->end($processor); } @$pluginfo;
484}
485
486sub deinit {
487    my ($pluginfo, $processor) = @_;
488   
489
490    map { $_->deinit($processor); } @$pluginfo;
491}
492
4931;
Note: See TracBrowser for help on using the browser.