root/main/trunk/greenstone2/perllib/plugin.pm @ 32566

Revision 32566, 15.9 KB (checked in by ak19, 10 months ago)

In some ways, it may be better if plugin.pm::remove_some() did not return after the first failure (its return value is never checked or acted upon) and instead continued attempting to process the remainder of its list of files to be deleted/reindexed.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28
29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
31no strict 'subs';
32
33require util;
34use FileUtils;
35use gsprintf 'gsprintf';
36
# Backwards-compatibility table: legacy "xxPlug" names (as they may still
# appear in a collection's config file) mapped to the current plugin names.
# Consulted by get_valid_pluginname(). Can be removed once the old names
# are no longer supported.
my $plugin_name_map = {
    'ArcPlug'      => 'ArchivesInfPlugin',
    'DBPlug'       => 'DatabasePlugin',
    'EMAILPlug'    => 'EmailPlugin',
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'NULPlug'      => 'NulPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'TEXTPlug'     => 'TextPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'XMLPlug'      => 'ReadXMLFile',
};

# Running totals for the whole import. Each plugin is handed this hash via
# compile_stats() in write_stats(); read() bumps the two "not_*" counters
# itself when no plugin handles a file.
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0,
};

# File-level settings, (re)assigned on every call to load_plugins().
my $verbosity;
my $outhandle;
my $failhandle;
# globaloptions contains any options that should be passed to all plugins
my $globaloptions;
66
# Translate a plugin name as written in a config file into the name of the
# plugin module that should actually be loaded.
#
#   $pluginname - the name as supplied (possibly a legacy "xxPlug" name)
#
# Returns:
#   - the mapped name, if $pluginname has an entry in $plugin_name_map;
#   - otherwise, for names ending in "Plug", the same name with that
#     trailing "Plug" turned into "Plugin";
#   - otherwise $pluginname unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;
    my $valid_name = $pluginname;
    if (defined $plugin_name_map->{$pluginname}) {
        $valid_name = $plugin_name_map->{$pluginname};
    } elsif ($pluginname =~ /Plug$/) {
        # Anchor the substitution to the end of the name, matching the test
        # above. Unanchored, the FIRST "Plug" was rewritten, so a name such
        # as "PluginTestPlug" became "PlugininTestPlug".
        $valid_name =~ s/Plug$/Plugin/;
    }
    return $valid_name;
}
78
# Locate the file "<pluginname>.pm" and require() it.
#
# Candidate locations are tried in this order and the first file that
# exists wins:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/perllib/plugins
#      (only when GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/perllib/plugins
#   3. $GSDLHOME/ext/<e>/perllib/plugins for each <e> in the
#      colon-separated GSDLEXTS (only when GSDLEXTS is set)
#   4. $GSDL3SRCHOME/ext/<e>/perllib/plugins for each <e> in the
#      colon-separated GSDL3EXTS (only when GSDL3EXTS is set)
#   5. $GSDLHOME/perllib/plugins
#
# The perllib folder of the chosen file is put at the front of @INC if it
# exists and is not already listed there.
#
# Dies (after reporting through gsprintf on STDERR) when no candidate exists.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # pp_plugname is shorthand for 'perllib' 'plugins' '$pluginname.pm'
    my $pp_plugname
        = &FileUtils::filenameConcatenate('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates = ();

    # collection-specific locations
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &FileUtils::filenameConcatenate($collectdir, "custom",
                                             $ENV{'GSDLCOLLECTION'}, $pp_plugname));
    }
    push(@candidates, &FileUtils::filenameConcatenate($collectdir, $pp_plugname));

    # extension locations: [variable listing the extensions, variable
    # naming the home directory that holds the "ext" folder]
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$ext_source;
        next unless defined $ENV{$exts_var};

        my $ext_prefix = &FileUtils::filenameConcatenate($ENV{$home_var}, "ext");
        foreach my $ext_name (split(/:/, $ENV{$exts_var})) {
            push(@candidates,
                 &FileUtils::filenameConcatenate($ext_prefix, $ext_name, $pp_plugname));
        }
    }

    # the main Greenstone installation comes last
    push(@candidates, &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, $pp_plugname));

    my $loaded = 0;
    foreach my $candidate (@candidates) {
        next unless &FileUtils::fileExists($candidate);

        # add the perllib folder to INC, unless it is already there [jmt12]
        my ($perllibfolder) = $candidate =~ /^(.*[\/\\]perllib)[\/\\]plugins/;
        if (&FileUtils::directoryExists($perllibfolder)) {
            my $already_listed = grep { $_ eq $perllibfolder } @INC;
            unshift (@INC, $perllibfolder) unless $already_listed;
        }

        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
163
# Load a plugin and construct it in "info" mode.
#
#   $pluginname - plugin name; legacy names are translated with
#                 get_valid_pluginname() before loading
#   $gs_version - value passed to the plugin after the -gs_version option
#
# The constructor receives an empty first list and the option list
# ('-gsdlinfo', '-gs_version', $gs_version).
#
# Returns the new plugin object. Dies with the constructor's error message
# if construction fails (and load_plugin_require() dies if the plugin file
# cannot be found).
sub load_plugin_for_info {
    my ($pluginname, $gs_version) = (@_);
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Build the option list as data rather than as source text. Previously
    # $gs_version was interpolated unquoted into a string eval, so it was
    # parsed as Perl code: "3.10" silently became 3.1 and anything that was
    # not a plain number was evaluated as an expression.
    my @options = ('-gsdlinfo', '-gs_version');
    # With no version supplied, the old eval text ended in a trailing comma
    # and so produced just the two option names; keep that.
    push(@options, $gs_version) if (defined $gs_version && $gs_version ne '');

    # create a plugin object
    my $plugobj = eval { $pluginname->new([], \@options) };
    die "$@" if $@;

    return $plugobj;
}
178
# Load and construct every plugin named in a plugin list.
#
#   $plugin_list      - ref to a list of array refs; each inner array is a
#                       plugin name followed by that plugin's options.
#                       NOTE: the inner arrays are modified in place (the
#                       name is shifted off and the options are quoted).
#   $verbosity        - stored in the file-level variable; defaults to 2
#   $outhandle        - stored in the file-level variable; defaults to 'STDERR'
#   $failhandle       - stored in the file-level variable; defaults to 'STDERR'
#   $globaloptions    - ref to a list of options given to every plugin
#                       (left untouched; may be undef)
#   $incremental_mode - handed to each plugin's set_incremental()
#   $gs_version       - passed to each plugin after -gs_version
#   $site             - if true, passed to each plugin after -site_name
#
# Returns a ref to the list of initialised plugin objects, in list order.
# Dies if a plugin cannot be found or its constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my ($incremental_mode, $gs_version, $site);
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode, $gs_version, $site) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # before pushing collection perl and plugin directories onto INC, test that
    # they aren't already there [jmt12]
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib'));
    &util::augmentINC(&FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},'perllib','plugins'));

    # Quote COPIES of the global options. Quoting them in place altered the
    # caller's array, so a second call with the same array wrapped them in a
    # second pair of quotes and the eval below failed. Options that are
    # already quoted are left alone, as is done for plugin options.
    my @quoted_globals = ();
    if (defined $globaloptions) {
        @quoted_globals = map { ($_ =~ m/^\s*\".*\"\s*$/) ? $_ : "\"$_\"" } @$globaloptions;
    }
    my $globals = join (",", @quoted_globals);

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # create a plugin object
        my ($plugobj);
        # put quotes around each option to the plugin, unless the option is already quoted
        map { $_ = "\"$_\"" unless ($_ =~ m/^\s*\".*\"\s*$/) ; } @$pluginoptions;
        my $site_option = $site ? "\"-site_name\",\"$site\"," : "";
        my $options = "$site_option"."-gs_version,$gs_version,".join (",", @$pluginoptions);
        if ($globals) {
            if (@$pluginoptions) {
                $options .= ",";
            }
            $options .= "$globals";
        }
        # need to escape backslash before putting in to the eval
        # but watch out for any \" (which shouldn't be further escaped)
        $options =~ s/\\([^"])/\\\\$1/g; #"
        $options =~ s/\$/\\\$/g;
        # '@' interpolates inside the double-quoted strings of the eval just
        # as '$' does, so it needs the same protection (e.g. "a@b.org").
        $options =~ s/\@/\\\@/g;

        # NOTE(review): the option list is still assembled as Perl source
        # text and run through a string eval, so $gs_version and $site are
        # parsed as code. Kept because the unquoting of pre-quoted options
        # depends on it.
        eval ("\$plugobj = new \$pluginname([],[$options])");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
233
234
# Record the GLI flag on every plugin object, then call begin() on each.
#
#   $pluginfo  - ref to the list of plugin objects
#   $base_dir, $processor, $maxdocs - forwarded to each plugin's begin()
#   $gli       - stored under the 'gli' key of every plugin object
#
# All plugins receive the flag before the first begin() call is made.
# Returns whatever the begin() calls returned, in plugin order.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @begin_results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@begin_results, $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @begin_results;
}
241
# Call remove_all() on every plugin object, forwarding all arguments.
#
#   $pluginfo - ref to the list of plugin objects
#   $base_dir, $processor, $maxdocs - forwarded unchanged
#
# Returns whatever the remove_all() calls returned, in plugin order.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @remove_results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@remove_results, $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @remove_results;
}
247 
# Ask the plugins to remove each deleted file from the archives.
#
#   $pluginfo      - ref to the list of plugin objects
#   $infodbtype    - info database type; "gdbm-txtgz" is treated as "gdbm"
#   $archivedir    - archives directory
#   $deleted_files - ref to the list of files to remove
#
# For every file, the 'archiveinf-src' info database is consulted for the
# OIDs the file is used in (in most cases just one), and the file is offered
# to each plugin's remove_one() in turn until one returns a defined value
# other than -1. A file no plugin handles is reported on STDERR and
# processing carries on with the next file.
#
# Returns nothing when $deleted_files is empty; otherwise 1 if every file
# was handled by some plugin and 0 if at least one was not. (Callers don't
# seem to do anything with the return value.)
#
# NOTE(review): dbutil is called here but not loaded by this file; it is
# presumably loaded by the calling script - confirm.
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;

    return unless @$deleted_files;

    if ($infodbtype eq "gdbm-txtgz") {
        $infodbtype = "gdbm";
    }
    my $arcinfo_src_filename
        = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $every_file_handled = 1;

    foreach my $file (@$deleted_files) {
        # look up all the OIDs that this file is used in
        my $placeholder_path = &util::abspath_to_placeholders($file);
        my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $placeholder_path);
        my $oids = $src_rec->{'oid'};

        # becomes 1 once a plugin has processed the file successfully
        my $handled = 0;
        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            # undefined: not recognised by this plugin; -1: it hit an error.
            # Either way, try the next plugin.
            next if (!defined $rv || $rv == -1);
            $handled = 1;
            last;
        }

        # some plugin processed this file: on to the next deleted file
        next if $handled;

        # no plugin could process the file: warn, remember the failure and
        # keep going with the remaining files
        print STDERR "WARNING: plugin::remove_some() failed to process $file with oid(s) ". join(",", @$oids) . "\n";
        $every_file_handled = 0;
    }

    return $every_file_handled;
}
288
# Offer a file to the file_block_read() method of every plugin.
#
#   $pluginfo   - ref to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata - forwarded to each plugin
#   $gli        - true when running under GLI (defaults to 0); the file is
#                 then announced on STDERR
#
# Every plugin is called, whatever the earlier ones returned. Dies if a
# ".kill" file exists in $GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # the result of each call is deliberately not acted on: there is no
    # early exit once a plugin has claimed the file
    foreach my $plugobj (@$pluginfo) {
        my $ignored_rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }

}
317
318
# Pass a file by each plugin's metadata_read() in turn until one of them
# deals with it.
#
#   $pluginfo - ref to the list of plugin objects
#   $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
#   $extrametafile, $processor, $aux - forwarded to each plugin
#   $gli      - true when running under GLI (defaults to 0); the file, and
#               any processing error, is then announced on STDERR
#
# A plugin's metadata_read() must return:
#   undef - could not recognise the file
#   -1    - tried but hit an error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result that is not -1, or 0 when no plugin
# produces one. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                         $extrametakeys, $extrametadata, $extrametafile,
                                         $processor, $gli, $aux);

        # undefined - was not recognised by this plugin
        next unless defined $rv;

        if ($rv == -1) {
            # an error has occurred: report it to GLI and try the next plugin
            print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
            next;
        }

        return $rv;
    }

    return 0;
}
369
# Pass a file by each plugin's read() in turn until one of them deals
# with it.
#
#   $pluginfo    - ref to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata, $processor, $aux
#                - forwarded to each plugin
#   $maxdocs     - forwarded; set to -1 unless defined and containing a digit
#   $total_count - forwarded; set to 0 unless defined and containing a digit
#   $gli         - true when running under GLI (defaults to 0); progress
#                  tags are then printed on STDERR
#
# A plugin's read() must return:
#   undef - could not recognise the file
#   -1    - tried but hit an error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result that is not -1. When no plugin produces
# one, the file is reported (to $outhandle at verbosity 2 and above, and
# always to $failhandle), the matching counter in $stats is incremented
# ('num_not_processed' if some plugin returned -1, 'num_not_recognised'
# otherwise) and 0 is returned.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!(defined $maxdocs && $maxdocs =~ /\d/)) {
        $maxdocs = -1;
    }
    if (!(defined $total_count && $total_count =~ /\d/)) {
        $total_count = 0;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (&FileUtils::fileExists($kill_file)) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $error_seen = 0;
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by this plugin
        next unless defined $rv;

        if ($rv == -1) {
            # an error has occurred; remember it and try the next plugin
            $error_seen = 1;
            next;
        }

        return $rv;
    }

    if (!$error_seen) {
        # was not recognised
        gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file) if ($verbosity >= 2);

        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    } else {
        # was recognised but couldn't be processed
        gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file) if ($verbosity >= 2);

        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    }
    return 0;
}
440
# write out some general stats that the plugins have compiled - note that
# the buildcol.pl process doesn't currently call this process so the stats
# are only output after import.pl -
#
#   $pluginfo    - ref to the list of plugin objects; each is handed the
#                  shared $stats hash through compile_stats()
#   $statshandle - handle the summary lines are written to (via gsprintf)
#   $faillog     - fail log name, mentioned when files were rejected or
#                  not recognised
#   $gli         - true when running under GLI (defaults to 0); an
#                  <ImportComplete> tag is then printed on STDERR
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $processed    = $stats->{'num_processed'};
    my $blocked      = $stats->{'num_blocked'};
    my $rejected     = $stats->{'num_not_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $archives     = $stats->{'num_archives'};

    my $total = $processed + $blocked + $rejected + $unrecognised;

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$processed' blocked='$blocked' ignored='$unrecognised' failed='$rejected'>\n";
    }

    # documents considered
    if ($total != 1) {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    } else {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }

    # ... of which archives (only mentioned when there are any)
    if ($archives) {
        if ($archives != 1) {
            gsprintf($statshandle, "   ({plugin.including_archives})\n",
                     $archives);
        } else {
            gsprintf($statshandle, "   ({plugin.including_archive})\n");
        }
    }

    # documents included
    if ($processed != 1) {
        gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    } else {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }

    # documents no plugin recognised (only mentioned when there are any)
    if ($unrecognised) {
        if ($unrecognised != 1) {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $unrecognised);
        } else {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
    }

    # documents recognised but not processed (only mentioned when there are any)
    if ($rejected) {
        if ($rejected != 1) {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $rejected);
        } else {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
    }

    # point at the fail log whenever something was left out
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
498
# Call end() on every plugin object.
#
#   $pluginfo  - ref to the list of plugin objects
#   $processor - forwarded to each plugin's end()
#
# Returns whatever the end() calls returned, in plugin order.
sub end {
    my ($pluginfo, $processor) = @_;

    my @end_results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@end_results, $plugobj->end($processor));
    }
    return @end_results;
}
503
# Call deinit() on every plugin object.
#
#   $pluginfo  - ref to the list of plugin objects
#   $processor - forwarded to each plugin's deinit()
#
# Returns whatever the deinit() calls returned, in plugin order.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @deinit_results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@deinit_results, $plugobj->deinit($processor));
    }
    return @deinit_results;
}
510
5111;
Note: See TracBrowser for help on using the browser.