root/main/trunk/greenstone2/perllib/plugin.pm @ 21307

Revision 21307, 13.4 KB (checked in by kjdon, 11 years ago)

removeold renamed to remove_all; added remove_some, which takes a list of files to be deleted (or reindexed)

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28use inexport;
29
30use strict; # to pick up typos and undeclared variables...
31no strict 'refs'; # ...but allow filehandles to be variables and vice versa
32no strict 'subs';
33
34require util;
35use gsprintf 'gsprintf';
36
# Backwards-compatibility table: old "xxPlug" plugin names (left) and the
# plugin names that replace them (right). It can be removed once the old
# names no longer need to be supported in the config file.
my $plugin_name_map = {
    qw(
        GAPlug        GreenstoneXMLPlugin
        ArcPlug       ArchivesInfPlugin
        RecPlug       DirectoryPlugin
        TEXTPlug      TextPlugin
        XMLPlug       ReadXMLFile
        EMAILPlug     EmailPlugin
        SRCPlug       SourceCodePlugin
        NULPlug       NulPlugin
        W3ImgPlug     HTMLImagePlugin
        PagedImgPlug  PagedImagePlugin
        METSPlug      GreenstoneMETSPlugin
        PPTPlug       PowerPointPlugin
        PSPlug        PostScriptPlugin
        DBPlug        DatabasePlugin
    )
};
55
# Module-level state.

# Counters handed to every plugin's compile_stats() and reported by
# write_stats(); all of them start at zero.
my $stats = {
    map { $_ => 0 } qw(
        num_processed
        num_blocked
        num_not_processed
        num_not_recognised
        num_archives
    )
};

# Settings assigned by load_plugins().
my $verbosity;
my $outhandle;
my $failhandle;
# $globaloptions contains any options that should be passed to all plugins.
my $globaloptions;

66
# Translate a plugin name as written in a configuration file into the name
# of the plugin that should actually be loaded.
#
#   $pluginname - the name to translate
#
# Returns:
#   - the mapped name, if $pluginname is a key of $plugin_name_map;
#   - otherwise, if the name ends in "Plug", the name with that trailing
#     "Plug" replaced by "Plugin";
#   - otherwise the name unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    if (defined $plugin_name_map->{$pluginname}) {
        return $plugin_name_map->{$pluginname};
    }

    my $valid_name = $pluginname;
    # The substitution is anchored to the end of the name so that only the
    # trailing "Plug" is rewritten; an unanchored s/Plug/Plugin/ would
    # rewrite the first "Plug" of a name such as "PlugTestPlug" instead.
    $valid_name =~ s/Plug$/Plugin/;
    return $valid_name;
}
78
# Locate the file that implements a plugin and load it with require.
#
#   $pluginname - the plugin to load; the file looked for is
#                 perllib/plugins/$pluginname.pm
#
# The candidate locations are tried in this order and the first file that
# exists is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/  (only if GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/
#   3. $GSDLHOME/ext/<e>/      for each ':'-separated entry of GSDLEXTS
#   4. $GSDL3SRCHOME/ext/<e>/  for each ':'-separated entry of GSDL3EXTS
#   5. $GSDLHOME/
#
# If no candidate exists, plugin.could_not_find_plugin is reported on STDERR
# through gsprintf and the sub dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # Path of the plugin file relative to each search root.
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @search_paths = ();

    # Collection-specific customisation, then the collection itself.
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@search_paths,
             &util::filename_cat($collectdir, "custom",
                                 $ENV{'GSDLCOLLECTION'}, $relative_path));
    }
    push(@search_paths, &util::filename_cat($collectdir, $relative_path));

    # Extensions: each pair is (variable listing the extensions, variable
    # naming the installation whose "ext" directory holds them).
    foreach my $pair (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$pair;
        next unless defined $ENV{$exts_var};

        my $ext_root = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $ext (split(/:/, $ENV{$exts_var})) {
            push(@search_paths,
                 &util::filename_cat($ext_root, $ext, $relative_path));
        }
    }

    # Finally the main installation.
    push(@search_paths, &util::filename_cat($ENV{'GSDLHOME'}, $relative_path));

    my $loaded = 0;
    foreach my $candidate (@search_paths) {
        next unless -e $candidate;
        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
144
# Load a plugin and construct an instance of it for information purposes.
#
#   $pluginname - plugin name; old-style names are first translated with
#                 get_valid_pluginname()
#
# The plugin file is loaded with load_plugin_require() and the object is
# built by calling the plugin's constructor with an empty first list and
# the single option "-gsdlinfo".
#
# Returns the new plugin object. Dies with the (stringified) error if the
# constructor fails.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Call the constructor directly instead of building Perl source for a
    # string eval: the option is passed as an ordinary string rather than
    # as a bareword that only parses because of "no strict 'subs'".
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}
159
# Load and initialise every plugin named in a plugin list.
#
#   $plugin_list      - reference to a list of array references; the first
#                       element of each inner array is the plugin name and
#                       the remaining elements are that plugin's options.
#                       NOTE: the plugin name is shifted off each inner
#                       array, so the caller's arrays are left holding only
#                       the options.
#   $verbosity        - stored in the module-level $verbosity (default 2)
#   $outhandle        - stored in the module-level $outhandle
#                       (default 'STDERR')
#   $failhandle       - stored in the module-level $failhandle
#                       (default 'STDERR')
#   $globaloptions    - reference to a list of options appended to every
#                       plugin's own options; may be undefined
#   $incremental_mode - passed to each plugin's set_incremental()
#
# Inner arrays whose plugin name is undefined are skipped. For every other
# entry the name is translated with get_valid_pluginname(), the plugin file
# is loaded with load_plugin_require(), the constructor is called with an
# empty first list and the combined options, and then init() and
# set_incremental() are called on the new object.
#
# Returns a reference to the list of plugin objects, in plugin-list order.
# Dies with the (stringified) error if a constructor fails.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental_mode;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # Make the collection's own plugin directory the first place Perl looks.
    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Work on a copy so the caller's global option list is never modified.
    my @global_args = defined $globaloptions ? @$globaloptions : ();

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # Create the plugin object. The option values are handed to the
        # constructor as data rather than being quoted and spliced into
        # Perl source for a string eval, so options containing quotes, '@',
        # '$' or backslashes arrive exactly as given.
        my $plugobj
            = eval { $pluginname->new([], [@$pluginoptions, @global_args]) };
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
208
209
# Record the GLI flag on every plugin and then call each plugin's begin().
#
#   $pluginfo  - reference to the list of plugin objects
#   $base_dir, $processor, $maxdocs
#              - passed unchanged (after $pluginfo) to each plugin's begin()
#   $gli       - stored in each plugin object under the key 'gli'
#
# All plugins receive the flag before any begin() is called. Returns the
# values returned by the begin() calls, in plugin order.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results,
             $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
216
# Call remove_all() on every plugin.
#
#   $pluginfo  - reference to the list of plugin objects
#   $base_dir, $processor, $maxdocs
#              - passed unchanged (after $pluginfo) to each remove_all()
#
# Returns the values returned by the remove_all() calls, in plugin order.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results,
             $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
222 
# Tell every plugin about each file in a list of deleted (or to be
# reindexed) files.
#
#   $pluginfo      - reference to the list of plugin objects
#   $archivedir    - passed to inexport::src_db_file() to obtain the name
#                    of the 'archiveinf-src' GDBM file
#   $deleted_files - reference to the list of files; nothing is done when
#                    it is undefined or empty
#
# For each file, the file's record is read from the GDBM file, its 'oid'
# entry is extracted, and remove_one($file, $oids) is called on every
# plugin.
sub remove_some {
    my ($pluginfo, $archivedir, $deleted_files) = @_;

    # Nothing to do (and no database lookup needed) without any files.
    return unless defined $deleted_files && scalar(@$deleted_files) > 0;

    my $arcinfo_src_filename = &inexport::src_db_file($archivedir);

    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' GDBM file to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file);
        my $oids = $src_rec->{'oid'};

        foreach my $plugobj (@$pluginfo) {
            $plugobj->remove_one($file, $oids);
        }
    }
}
# Offer a file to every plugin's file_block_read().
#
#   $pluginfo   - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata
#               - passed unchanged to each plugin
#   $gli        - when true, a <File> tag is written to STDERR for the GLI;
#                 defaults to 0
#
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR. Every plugin is called;
# the values the plugins return are not used.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # No plugin stops the loop: each one gets to see the file.
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }
}
267
268
# Offer a file to each plugin's metadata_read() in turn until one of them
# deals with it.
#
#   $pluginfo  - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
#   $extrametafile, $processor, $aux
#              - passed unchanged to each plugin
#   $maxdocs   - passed to each plugin; set to -1 when undefined or when it
#                contains no digit
#   $gli       - when true, <File> and <ProcessingError> tags are written
#                to STDERR for the GLI; defaults to 0
#
# A plugin's metadata_read() must return:
#   undef          - could not recognise the file
#   -1             - tried but had an error
#   0              - blocked
#   anything else  - successful processing
#
# Returns the first plugin result that is defined and not -1, or 0 when no
# plugin produces one. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                         $extrametakeys, $extrametadata, $extrametafile,
                                         $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by the plugin, try the next one
        next unless defined $rv;

        # blocked or processed - this plugin has dealt with the file
        return $rv unless $rv == -1;

        # an error has occurred; report it and carry on with the next plugin
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
320
# Offer a file to each plugin's read() in turn until one of them deals with
# it, and report files that no plugin could handle.
#
#   $pluginfo    - reference to the list of plugin objects
#   $base_dir, $file, $block_hash, $metadata, $processor, $aux
#                - passed unchanged to each plugin
#   $maxdocs     - passed to each plugin; set to -1 when undefined or when
#                  it contains no digit
#   $total_count - passed to each plugin; set to 0 when undefined or when
#                  it contains no digit
#   $gli         - when true, tags are written to STDERR for the GLI;
#                  defaults to 0
#
# A plugin's read() must return:
#   undef          - could not recognise the file
#   -1             - tried but had an error
#   0              - blocked
#   anything else  - successful processing
#
# Returns the first plugin result that is defined and not -1. When there is
# none, the file is reported as unprocessed (at least one plugin returned
# -1) or unrecognised (every plugin returned undef): a message goes to
# $outhandle at verbosity 2 or more, a tag goes to STDERR for the GLI, a
# line goes to $failhandle, the matching counter in $stats is incremented,
# and 0 is returned. Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
        $total_count = 0;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $rv = $plugobj->read($pluginfo, $base_dir, $file,
                                $block_hash, $metadata, $processor, $maxdocs,
                                $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin, try the next one
        next unless defined $rv;

        # blocked or processed - this plugin has dealt with the file
        return $rv unless $rv == -1;

        # an error has occurred
        $had_error = 1;
    }

    if (!$had_error) {
        # was not recognised
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    } else {
        # was recognised but couldn't be processed
        if ($verbosity >= 2) {
            gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file);
        }
        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    }
    return 0;
}
391
# Write out the general statistics that the plugins have compiled.
# Note that the buildcol.pl process doesn't currently call this sub, so the
# stats are only output after import.pl.
#
#   $pluginfo    - reference to the list of plugin objects; compile_stats()
#                  is called on each with the module-level $stats
#   $statshandle - handle the summary lines are written to through gsprintf
#   $faillog     - passed to the plugin.see_faillog message
#   $gli         - when true, an <ImportComplete> tag is written to STDERR
#                  for the GLI; defaults to 0
#
# The number of files considered is the sum of the processed, blocked,
# not-processed and not-recognised counters. Separate singular and plural
# messages are used for each count.
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    foreach my $plugin (@$pluginfo) {
        $plugin->compile_stats($stats);
    }

    # Take the counters only after every plugin has added its own figures.
    my $processed    = $stats->{'num_processed'};
    my $blocked      = $stats->{'num_blocked'};
    my $rejected     = $stats->{'num_not_processed'};
    my $unrecognised = $stats->{'num_not_recognised'};
    my $archives     = $stats->{'num_archives'};

    my $total = $processed + $blocked + $rejected + $unrecognised;

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # files considered
    if ($total != 1) {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    } else {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }

    # archives among them, mentioned only when there are any
    if ($archives) {
        if ($archives != 1) {
            gsprintf($statshandle, "   ({plugin.including_archives})\n",
                     $archives);
        } else {
            gsprintf($statshandle, "   ({plugin.including_archive})\n");
        }
    }

    # files included
    if ($processed != 1) {
        gsprintf($statshandle, "* {plugin.n_included}\n", $processed);
    } else {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }

    # files no plugin recognised, mentioned only when there are any
    if ($unrecognised) {
        if ($unrecognised != 1) {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $unrecognised);
        } else {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
    }

    # files that were recognised but rejected, mentioned only when there are any
    if ($rejected) {
        if ($rejected != 1) {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $rejected);
        } else {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
    }

    # point the reader at the fail log when anything went wrong
    if ($rejected || $unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
449
# Call end($processor) on every plugin.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed unchanged to each plugin's end()
#
# Returns the values returned by the end() calls, in plugin order.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
454
# Call deinit($processor) on every plugin.
#
#   $pluginfo  - reference to the list of plugin objects
#   $processor - passed unchanged to each plugin's deinit()
#
# Returns the values returned by the deinit() calls, in plugin order.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
461
4621;
Note: See TracBrowser for help on using the browser.