source: main/trunk/greenstone2/perllib/plugin.pm@ 27183

Last change on this file since 27183 was 26223, checked in by kjdon, 12 years ago

some mods to allow collection specific perl modules

  • Property svn:keywords set to Author Date Id Revision
File size: 14.2 KB
RevLine 
[537]1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[4]25
26package plugin;
27
[21307]28
[7829]29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
[10579]31no strict 'subs';
[7829]32
[134]33require util;
[7829]34use gsprintf 'gsprintf';
[4]35
# Mapping from old plugin names to new ones, kept for backwards compatibility.
# It can be removed at some time in the future when the old xxPlug names no
# longer need to be supported in the config file.
my $plugin_name_map = {
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'ArcPlug'      => 'ArchivesInfPlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'TEXTPlug'     => 'TextPlugin',
    'XMLPlug'      => 'ReadXMLFile',
    'EMAILPlug'    => 'EmailPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'NULPlug'      => 'NulPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'DBPlug'       => 'DatabasePlugin'
};

# global variables

# Counters shared by the functions in this file: read() increments the
# "not processed" / "not recognised" counts, and write_stats() hands the
# hash to every plugin's compile_stats() before reporting the totals.
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0
};

# These are (re)assigned by load_plugins().
# $globaloptions contains any options that should be passed to all plugins.
my ($verbosity, $outhandle, $failhandle, $globaloptions);

# Translate a plugin name as written in a config file into the current name.
#
#   $pluginname - plugin name, possibly one of the old xxPlug names
#
# Returns the mapped name when $pluginname is listed in $plugin_name_map;
# otherwise a trailing "Plug" is expanded to "Plugin"; any other name
# (including undef) is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    return $pluginname unless defined $pluginname;

    # explicit renames take priority over the generic suffix rule
    return $plugin_name_map->{$pluginname}
        if defined $plugin_name_map->{$pluginname};

    # Anchor the substitution: only the "Plug" suffix is rewritten, never an
    # earlier "Plug" that happens to occur inside the name.
    my $valid_name = $pluginname;
    $valid_name =~ s/Plug$/Plugin/;

    return $valid_name;
}
[21290]77
# Locate the source file of plugin $pluginname and load it with require.
#
# Candidate files named perllib/plugins/$pluginname.pm are looked for, in
# this order, and the first one that exists is loaded:
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/  (only if GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/
#   3. $GSDLHOME/ext/<e>/      for each <e> in the colon-separated GSDLEXTS
#   4. $GSDL3SRCHOME/ext/<e>/  for each <e> in the colon-separated GSDL3EXTS
#   5. $GSDLHOME/
# The perllib folder that holds the chosen plugin is placed at the front of
# @INC so that modules living next to the plugin can be found.
#
# Dies (after reporting {plugin.could_not_find_plugin} on STDERR) when no
# candidate exists.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # pp_plugname is shorthand for 'perllib' 'plugins' '$pluginname.pm'
    my $pp_plugname
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @check_list = ();

    # collection-specific locations come first
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@check_list,
             &util::filename_cat($collectdir, "custom", $ENV{'GSDLCOLLECTION'},
                                 $pp_plugname));
    }
    push(@check_list, &util::filename_cat($collectdir, $pp_plugname));

    # then the extensions; each pair is (variable listing the extensions,
    # variable naming the installation that contains the "ext" folder)
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'],
                            ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$ext_source;
        next unless defined $ENV{$exts_var};

        my $ext_prefix = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $e (split(/:/, $ENV{$exts_var})) {
            push(@check_list,
                 &util::filename_cat($ext_prefix, $e, $pp_plugname));
        }
    }

    # and finally the main installation
    push(@check_list, &util::filename_cat($ENV{'GSDLHOME'}, $pp_plugname));

    foreach my $plugname (@check_list) {
        next unless -e $plugname;

        # lets add perllib folder to INC
        my ($perllibfolder) = $plugname =~ /^(.*[\/\\]perllib)[\/\\]plugins/;
        if (defined $perllibfolder && -d $perllibfolder) {
            # Move the folder to the front instead of blindly unshifting it:
            # the lookup order is the same, but @INC no longer gains one
            # duplicate entry for every plugin that is loaded.
            # (references in @INC are hooks and are left alone)
            @INC = ($perllibfolder,
                    grep { ref($_) || $_ ne $perllibfolder } @INC);
        }
        require $plugname;
        return;
    }

    # none of the candidate files exists
    &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
              $pluginname);
    die "\n";
}
[10579]148
# Load plugin $pluginname and construct it in "info" mode.
#
#   $pluginname - plugin name (old xxPlug names are accepted)
#   $gs_version - Greenstone version, handed to the plugin after -gs_version
#
# Returns the plugin object created with the options
# -gsdlinfo -gs_version $gs_version.  Dies if the plugin cannot be found or
# if its constructor fails.
sub load_plugin_for_info {
    my ($pluginname, $gs_version) = (@_);
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # Build the option list as data rather than as source text for a string
    # eval: the version is then passed through exactly as given instead of
    # being re-parsed as Perl code (where e.g. 3.10 would turn into 3.1).
    my @options = ('-gsdlinfo', '-gs_version');
    # an undefined or empty version contributed no list element before
    # either, so keep leaving it out
    push(@options, $gs_version)
        if (defined $gs_version && $gs_version ne '');

    # create a plugin object
    my $plugobj = eval { $pluginname->new([], \@options) };
    die "$@" if $@;

    return $plugobj;
}
163
# Load and construct every plugin named in $plugin_list.
#
#   $plugin_list - reference to a list of array references, each holding a
#                  plugin name followed by that plugin's options
# The remaining arguments are, in order: verbosity, outhandle, failhandle,
# globaloptions (reference to a list of options given to every plugin),
# incremental_mode and gs_version.  The first four are stored in the
# file-level variables of the same name; verbosity defaults to 2 and both
# handles default to 'STDERR'.
#
# Each plugin is created with -gs_version, its own options and the global
# options, then init() and set_incremental() are called on it.
# The arrays passed in by the caller are not modified.
#
# Returns a reference to the list of plugin objects, in $plugin_list order.
# Dies if a plugin cannot be found or constructed.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my ($incremental_mode, $gs_version);
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode, $gs_version) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # let collection specific modules and plugins be found
    my $colperllibdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib");
    unshift (@INC, $colperllibdir);
    my $colplugindir = &util::filename_cat($colperllibdir,"plugins");
    unshift (@INC, $colplugindir);

    # put quotes around an option, unless the option is already quoted
    my $quote_option = sub {
        my ($option) = @_;
        return $option if ($option =~ m/^\s*\".*\"\s*$/);
        return "\"$option\"";
    };

    # Quote copies of the global options.  Quoting the caller's array in
    # place would quote the same options again on every later call.
    my $globals = join (",", map { $quote_option->($_) } @{ $globaloptions || [] });

    # The version is quoted so that it reaches the plugin exactly as given;
    # an undefined or empty version contributes no list element.
    my $version_arg = (defined $gs_version && $gs_version ne '')
        ? "\"$gs_version\"" : "";

    foreach my $pluginoptions (@$plugin_list) {
        # work on a copy so that the caller's list keeps the plugin name
        my ($pluginname, @plugin_args) = @{ $pluginoptions || [] };
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        my @quoted_args = map { $quote_option->($_) } @plugin_args;
        my $options = "\"-gs_version\",$version_arg,".join (",", @quoted_args);
        if ($globals) {
            if (@quoted_args) {
                $options .= ",";
            }
            $options .= "$globals";
        }
        # need to escape backslash before putting in to the eval
        # but watch out for any \" (which shouldn't be further escaped)
        $options =~ s/\\([^"])/\\\\$1/g; #"
        # $ and @ would otherwise be interpolated as variables inside the
        # double-quoted strings when the text below is evaluated
        $options =~ s/\$/\\\$/g;
        $options =~ s/\@/\\\@/g;

        # create a plugin object
        # NOTE(review): this is a string eval over option text that comes
        # from the collection configuration; it is kept because already
        # quoted options rely on being parsed as Perl string literals.
        my ($plugobj);
        eval ("\$plugobj = \$pluginname->new([],[$options])");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
217
[835]218
# Start a run: store the $gli flag in every plugin object, then call
# begin() on each plugin in list order.
#
# Returns the values returned by the plugins' begin() calls (their number
# when called in scalar context).
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # every plugin gets the flag before the first begin() is called
    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results,
             $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
225
# Call remove_all() on each plugin in list order, forwarding all arguments.
#
# Returns the values returned by the plugins' remove_all() calls (their
# number when called in scalar context).
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results,
             $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
231
# Ask the plugins to remove the documents that came from deleted files.
#
#   $pluginfo      - reference to the list of plugin objects
#   $infodbtype    - info database type ("gdbm-txtgz" is handled as "gdbm")
#   $archivedir    - archives directory
#   $deleted_files - reference to the list of deleted source files
#
# Every file in $deleted_files is offered to the plugins in turn until one
# of them returns a defined value other than -1 from remove_one().
#
# Returns nothing when $deleted_files is empty.  Otherwise returns the
# result for the last file if every file was removed successfully, and the
# first false result (0 when no plugin accepted a file) if not.
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
    return if (scalar(@$deleted_files)==0);
    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $overall_rv;
    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file);
        my $oids = $src_rec->{'oid'};

        my $file_rv = 0; # stays 0 if no plugin takes the file
        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            if (defined $rv && $rv != -1) {
                $file_rv = $rv;
                last;
            } # else undefined (was not recognised by the plugin) or there was an error, try the next one
        }

        # Carry on with the remaining files rather than returning after the
        # first one; a false result, once seen, is what gets reported.
        $overall_rv = $file_rv if (!defined $overall_rv || $overall_rv);
    }

    return $overall_rv;
}
# Offer $file to the file_block_read() method of every plugin.
#
# All plugins are called, whatever each of them returns.  When $gli is set
# the file is first announced on STDERR.  Dies if a ".kill" file exists in
# $GSDLCOLLECTDIR.  The return value of this function is not defined.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    $gli = 0 if (!defined $gli);

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $last_result = 0;
    foreach my $plugobj (@$pluginfo) {
        # no early exit here: every plugin gets to see the file
        $last_result = $plugobj->file_block_read($pluginfo, $base_dir, $file,
                                                 $block_hash, $metadata, $gli);
    }
}
284
285
# Offer $file to the metadata_read() method of each plugin in turn until
# one of them deals with it.
#
# A plugin's metadata_read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result other than -1, or 0 if no plugin gave
# one.  When $gli is set, the file is announced on STDERR and so is every
# plugin error.  Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    $gli = 0 if (!defined $gli);

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $outcome = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                              $extrametakeys, $extrametadata, $extrametafile,
                                              $processor, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $outcome;

        # blocked or processed: this plugin has dealt with the file
        return $outcome unless ($outcome == -1);

        # an error has occurred, let the next plugin try
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}
336
# Offer $file to the read() method of each plugin in turn until one of
# them deals with it.
#
# A plugin's read must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first defined result other than -1.  If no plugin gives one,
# the failure is reported (to $outhandle when verbosity is 2 or more, to
# STDERR when $gli is set, and always to $failhandle), the matching counter
# in $stats is incremented and 0 is returned.
# $maxdocs defaults to -1 and $total_count to 0 when they are undefined or
# contain no digit; $gli defaults to 0.
# Dies if a ".kill" file exists in $GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
        $total_count = 0;
    }
    $gli = 0 if (!defined $gli);

    # file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $outcome = $plugobj->read($pluginfo, $base_dir, $file,
                                     $block_hash, $metadata, $processor, $maxdocs,
                                     $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $outcome;

        # blocked or processed: this plugin has dealt with the file
        return $outcome unless ($outcome == -1);

        # an error has occurred, let the next plugin try
        $had_error = 1;
    }

    if (!$had_error) {
        # was not recognised
        gsprintf($outhandle, "{plugin.no_plugin_could_recognise}\n",$file)
            if ($verbosity >= 2);

        # tell the GLI that it was not processed
        print STDERR "<NonRecognisedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_recognise_this_file}\n");
        $stats->{'num_not_recognised'} ++;
    }
    else {
        # was recognised but couldn't be processed
        gsprintf($outhandle, "{plugin.no_plugin_could_process}\n", $file)
            if ($verbosity >= 2);

        # tell the GLI that it was not processed
        print STDERR "<NonProcessedFile n='$glifile'>\n" if $gli;

        gsprintf($failhandle, "$file: {plugin.no_plugin_could_process_this_file}\n");
        $stats->{'num_not_processed'} ++;
    }

    return 0;
}
407
# write out some general stats that the plugins have compiled - note that
# the buildcol.pl process doesn't currently call this process so the stats
# are only output after import.pl -
#
#   $pluginfo    - reference to the list of plugin objects; each one is
#                  given the shared $stats hash through compile_stats()
#   $statshandle - handle the summary is written to
#   $faillog     - passed as the argument of the {plugin.see_faillog} message
#   $gli         - when true, an <ImportComplete ...> line goes to STDERR
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 if (!defined $gli);

    # let every plugin add its own counts to the shared totals
    foreach my $plugobj (@$pluginfo) {
        $plugobj->compile_stats($stats);
    }

    my $num_processed = $stats->{'num_processed'};
    my $num_blocked = $stats->{'num_blocked'};
    my $num_rejected = $stats->{'num_not_processed'};
    my $num_unrecognised = $stats->{'num_not_recognised'};
    my $num_archives = $stats->{'num_archives'};

    my $total = $num_processed + $num_blocked +
        $num_rejected + $num_unrecognised;

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$num_processed' blocked='$num_blocked' ignored='$num_unrecognised' failed='$num_rejected'>\n";
    }

    # documents considered (always reported)
    if ($total != 1) {
        gsprintf($statshandle, "* {plugin.n_considered}\n", $total);
    }
    else {
        gsprintf($statshandle, "* {plugin.one_considered}\n");
    }

    # archives, only mentioned when there are any
    if ($num_archives) {
        if ($num_archives != 1) {
            gsprintf($statshandle, " ({plugin.including_archives})\n",
                     $num_archives);
        }
        else {
            gsprintf($statshandle, " ({plugin.including_archive})\n");
        }
    }

    # documents included (always reported)
    if ($num_processed != 1) {
        gsprintf($statshandle, "* {plugin.n_included}\n", $num_processed);
    }
    else {
        gsprintf($statshandle, "* {plugin.one_included}\n");
    }

    # files no plugin recognised
    if ($num_unrecognised) {
        if ($num_unrecognised != 1) {
            gsprintf($statshandle, "* {plugin.n_unrecognised}\n",
                     $num_unrecognised);
        }
        else {
            gsprintf($statshandle, "* {plugin.one_unrecognised}\n");
        }
    }

    # files that were recognised but could not be processed
    if ($num_rejected) {
        if ($num_rejected != 1) {
            gsprintf($statshandle, "* {plugin.n_rejected}\n",
                     $num_rejected);
        }
        else {
            gsprintf($statshandle, "* {plugin.one_rejected}\n");
        }
    }

    # point at the fail log if anything went wrong
    if ($num_rejected || $num_unrecognised) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
465
# Call end($processor) on each plugin in list order.
#
# Returns the values returned by the plugins' end() calls (their number
# when called in scalar context).
sub end {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}
[4]470
# Call deinit($processor) on each plugin in list order.
#
# Returns the values returned by the plugins' deinit() calls (their number
# when called in scalar context).
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results = ();
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}
477
[4]4781;
Note: See TracBrowser for help on using the repository browser.