source: gs2-extensions/parallel-building/trunk/src/perllib/plugin.pm@ 26935

Last change on this file since 26935 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl committed in the main trunk but not in the x64 branch)

File size: 13.8 KB
Line 
1###########################################################################
2#
3# plugin.pm -- functions to handle using plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package plugin;
27
28
29use strict; # to pick up typos and undeclared variables...
30no strict 'refs'; # ...but allow filehandles to be variables and vice versa
31no strict 'subs';
32
33require util;
34use gsprintf 'gsprintf';
35
# Mapping from old plugin names to new ones, kept for backwards
# compatibility.  It can be removed at some time in the future, once the
# old xxPlug names no longer need to be supported in the config file.
my $plugin_name_map = {
    'GAPlug'       => 'GreenstoneXMLPlugin',
    'ArcPlug'      => 'ArchivesInfPlugin',
    'RecPlug'      => 'DirectoryPlugin',
    'TEXTPlug'     => 'TextPlugin',
    'XMLPlug'      => 'ReadXMLFile',
    'EMAILPlug'    => 'EmailPlugin',
    'SRCPlug'      => 'SourceCodePlugin',
    'NULPlug'      => 'NulPlugin',
    'W3ImgPlug'    => 'HTMLImagePlugin',
    'PagedImgPlug' => 'PagedImagePlugin',
    'METSPlug'     => 'GreenstoneMETSPlugin',
    'PPTPlug'      => 'PowerPointPlugin',
    'PSPlug'       => 'PostScriptPlugin',
    'DBPlug'       => 'DatabasePlugin'
};

# global variables
# counters reported by write_stats(); read() below updates the
# 'num_not_processed' and 'num_not_recognised' entries directly
my $stats = {
    'num_processed'      => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_archives'       => 0
};

# globaloptions contains any options that should be passed to all plugins
my ($verbosity, $outhandle, $failhandle, $globaloptions);

# Translate a (possibly old-style) plugin name into the name to load.
#
#   $pluginname - plugin name as written by the user
#
# Returns the entry from $plugin_name_map when there is one; otherwise a
# name ending in "Plug" has that suffix turned into "Plugin"; any other
# name is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    # explicit renames take precedence over the generic suffix rule
    my $mapped_name = $plugin_name_map->{$pluginname};
    return $mapped_name if defined $mapped_name;

    # Anchor the substitution at the end of the name: only the trailing
    # "Plug" is the old-style suffix, an earlier "Plug" inside the name
    # (e.g. "PlugInPlug") must be left alone.
    (my $valid_name = $pluginname) =~ s/Plug$/Plugin/;
    return $valid_name;
}
77
# Find the source file of plugin $pluginname and 'require' it.
#
# Candidate locations are tried in this order and the first file that
# exists wins:
#   1. <GSDLCOLLECTDIR>/custom/<GSDLCOLLECTION>/  (only if GSDLCOLLECTION is set)
#   2. <GSDLCOLLECTDIR>/
#   3. <GSDLHOME>/ext/<e>/      for each ':'-separated entry of GSDLEXTS
#   4. <GSDL3SRCHOME>/ext/<e>/  for each ':'-separated entry of GSDL3EXTS
#   5. <GSDLHOME>/
# each followed by perllib/plugins/<pluginname>.pm.
#
# Prints the {plugin.could_not_find_plugin} message to STDERR and dies
# when none of the candidates exists.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # location of the plugin relative to any of the roots listed above:
    # 'perllib' 'plugins' '$pluginname.pm'
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates = ();

    # collection-specific customisation, then the collection itself
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &util::filename_cat($collectdir, "custom",$ENV{'GSDLCOLLECTION'},
                                 $relative_path));
    }
    push(@candidates, &util::filename_cat($collectdir, $relative_path));

    # Greenstone 2 extensions first, then Greenstone 3 ones; each pair is
    # (variable listing the extensions, variable naming the install root)
    foreach my $ext_setting (['GSDLEXTS', 'GSDLHOME'],
                             ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$ext_setting;
        next unless defined $ENV{$exts_var};

        my $ext_root = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $ext_name (split(/:/,$ENV{$exts_var})) {
            push(@candidates,
                 &util::filename_cat($ext_root, $ext_name, $relative_path));
        }
    }

    # finally the main Greenstone installation
    push(@candidates, &util::filename_cat($ENV{'GSDLHOME'}, $relative_path));

    my $loaded = 0;
    foreach my $candidate (@candidates) {
        next unless -e $candidate;
        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}
143
# Load plugin $pluginname and construct it with the single option
# "-gsdlinfo".
#
#   $pluginname - plugin name; old-style names are translated through
#                 get_valid_pluginname() first
#
# Returns the new plugin object.  Dies if the plugin file cannot be
# found (see load_plugin_require) or if the constructor fails.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # create a plugin object
    # The call is completely known here, so use a block eval with a direct
    # method call rather than building a code string and re-parsing it.
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}
158
# Load, construct and initialise every plugin named in $plugin_list.
#
#   $plugin_list      - reference to an array of array references, each
#                       holding a plugin name followed by its options.
#                       NOTE: the name is shifted off each inner array
#                       (behaviour kept from the original code).
#   $verbosity        - stored in the module-level variable; defaults to 2
#   $outhandle        - stored in the module-level variable; defaults to 'STDERR'
#   $failhandle       - stored in the module-level variable; defaults to 'STDERR'
#   $globaloptions    - reference to an array of options that are appended
#                       to the options of every plugin
#   $incremental_mode - handed to each plugin's set_incremental()
#
# Returns a reference to the array of plugin objects, in the order of
# $plugin_list.  Dies if a plugin cannot be found or constructed.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental_mode;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    # Make sure the collection's own plugin directory is on @INC.  The
    # directory is looked for as a literal substring: interpolating a path
    # into a regex would treat '.', '+', '(' or Windows backslashes in it
    # as metacharacters.
    my $inc_paths = join(':', @INC);
    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    if (index($inc_paths, $colplugindir) < 0)
    {
        unshift (@INC, $colplugindir); # [jmt12]
    }

    # Quote copies of the options; the caller's array elements are left
    # untouched so that a second call does not see already-quoted values.
    my @quoted_globals = map { "\"$_\"" } @{ $globaloptions || [] };

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # create a plugin object
        my ($plugobj);
        my @quoted_options = map { "\"$_\"" } @$pluginoptions;
        my $options = join (",", @quoted_options, @quoted_globals);

        # need to escape backslash before putting in to the eval
        # but watch out for any \" (which shouldn't be further escaped)
        $options =~ s/\\([^"])/\\\\$1/g; #"
        # '$' and '@' would both be interpolated inside the double-quoted
        # strings of the eval'ed code, so neutralise them
        $options =~ s/\$/\\\$/g;
        $options =~ s/\@/\\\@/g;

        eval ("\$plugobj = \$pluginname->new([],[$options])");
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}
213
214
# Tell every plugin that a pass over the collection is about to start.
#
#   $pluginfo  - reference to the array of plugin objects
#   $base_dir, $processor, $maxdocs
#              - passed through unchanged to each plugin's begin()
#   $gli       - stored in the 'gli' field of every plugin object
#
# Every plugin is tagged with $gli before the first begin() is called,
# so a plugin's begin() can rely on its siblings already being tagged.
# Returns nothing.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }
    foreach my $plugobj (@$pluginfo) {
        $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs);
    }
    return;
}
221
# Call remove_all() on every plugin in turn.
#
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $processor, $maxdocs
#             - passed through unchanged to each plugin's remove_all()
#
# Returns nothing.
sub remove_all {
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs);
    }
    return;
}
227
# Ask the plugins to remove each of the deleted files.
#
#   $pluginfo      - reference to the array of plugin objects
#   $infodbtype    - info database type ("gdbm-txtgz" is treated as "gdbm")
#   $archivedir    - archives directory, passed to the database helpers
#                    and to each plugin's remove_one()
#   $deleted_files - reference to the array of deleted file names
#
# For every file the plugins are tried in order until one returns a
# defined value other than -1 from remove_one().
#
# Returns nothing when $deleted_files is empty, 0 when at least one file
# was not removed by any plugin, and otherwise the value returned by the
# plugin that removed the last file.  For a single file this matches the
# earlier behaviour.
#
# NOTE(review): relies on the dbutil package having been loaded by the
# caller; this file does not require it itself.
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
    return if (scalar(@$deleted_files)==0);
    $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz";
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $some_file_unhandled = 0;
    my $last_rv = 0;

  FILE:
    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file);
        my $oids = $src_rec->{'oid'};

        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            if (defined $rv && $rv != -1) {
                # This file is dealt with.  Carry on with the next file:
                # returning here would leave the rest of the list
                # unprocessed.
                $last_rv = $rv;
                next FILE;
            } # else undefined (was not recognised by the plugin) or there was an error, try the next one
        }

        # no plugin could remove this file
        $some_file_unhandled = 1;
    }

    return $some_file_unhandled ? 0 : $last_rv;
}
# Hand $file to the file_block_read() method of every plugin.
#
#   $pluginfo   - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata
#               - passed through unchanged to each plugin
#   $gli        - true when output for the GLI is wanted (defaults to 0)
#
# Unlike read(), the loop does not stop at the first plugin that accepts
# the file: every plugin is called.  Dies if a ".kill" file exists in
# the collection directory.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # every plugin gets to see the file; the result of each call is
    # recorded in $rv but not acted upon
    foreach my $plugin (@$pluginfo) {
        $rv = $plugin->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
        #last if (defined $rv && $rv==1); # stop this file once we have found something to 'process' it
    }

}
280
281
# Offer $file to each plugin's metadata_read() in turn until one of them
# deals with it.
#
#   $pluginfo - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $extrametakeys, $extrametadata,
#   $extrametafile, $processor, $aux
#             - passed through unchanged to each plugin
#   $gli      - true when output for the GLI is wanted (defaults to 0)
#
# Returns the first plugin result that is defined and not -1, or 0 when
# no plugin produced such a result.  Dies if a ".kill" file exists in
# the collection directory.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    $gli = 0 unless defined $gli;

    my $glifile = $file;

    $glifile =~ s/^[\/\\]+//; # file sometimes starts with a / so get rid of it

    # Announce to GLI that we are handling a file
    print STDERR "<File n='$glifile'>\n" if $gli;

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    if (-e &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill")) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    # metadata_read must return:
    #   undef - could not recognise
    #   -1    - tried but error
    #   0     - blocked
    #   anything else for successful processing

    foreach my $plugobj (@$pluginfo) {

        my $rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                         $extrametakeys, $extrametadata, $extrametafile,
                                         $processor, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        if ($rv == -1) {
            # an error has occurred; report it and try the next plugin
            print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
            next;
        }

        return $rv;
    }

    return 0;
}
332
# Offer $file to each plugin's read() in turn until one of them deals
# with it, and keep count of the files nobody could handle.
#
#   $pluginfo    - reference to the array of plugin objects
#   $base_dir, $file, $block_hash, $metadata, $processor, $aux
#                - passed through unchanged to each plugin
#   $maxdocs     - replaced by -1 unless it is defined and contains a digit
#   $total_count - replaced by 0 unless it is defined and contains a digit
#   $gli         - true when output for the GLI is wanted (defaults to 0)
#
# A plugin's read() must return:
#   undef - could not recognise
#   -1    - tried but error
#   0     - blocked
#   anything else for successful processing
#
# Returns the first plugin result that is defined and not -1.  When
# there is none, the failure is reported on $outhandle (verbosity >= 2),
# to the GLI and on $failhandle, the matching counter in $stats is
# incremented and 0 is returned.  Dies if a ".kill" file exists in the
# collection directory.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    if (!defined $maxdocs || $maxdocs !~ /\d/) {
        $maxdocs = -1;
    }
    if (!defined $total_count || $total_count !~ /\d/) {
        $total_count = 0;
    }
    if (!defined $gli) {
        $gli = 0;
    }

    my $rv = 0;

    # file sometimes starts with a / so get rid of it
    my $glifile = $file;
    $glifile =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    # pass this file by each of the plugins in turn until one
    # is found which will process it
    my $had_error = 0;
    foreach my $plugin (@$pluginfo) {
        $rv = $plugin->read($pluginfo, $base_dir, $file,
                            $block_hash, $metadata, $processor, $maxdocs,
                            $total_count, $gli, $aux);

        # undefined - was not recognised by the plugin
        next unless defined $rv;

        # anything but an error ends the search
        return $rv unless $rv == -1;

        # an error has occurred
        $had_error = 1;
    }

    # Nobody dealt with the file.  Either it was recognised but couldn't
    # be processed ($had_error), or it was not recognised at all.
    my ($out_message, $gli_message, $fail_message, $stats_key);
    if ($had_error) {
        $out_message  = "{plugin.no_plugin_could_process}\n";
        $gli_message  = "<NonProcessedFile n='$glifile'>\n";
        $fail_message = "$file: {plugin.no_plugin_could_process_this_file}\n";
        $stats_key    = 'num_not_processed';
    } else {
        $out_message  = "{plugin.no_plugin_could_recognise}\n";
        $gli_message  = "<NonRecognisedFile n='$glifile'>\n";
        $fail_message = "$file: {plugin.no_plugin_could_recognise_this_file}\n";
        $stats_key    = 'num_not_recognised';
    }

    if ($verbosity >= 2) {
        gsprintf($outhandle, $out_message, $file);
    }
    # tell the GLI that it was not processed
    print STDERR $gli_message if $gli;

    gsprintf($failhandle, $fail_message);
    $stats->{$stats_key} ++;

    return 0;
}
403
# Write out the general statistics that the plugins have compiled.
# Note that the buildcol.pl process doesn't currently call this
# function, so the stats are only output after import.pl.
#
#   $pluginfo    - reference to the array of plugin objects; each one adds
#                  its figures to $stats through compile_stats()
#   $statshandle - handle the summary is written to
#   $faillog     - value substituted into the {plugin.see_faillog} message
#   $gli         - true when output for the GLI is wanted (defaults to 0)
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    if (!defined $gli) {
        $gli = 0;
    }

    foreach my $plugin (@$pluginfo) {
        $plugin->compile_stats($stats);
    }

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} +
        $stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    if ($gli) {
        print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n";
    }

    # picks the singular message for a count of exactly one; otherwise
    # the plural message, which is handed the count
    my $report_count = sub {
        my ($count, $singular, $plural) = @_;
        if ($count == 1) {
            gsprintf($statshandle, $singular);
        } else {
            gsprintf($statshandle, $plural, $count);
        }
    };

    # these two lines are always written, the others only for non-zero counts
    $report_count->($total,
                    "* {plugin.one_considered}\n",
                    "* {plugin.n_considered}\n");

    if ($stats->{'num_archives'}) {
        $report_count->($stats->{'num_archives'},
                        " ({plugin.including_archive})\n",
                        " ({plugin.including_archives})\n");
    }

    $report_count->($stats->{'num_processed'},
                    "* {plugin.one_included}\n",
                    "* {plugin.n_included}\n");

    if ($stats->{'num_not_recognised'}) {
        $report_count->($stats->{'num_not_recognised'},
                        "* {plugin.one_unrecognised}\n",
                        "* {plugin.n_unrecognised}\n");
    }

    if ($stats->{'num_not_processed'}) {
        $report_count->($stats->{'num_not_processed'},
                        "* {plugin.one_rejected}\n",
                        "* {plugin.n_rejected}\n");
    }

    # point at the fail log whenever any file was left out
    if ($stats->{'num_not_processed'} || $stats->{'num_not_recognised'}) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}
461
# Call end() on every plugin in turn.
#
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through unchanged to each plugin's end()
#
# Returns nothing.
sub end {
    my ($pluginfo, $processor) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->end($processor);
    }
    return;
}
466
# Call deinit() on every plugin in turn.
#
#   $pluginfo  - reference to the array of plugin objects
#   $processor - passed through unchanged to each plugin's deinit()
#
# Returns nothing.
sub deinit {
    my ($pluginfo, $processor) = @_;

    foreach my $plugobj (@$pluginfo) {
        $plugobj->deinit($processor);
    }
    return;
}
473
4741;
Note: See TracBrowser for help on using the repository browser.