Changeset 27305 for main/trunk/greenstone2/bin/script/buildcol.pl
- Timestamp:
- 2013-05-06T15:27:37+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/bin/script/buildcol.pl
r26976 r27305 4 4 # 5 5 # buildcol.pl -- 6 # 6 7 # A component of the Greenstone digital library software 7 # from the New Zealand Digital Library Project at the 8 # from the New Zealand Digital Library Project at the 8 9 # University of Waikato, New Zealand. 9 10 # … … 26 27 ########################################################################### 27 28 28 29 29 # This program will build a particular collection. 30 31 30 package buildcol; 32 31 33 BEGIN { 34 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 35 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; 36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); 37 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 38 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath"); 39 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); 40 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify"); 41 42 if (defined $ENV{'GSDL-RUN-SETUP'}) { 43 require util; 44 &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'}); 32 # Environment 33 BEGIN 34 { 35 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 36 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; 37 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib'); 38 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan'); 39 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath'); 40 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins'); 41 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify'); 42 43 if (defined $ENV{'GSDL-RUN-SETUP'}) 44 { 45 require util; 46 &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'}); 47 } 48 49 if (defined $ENV{'GSDLEXTS'}) 50 { 51 my @extensions = split(/:/, $ENV{'GSDLEXTS'}); 52 foreach my $e (@extensions) 53 { 54 my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e; 55 56 unshift(@INC, $ext_prefix . '/perllib'); 57 unshift(@INC, $ext_prefix . '/perllib/cpan'); 58 unshift(@INC, $ext_prefix . '/perllib/plugins'); 59 unshift(@INC, $ext_prefix . '/perllib/classify'); 45 60 } 46 47 if (defined $ENV{'GSDLEXTS'}) { 48 my @extensions = split(/:/,$ENV{'GSDLEXTS'}); 49 foreach my $e (@extensions) { 50 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e"; 51 52 unshift (@INC, "$ext_prefix/perllib"); 53 unshift (@INC, "$ext_prefix/perllib/cpan"); 54 unshift (@INC, "$ext_prefix/perllib/plugins"); 55 unshift (@INC, "$ext_prefix/perllib/classify"); 56 } 61 } 62 if (defined $ENV{'GSDL3EXTS'}) 63 { 64 my @extensions = split(/:/, $ENV{'GSDL3EXTS'}); 65 foreach my $e (@extensions) 66 { 67 my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e; 68 69 unshift(@INC, $ext_prefix . '/perllib'); 70 unshift(@INC, $ext_prefix . '/perllib/cpan'); 71 unshift(@INC, $ext_prefix . '/perllib/plugins'); 72 unshift(@INC, $ext_prefix . '/perllib/classify'); 57 73 } 58 if (defined $ENV{'GSDL3EXTS'}) { 59 my @extensions = split(/:/,$ENV{'GSDL3EXTS'}); 60 foreach my $e (@extensions) { 61 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e"; 62 63 unshift (@INC, "$ext_prefix/perllib"); 64 unshift (@INC, "$ext_prefix/perllib/cpan"); 65 unshift (@INC, "$ext_prefix/perllib/plugins"); 66 unshift (@INC, "$ext_prefix/perllib/classify"); 67 } 68 } 69 74 } 70 75 } 71 76 72 use colcfg; 73 use dbutil; 74 use util; 75 use scriptutil; 76 use FileHandle; 77 use gsprintf; 78 use printusage; 79 use parse2; 80 77 # Pragma 81 78 use strict; 82 79 no strict 'refs'; # allow filehandles to be variables and vice versa 83 80 no strict 'subs'; # allow barewords (eg STDERR) as function arguments 84 81 85 82 # Modules 83 use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12] 84 85 # Greenstone Modules 86 use buildcolutils; 87 use FileUtils; 88 use util; 89 90 # Globals 91 # - build up arguments list/control 86 92 my $mode_list = 87 93 [ { 'name' => "all", … … 94 100 'desc' => "{buildcol.mode.infodb}" } ]; 95 101 96 my $sec_index_list = 102 my $sec_index_list = 97 103 [ {'name' => "never", 98 104 'desc' => "{buildcol.sections_index_document_metadata.never}" }, … … 127 133 'type' => "string", 128 134 # parsearg left "" as default 129 #'deft' => & util::filename_cat($ENV{'GSDLHOME'}, "collect"),135 #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"), 130 136 'reqd' => "no", 131 137 'hiddengli' => "yes" }, … … 145 151 'type' => "string", 146 152 # parsearg left "" as default 147 #'deft' => & util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),153 #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"), 148 154 'reqd' => "no", 149 155 'modegli' => "3" }, … … 243 249 'reqd' => "no", 244 250 'hiddengli' => "yes" }, 251 { 'name' => "indexname", 252 'desc' => "{buildcol.index}", 253 'type' => "string", 254 'reqd' => "no", 255 'modegli' => "3" }, 256 { 'name' => "indexlevel", 257 'desc' => "{buildcol.indexlevel}", 258 'type' => "string", 259 'reqd' => "no", 260 'modegli' => "3" }, 245 261 ]; 246 262 … … 249 265 'args' => $arguments }; 250 266 251 252 # globals 253 my $collection; 254 my $configfilename; 255 my $out; 256 257 # used to signify "gs2"(default) or "gs3" 258 my $gs_mode = "gs2"; 259 260 ## @method gsprintf() 261 # Print a string to the screen after looking it up from a locale dependant 262 # strings file. This function is losely based on the idea of resource 263 # bundles as used in Java. 264 # 265 # @param $error The STDERR stream. 266 # @param $text The string containing GS keys that should be replaced with 267 # their locale dependant equivilents. 268 # @param $out The output stream. 269 # @return The locale-based string to output. 270 # 271 sub gsprintf() 272 { 273 return &gsprintf::gsprintf(@_); 274 } 275 ## gsprintf() ## 276 267 # The hash maps between argument and the buildcolutils subclass supporting that 268 # argument - allowing for extensions to override the normal buildcolutils as 269 # necessary 270 my $function_to_subclass_mappings = {}; 271 272 # Lets get the party rolling... or ball started... hmmm 277 273 &main(); 278 274 279 ## @method main() 280 # 281 # [Parses up and validates the arguments to the build process before creating 282 # the appropriate build process to do the actual work - John] 283 # 284 # @note Added true incremental support - John Thompson, DL Consulting Ltd. 285 # @note There were several bugs regarding using directories other than 286 # "import" or "archives" during import and build quashed. - John 287 # Thompson, DL Consulting Ltd. 288 # 289 # @param $incremental If true indicates this build should not regenerate all 290 # the index and metadata files, and should instead just 291 # append the information found in the archives directory 292 # to the existing files. If this requires some complex 293 # work so as to correctly insert into a classifier so be 294 # it. Of course none of this is done here - instead the 295 # incremental argument is passed to the document 296 # processor. 297 # 275 exit; 276 298 277 sub main 299 278 { 300 # command line args 301 my ($verbosity, $archivedir, $cachedir, $builddir, $site, $maxdocs, 302 $debug, $mode, $indexname, $removeold, $keepold, 303 $incremental, $incremental_mode, 304 $remove_empty_classifications, 305 $collectdir, $build, $type, $textindex, 306 $no_strip_html, $store_metadata_coverage, 307 $no_text, $faillog, $gli, $index, $language, 308 $sections_index_document_metadata, $maxnumeric, $activate); 309 310 my $xml = 0; 311 my $hashParsingResult = {}; 312 # general options available to all plugins 313 my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 314 315 # If parse returns -1 then something has gone wrong 316 if ($intArgLeftinAfterParsing == -1) 279 # Dynamically include arguments from any subclasses of buildcolutils we find 280 # in the extensions directory 281 if (defined $ENV{'GSDLEXTS'}) 282 { 283 &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'}); 284 } 285 if (defined $ENV{'GSDL3EXTS'}) 286 { 287 &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'}); 288 } 289 290 # Loop through arguments, checking to see if any depend on a specific 291 # subclass of buildcolutils. Note that we load the first subclass we 292 # encounter so only support a single 'override' ATM. 293 my $subclass; 294 foreach my $argument (@ARGV) 295 { 296 # proper arguments start with a hyphen 297 if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument}) 317 298 { 318 &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 319 die "\n"; 299 my $required_subclass = $function_to_subclass_mappings->{$argument}; 300 if (!defined $subclass) 301 { 302 $subclass = $required_subclass; 303 } 304 # Oh noes! The user has included specific arguments from two different 305 # subclasses... this isn't supported 306 elsif ($subclass ne $required_subclass) 307 { 308 print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n"; 309 exit; 310 } 320 311 } 321 322 foreach my $strVariable (keys %$hashParsingResult) 312 } 313 314 my $buildcolutils; 315 if (defined $subclass) 316 { 317 print "* Loading overriding buildcolutils module: " . $subclass . "\n"; 318 require $subclass . '.pm'; 319 $buildcolutils = new $subclass(\@ARGV, $options); 320 } 321 # We don't have a overridden buildcolutils, or the above command failed 322 # somehow so load the base class 323 if (!defined $buildcolutils) 324 { 325 $buildcolutils = new buildcolutils(\@ARGV, $options); 326 } 327 328 my $collection = $buildcolutils->get_collection(); 329 if (defined $collection) 330 { 331 my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options); 332 $buildcolutils->set_collection_options($collect_cfg); 333 334 my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg); 335 $buildcolutils->build_collection($builders_ref); 336 $buildcolutils->build_auxiliary_files($builders_ref); 337 $buildcolutils->complete_builders($builders_ref); 338 339 # The user may have requested the collection be activated 340 $buildcolutils->activate_collection(); 341 } 342 343 # Cleanup 344 $buildcolutils->deinit(); 345 } 346 # main() 347 348 # @function _scanForSubclasses() 349 # @param $dir The extension directory to look within 350 # @param $exts A list of the available extensions (as a colon separated string) 351 # @return The number of subclasses of buildcolutils found as an Integer 352 sub _scanForSubclasses 353 { 354 my ($dir, $exts) = @_; 355 my $class_count = 0; 356 my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext"); 357 my @extensions = split(/:/, $exts); 358 foreach my $e (@extensions) 359 { 360 # - any subclass must be prefixed with the name of the ext 361 my $package_name = $e . 'buildcolutils'; 362 $package_name =~ s/[^a-z]//gi; # package names have limited characters 363 my $file_name = $package_name . '.pm'; 364 my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name); 365 # see if we have a subclass lurking in that extension folder 366 if (&FileUtils::fileExists($file_path)) 323 367 { 324 eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 368 # - note we load the filename (with pm) unlike normal modules 369 require $file_name; 370 # - make call to the newly created package 371 my $symbol = qualify('getSupportedArguments', $package_name); 372 # - strict prevents strings being used as function calls, so temporarily 373 # disable that pragma 374 no strict; 375 # - lets check that the function we are about to call actually exists 376 if ( defined &{$symbol} ) 377 { 378 my $extra_arguments = &{$symbol}(); 379 foreach my $argument (@{$extra_arguments}) 380 { 381 # - record a mapping from each extra arguments to the subclass 382 # that supports it. We put the hyphen on here to make comparing 383 # with command line arguments even easier 384 $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name; 385 # - and them add them as acceptable arguments to import.pl 386 push(@{$options->{'args'}}, $argument); 387 } 388 $class_count++; 389 } 390 else 391 { 392 print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n"; 393 } 325 394 } 326 327 # If $language has been specified, load the appropriate resource bundle 328 # (Otherwise, the default resource bundle will be loaded automatically) 329 if ($language && $language =~ /\S/) { 330 &gsprintf::load_language_specific_resource_bundle($language); 331 } 332 333 if ($xml) { 334 &PrintUsage::print_xml_usage($options); 335 print "\n"; 336 return; 337 } 338 339 if ($gli) { # the gli wants strings to be in UTF-8 340 &gsprintf::output_strings_in_UTF8; 341 } 342 343 # now check that we had exactly one leftover arg, which should be 344 # the collection name. We don't want to do this earlier, cos 345 # -xml arg doesn't need a collection name 346 # Or if the user specified -h, then we output the usage also 347 if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 348 { 349 &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 350 die "\n"; 351 } 352 353 $textindex = ""; 354 my $close_out = 0; 355 if ($out !~ /^(STDERR|STDOUT)$/i) { 356 open (OUT, ">$out") || 357 (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die); 358 $out = "buildcol::OUT"; 359 $close_out = 1; 360 } 361 $out->autoflush(1); 362 363 # get and check the collection 364 if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") { 365 &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 366 die "\n"; 367 } 368 369 if ($faillog eq "") { 370 $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 371 } 372 # note that we're appending to the faillog here (import.pl clears it each time) 373 # this could potentially create a situation where the faillog keeps being added 374 # to over multiple builds (if the import process is being skipped) 375 open (FAILLOG, ">>$faillog") || 376 (&gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog) && die); 377 $faillog = 'buildcol::FAILLOG'; 378 $faillog->autoflush(1); 379 380 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 381 # Don't know why this didn't already happen, but now collection specific 382 # classify and plugins directory also added to include path 383 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/classify"); 384 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins"); 385 386 # Read in the collection configuration file. 387 my ($collectcfg, $buildtype, $orthogonalbuildtypes); 388 if ((defined $site) && ($site ne "")) { # GS3 389 $gs_mode = "gs3"; 390 } 391 $configfilename = &colcfg::get_collect_cfg_name($out, $gs_mode); 392 $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 393 394 # If the infodbtype value wasn't defined in the collect.cfg file, use the default 395 if (!defined($collectcfg->{'infodbtype'})) 396 { 397 $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type(); 398 } 399 400 if ($verbosity !~ /\d+/) { 401 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 402 $verbosity = $collectcfg->{'verbosity'}; 403 } else { 404 $verbosity = 2; # the default 405 } 406 } 407 # we use searchtype for determining buildtype, but for old versions, use buildtype 408 if (defined $collectcfg->{'buildtype'}) { 409 $buildtype = $collectcfg->{'buildtype'}; 410 } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) { 411 $buildtype = "mgpp"; 412 } else { 413 $buildtype = "mg"; #mg is the default 414 } 415 416 if (defined $collectcfg->{'orthogonalbuildtypes'}) { 417 $orthogonalbuildtypes = $collectcfg->{'orthogonalbuildtypes'}; 418 } 419 420 if (defined $collectcfg->{'archivedir'} && $archivedir eq "") { 421 $archivedir = $collectcfg->{'archivedir'}; 422 } 423 if (defined $collectcfg->{'cachedir'} && $cachedir eq "") { 424 $cachedir = $collectcfg->{'cachedir'}; 425 } 426 if (defined $collectcfg->{'builddir'} && $builddir eq "") { 427 $builddir = $collectcfg->{'builddir'}; 428 } 429 if ($maxdocs !~ /\-?\d+/) { 430 if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 431 $maxdocs = $collectcfg->{'maxdocs'}; 432 } else { 433 $maxdocs = -1; # the default 434 } 435 } 436 if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/) { 437 $maxnumeric = $collectcfg->{'maxnumeric'}; 438 } 439 440 if ($maxnumeric < 4 || $maxnumeric > 512) { 441 $maxnumeric = 4; 442 } 443 444 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 445 $debug = 1; 446 } 447 if ($mode !~ /^(all|compress_text|build_index|infodb)$/) { 448 if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) { 449 $mode = $collectcfg->{'mode'}; 450 } else { 451 $mode = "all"; # the default 452 } 453 } 454 if (defined $collectcfg->{'index'} && $indexname eq "") { 455 $indexname = $collectcfg->{'index'}; 456 } 457 if (defined $collectcfg->{'no_text'} && $no_text == 0) { 458 if ($collectcfg->{'no_text'} =~ /^true$/i) { 459 $no_text = 1; 460 } 461 } 462 if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) { 463 if ($collectcfg->{'no_strip_html'} =~ /^true$/i) { 464 $no_strip_html = 1; 465 } 466 } 467 if (defined $collectcfg->{'store_metadata_coverage'} && $store_metadata_coverage == 0) { 468 if ($collectcfg->{'store_metadata_coverage'} =~ /^true$/i) { 469 $store_metadata_coverage = 1; 470 } 471 } 472 if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) { 473 if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) { 474 $remove_empty_classifications = 1; 475 } 476 } 477 478 if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) { 479 $textindex = $collectcfg->{'textcompress'}; 480 } 481 if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 482 $gli = 1; 483 } 484 485 if ($sections_index_document_metadata !~ /\S/ && defined $collectcfg->{'sections_index_document_metadata'}) { 486 $sections_index_document_metadata = $collectcfg->{'sections_index_document_metadata'}; 487 } 488 489 if ($sections_index_document_metadata !~ /^(never|always|unless_section_metadata_exists)$/) { 490 $sections_index_document_metadata = "never"; 491 } 492 493 ($removeold, $keepold, $incremental, $incremental_mode) 494 = &scriptutil::check_removeold_and_keepold($removeold, $keepold, 495 $incremental, "building", 496 $collectcfg); 497 498 $gli = 0 unless defined $gli; 499 500 # New argument to track whether build is incremental 501 $incremental = 0 unless defined $incremental; 502 503 print STDERR "<Build>\n" if $gli; 504 505 #set the text index 506 if (($buildtype eq "mgpp") || ($buildtype eq "lucene") || ($buildtype eq "solr")) { 507 if ($textindex eq "") { 508 $textindex = "text"; 509 } 510 } 511 else { 512 $textindex = "section:text"; 513 } 514 515 # fill in the default archives and building directories if none 516 # were supplied, turn all \ into / and remove trailing / 517 518 my ($realarchivedir, $realbuilddir); 519 # Modified so that the archivedir, if provided as an argument, is made 520 # absolute if it isn't already 521 if ($archivedir eq "") 522 { 523 $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 524 } 525 else 526 { 527 $archivedir = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $archivedir); 528 } 529 # End Mod 530 $archivedir =~ s/[\\\/]+/\//g; 531 $archivedir =~ s/\/$//; 532 533 if ($builddir eq "") { 534 $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building"); 535 if ($incremental) { 536 &gsprintf($out, "{buildcol.incremental_default_builddir}\n"); 537 } 538 } 539 $builddir =~ s/[\\\/]+/\//g; 540 $builddir =~ s/\/$//; 541 542 # update the archive cache if needed 543 if ($cachedir) { 544 &gsprintf($out, "{buildcol.updating_archive_cache}\n") 545 if ($verbosity >= 1); 546 547 $cachedir =~ s/[\\\/]+$//; 548 $cachedir .= "/collect/$collection" unless 549 $cachedir =~ /collect\/$collection/; 550 551 $realarchivedir = "$cachedir/archives"; 552 $realbuilddir = "$cachedir/building"; 553 &util::mk_all_dir ($realarchivedir); 554 &util::mk_all_dir ($realbuilddir); 555 &util::cachedir ($archivedir, $realarchivedir, $verbosity); 556 557 } else { 558 $realarchivedir = $archivedir; 559 $realbuilddir = $builddir; 560 } 561 562 # build it in realbuilddir 563 &util::mk_all_dir ($realbuilddir); 564 565 my ($buildertype, $builderdir, $builder); 566 # if a builder class has been created for this collection, use it 567 # otherwise, use the mg or mgpp builder 568 if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") { 569 $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib"; 570 $buildertype = "custombuilder"; 571 } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") { 572 $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 573 $buildertype = "custombuilder"; 574 } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") { 575 $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 576 $buildertype = "${collection}builder"; 577 } else { 578 579 $builderdir = undef; 580 if ($buildtype ne "") { 581 # caters for extension-based build types, such as 'solr' 582 $buildertype = $buildtype."builder"; 583 } 584 else { 585 # Default to mgpp 586 $buildertype = "mgppbuilder"; 587 } 588 } 589 # check for extension specific builders 590 # (that will then be run after main builder.pm 591 592 my @builderdir_list = ($builderdir); 593 my @buildertype_list = ($buildertype); 594 595 if (defined $orthogonalbuildtypes) { 596 foreach my $obt (@$orthogonalbuildtypes) { 597 598 push(@builderdir_list,undef); # rely on @INC to find it 599 push(@buildertype_list,$obt."Builder"); 600 } 601 } 602 603 # Set up array of the main builder.pm, followed by any ones 604 # from the extension folders 605 606 my $num_builders = scalar(@buildertype_list); 607 my @builders = (); 608 609 for (my $i=0; $i<$num_builders; $i++) { 610 my $this_builder; 611 my $this_buildertype = $buildertype_list[$i]; 612 my $this_builderdir = $builderdir_list[$i]; 613 614 if ((defined $this_builderdir) && ($this_builderdir ne "")) { 615 require "$this_builderdir/$this_buildertype.pm"; 616 } 617 else { 618 require "$this_buildertype.pm"; 619 } 620 621 eval("\$this_builder = new $this_buildertype(\$site, \$collection, " . 622 "\$realarchivedir, \$realbuilddir, \$verbosity, " . 623 "\$maxdocs, \$debug, \$keepold, \$incremental, \$incremental_mode, " . 624 "\$remove_empty_classifications, " . 625 "\$out, \$no_text, \$faillog, \$gli)"); 626 die "$@" if $@; 627 628 push(@builders,$this_builder); 629 } 630 631 # Init phase for builders 632 for (my $i=0; $i<$num_builders; $i++) { 633 my $this_buildertype = $buildertype_list[$i]; 634 my $this_builderdir = $builderdir_list[$i]; 635 my $this_builder = $builders[$i]; 636 637 $this_builder->init(); 638 $this_builder->set_maxnumeric($maxnumeric); 639 640 if (($this_buildertype eq "mgppbuilder") && $no_strip_html) { 641 $this_builder->set_strip_html(0); 642 } 643 644 if ($sections_index_document_metadata ne "never") { 645 $this_builder->set_sections_index_document_metadata($sections_index_document_metadata); 646 } 647 648 if ($store_metadata_coverage) { 649 $this_builder->set_store_metadata_coverage(1); 650 } 651 } 652 653 # Run the requested passes 654 655 if ($mode =~ /^all$/i) { 656 657 # 'map' modifies the elements of the original array, so calling 658 # methods -- as done below -- will cause (by default) @builders 659 # to be changed to whatever these functions return (which is *not* 660 # what we want -- we want to leave the values unchanged) 661 # => Use 'local' (dynamic scoping) to give each 'map' call its 662 # own local copy This could also be done with: 663 # (my $new =$_)->method(); $new 664 # but is a bit more cumbersome to write 665 666 map { local $_=$_; $_->compress_text($textindex); } @builders; 667 map { local $_=$_; $_->build_indexes($indexname); } @builders; 668 map { local $_=$_; $_->make_infodatabase(); } @builders; 669 map { local $_=$_; $_->collect_specific(); } @builders; 670 } elsif ($mode =~ /^compress_text$/i) { 671 map { local $_=$_; $_->compress_text($textindex); } @builders; 672 } elsif ($mode =~ /^build_index$/i) { 673 map { local $_=$_; $_->build_indexes($indexname); } @builders; 674 } elsif ($mode =~ /^infodb$/i) { 675 map { local $_=$_; $_->make_infodatabase(); } @builders; 676 } else { 677 (&gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode) && die); 678 } 679 680 if (!$debug) { 681 map {local $_=$_; $_->make_auxiliary_files(); } @builders; 682 } 683 map {local $_=$_; $_->deinit(); } @builders; 684 685 if (($realbuilddir ne $builddir) && !$debug) { 686 &gsprintf($out, "{buildcol.copying_back_cached_build}\n") 687 if ($verbosity >= 1); 688 &util::rm_r ($builddir); 689 &util::cp_r ($realbuilddir, $builddir); 690 } 691 692 693 # for RSS support: Need rss-items.rdf file in index folder 694 # check if a file called rss-items.rdf exists in archives, then copy it into the building folder 695 # so that when building is moved to index, this file will then also be in index as desired 696 my $collection_dir = &util::resolve_collection_dir($collectdir, $collection, $site); 697 my $rss_items_rdf_file = &util::filename_cat($archivedir, "rss-items.rdf"); 698 if(defined $builddir && -d $builddir && -f $rss_items_rdf_file) { 699 &gsprintf($out, "{buildcol.copying_rss_items_rdf}\n") if ($verbosity >= 1); 700 &util::cp ($rss_items_rdf_file, $builddir); 701 } 702 703 # if buildcol.pl was run with -activate, need to run activate.pl 704 # now that building's complete 705 if($activate) { 706 707 #my $quoted_argv = join(" ", map { "\"$_\"" } @ARGV); 708 709 my @activate_argv = (); 710 push(@activate_argv,"-collectdir",$collectdir) if($collectdir); 711 push(@activate_argv,"-builddir",$builddir) if($builddir); 712 push(@activate_argv,"-site",$site) if($site); 713 push(@activate_argv,"-verbosity",$verbosity) if($verbosity); 714 push(@activate_argv,"-removeold") if($removeold); 715 push(@activate_argv,"-keepold") if($keepold); 716 push(@activate_argv,"-incremental") if($incremental); 717 my $quoted_argv = join(" ", map { "\"$_\"" } @activate_argv); 718 719 my $activatecol_cmd = "\"".&util::get_perl_exec()."\" -S activate.pl $quoted_argv \"$collection\""; 720 721 my $activatecol_status = system($activatecol_cmd)/256; 722 723 if ($activatecol_status != 0) { 724 print STDERR "Error: Failed to run: $activatecol_cmd\n"; 725 print STDERR " $!\n" if ($! ne ""); 726 exit(-1); 727 } 728 } 729 730 close OUT if $close_out; 731 close FAILLOG; 732 733 print STDERR "</Build>\n" if $gli; 395 } 396 return $class_count; 734 397 } 735 736 737 398 # _scanForSubclasses()
Note:
See TracChangeset
for help on using the changeset viewer.