Ignore:
Timestamp:
2013-05-06T15:27:37+12:00 (11 years ago)
Author:
jmt12
Message:

Add code to allow importing and building to load overriding versions of inexport.pm and buildcolutils.pm from extensions at runtime. When an extension provides a possible override, Greenstone will dynamically detect and add additional options (visible in the --help). When a user specifies one of these options the appropriate inexport/buildcolutils subclass will be loaded

Location:
main/trunk/greenstone2/bin/script
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/buildcol.pl

    r26976 r27305  
    44#
    55# buildcol.pl --
     6#
    67# A component of the Greenstone digital library software
    7 # from the New Zealand Digital Library Project at the 
     8# from the New Zealand Digital Library Project at the
    89# University of Waikato, New Zealand.
    910#
     
    2627###########################################################################
    2728
    28 
    2929# This program will build a particular collection.
    30 
    3130package buildcol;
    3231
    33 BEGIN {
    34     die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    35     die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    36     unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    37     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    38     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
    39     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    40     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
    41 
    42     if (defined $ENV{'GSDL-RUN-SETUP'}) {
    43     require util;
    44     &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
     32# Environment
     33BEGIN
     34{
     35  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
     36  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
     37  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');
     38  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
     39  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
     40  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
     41  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
     42
     43  if (defined $ENV{'GSDL-RUN-SETUP'})
     44  {
     45    require util;
     46    &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
     47  }
     48
     49  if (defined $ENV{'GSDLEXTS'})
     50  {
     51    my @extensions = split(/:/, $ENV{'GSDLEXTS'});
     52    foreach my $e (@extensions)
     53    {
     54      my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e;
     55
     56      unshift(@INC, $ext_prefix . '/perllib');
     57      unshift(@INC, $ext_prefix . '/perllib/cpan');
     58      unshift(@INC, $ext_prefix . '/perllib/plugins');
     59      unshift(@INC, $ext_prefix . '/perllib/classify');
    4560    }
    46    
    47     if (defined $ENV{'GSDLEXTS'}) {
    48     my @extensions = split(/:/,$ENV{'GSDLEXTS'});
    49     foreach my $e (@extensions) {
    50         my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
    51 
    52         unshift (@INC, "$ext_prefix/perllib");
    53         unshift (@INC, "$ext_prefix/perllib/cpan");
    54         unshift (@INC, "$ext_prefix/perllib/plugins");
    55         unshift (@INC, "$ext_prefix/perllib/classify");
    56     }
     61  }
     62  if (defined $ENV{'GSDL3EXTS'})
     63  {
     64    my @extensions = split(/:/, $ENV{'GSDL3EXTS'});
     65    foreach my $e (@extensions)
     66    {
     67      my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e;
     68
     69      unshift(@INC, $ext_prefix . '/perllib');
     70      unshift(@INC, $ext_prefix . '/perllib/cpan');
     71      unshift(@INC, $ext_prefix . '/perllib/plugins');
     72      unshift(@INC, $ext_prefix . '/perllib/classify');
    5773    }
    58     if (defined $ENV{'GSDL3EXTS'}) {
    59     my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
    60     foreach my $e (@extensions) {
    61         my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
    62 
    63         unshift (@INC, "$ext_prefix/perllib");
    64         unshift (@INC, "$ext_prefix/perllib/cpan");
    65         unshift (@INC, "$ext_prefix/perllib/plugins");
    66         unshift (@INC, "$ext_prefix/perllib/classify");
    67     }
    68     }
    69 
     74  }
    7075}
    7176
    72 use colcfg;
    73 use dbutil;
    74 use util;
    75 use scriptutil;
    76 use FileHandle;
    77 use gsprintf;
    78 use printusage;
    79 use parse2;
    80 
     77# Pragma
    8178use strict;
    8279no strict 'refs'; # allow filehandles to be variables and vice versa
    8380no strict 'subs'; # allow barewords (eg STDERR) as function arguments
    8481
    85 
     82# Modules
     83use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
     84
     85# Greenstone Modules
     86use buildcolutils;
     87use FileUtils;
     88use util;
     89
     90# Globals
     91# - build up arguments list/control
    8692my $mode_list =
    8793    [ { 'name' => "all",
     
    94100        'desc' => "{buildcol.mode.infodb}" } ];
    95101
    96 my $sec_index_list = 
     102my $sec_index_list =
    97103    [ {'name' => "never",
    98104       'desc' => "{buildcol.sections_index_document_metadata.never}" },
     
    127133    'type' => "string",
    128134    # parsearg left "" as default
    129     #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
     135    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
    130136    'reqd' => "no",
    131137        'hiddengli' => "yes" },
     
    145151    'type' => "string",
    146152    # parsearg left "" as default
    147     #'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
     153    #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
    148154    'reqd' => "no",
    149155    'modegli' => "3" },
     
    243249    'reqd' => "no",
    244250    'hiddengli' => "yes" },
     251      { 'name' => "indexname",
     252    'desc' => "{buildcol.index}",
     253    'type' => "string",
     254    'reqd' => "no",
     255    'modegli' => "3" },
     256      { 'name' => "indexlevel",
     257    'desc' => "{buildcol.indexlevel}",
     258    'type' => "string",
     259    'reqd' => "no",
     260    'modegli' => "3" },
    245261      ];
    246262
     
    249265        'args' => $arguments };
    250266
    251 
    252 # globals
    253 my $collection;
    254 my $configfilename;
    255 my $out;
    256 
    257 # used to signify "gs2"(default) or "gs3"
    258 my $gs_mode = "gs2";
    259 
    260 ## @method gsprintf()
    261 #  Print a string to the screen after looking it up from a locale dependant
    262 #  strings file. This function is losely based on the idea of resource
    263 #  bundles as used in Java.
    264 #
    265 #  @param  $error The STDERR stream.
    266 #  @param  $text The string containing GS keys that should be replaced with
    267 #                their locale dependant equivilents.
    268 #  @param  $out The output stream.
    269 #  @return The locale-based string to output.
    270 #
    271 sub gsprintf()
    272 {
    273     return &gsprintf::gsprintf(@_);
    274 }
    275 ## gsprintf() ##
    276 
     267# The hash maps between argument and the buildcolutils subclass supporting that
     268# argument - allowing for extensions to override the normal buildcolutils as
     269# necessary
     270my $function_to_subclass_mappings = {};
     271
     272# Lets get the party rolling... or ball started... hmmm
    277273&main();
    278274
    279 ## @method main()
    280 #
    281 #  [Parses up and validates the arguments to the build process before creating
    282 #  the appropriate build process to do the actual work - John]
    283 #
    284 #  @note Added true incremental support - John Thompson, DL Consulting Ltd.
    285 #  @note There were several bugs regarding using directories other than
    286 #        "import" or "archives" during import and build quashed. - John
    287 #        Thompson, DL Consulting Ltd.
    288 #
    289 #  @param  $incremental If true indicates this build should not regenerate all
    290 #                       the index and metadata files, and should instead just
    291 #                       append the information found in the archives directory
    292 #                       to the existing files. If this requires some complex
    293 #                       work so as to correctly insert into a classifier so be
    294 #                       it. Of course none of this is done here - instead the
    295 #                       incremental argument is passed to the document
    296 #                       processor.
    297 #
     275exit;
     276
    298277sub main
    299278{
    300     # command line args
    301     my ($verbosity, $archivedir, $cachedir, $builddir, $site, $maxdocs,
    302     $debug, $mode, $indexname, $removeold, $keepold,
    303     $incremental, $incremental_mode,
    304     $remove_empty_classifications,
    305     $collectdir, $build, $type, $textindex,
    306     $no_strip_html, $store_metadata_coverage,
    307     $no_text, $faillog, $gli, $index, $language,
    308     $sections_index_document_metadata, $maxnumeric, $activate);
    309    
    310     my $xml = 0;
    311     my $hashParsingResult = {};
    312     # general options available to all plugins
    313     my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    314 
    315     # If parse returns -1 then something has gone wrong
    316     if ($intArgLeftinAfterParsing == -1)
     279  # Dynamically include arguments from any subclasses of buildcolutils we find
     280  # in the extensions directory
     281  if (defined $ENV{'GSDLEXTS'})
     282  {
     283    &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
     284  }
     285  if (defined $ENV{'GSDL3EXTS'})
     286  {
     287    &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
     288  }
     289
     290  # Loop through arguments, checking to see if any depend on a specific
     291  # subclass of buildcolutils. Note that we load the first subclass we
     292  # encounter so only support a single 'override' ATM.
     293  my $subclass;
     294  foreach my $argument (@ARGV)
     295  {
     296    # proper arguments start with a hyphen
     297    if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument})
    317298    {
    318     &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    319     die "\n";
     299      my $required_subclass = $function_to_subclass_mappings->{$argument};
     300      if (!defined $subclass)
     301      {
     302        $subclass = $required_subclass;
     303      }
     304      # Oh noes! The user has included specific arguments from two different
     305      # subclasses... this isn't supported
     306      elsif ($subclass ne $required_subclass)
     307      {
     308        print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
     309        exit;
     310      }
    320311    }
    321    
    322     foreach my $strVariable (keys %$hashParsingResult)
     312  }
     313
     314  my $buildcolutils;
     315  if (defined $subclass)
     316  {
     317    print "* Loading overriding buildcolutils module: " . $subclass . "\n";
     318    require $subclass . '.pm';
     319    $buildcolutils = new $subclass(\@ARGV, $options);
     320  }
     321  # We don't have a overridden buildcolutils, or the above command failed
     322  # somehow so load the base class
     323  if (!defined $buildcolutils)
     324  {
     325    $buildcolutils = new buildcolutils(\@ARGV, $options);
     326  }
     327
     328  my $collection = $buildcolutils->get_collection();
     329  if (defined $collection)
     330  {
     331    my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
     332    $buildcolutils->set_collection_options($collect_cfg);
     333
     334    my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
     335    $buildcolutils->build_collection($builders_ref);
     336    $buildcolutils->build_auxiliary_files($builders_ref);
     337    $buildcolutils->complete_builders($builders_ref);
     338
     339    # The user may have requested the collection be activated
     340    $buildcolutils->activate_collection();
     341  }
     342
     343  # Cleanup
     344  $buildcolutils->deinit();
     345}
     346# main()
     347
     348# @function _scanForSubclasses()
     349# @param $dir The extension directory to look within
     350# @param $exts A list of the available extensions (as a colon separated string)
     351# @return The number of subclasses of buildcolutils found as an Integer
     352sub _scanForSubclasses
     353{
     354  my ($dir, $exts) = @_;
     355  my $class_count = 0;
     356  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
     357  my @extensions = split(/:/, $exts);
     358  foreach my $e (@extensions)
     359  {
     360    # - any subclass must be prefixed with the name of the ext
     361    my $package_name = $e . 'buildcolutils';
     362    $package_name =~ s/[^a-z]//gi; # package names have limited characters
     363    my $file_name = $package_name . '.pm';
     364    my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name);
     365    # see if we have a subclass lurking in that extension folder
     366    if (&FileUtils::fileExists($file_path))
    323367    {
    324     eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
     368      # - note we load the filename (with pm) unlike normal modules
     369      require $file_name;
     370      # - make call to the newly created package
     371      my $symbol = qualify('getSupportedArguments', $package_name);
     372      # - strict prevents strings being used as function calls, so temporarily
     373      #   disable that pragma
     374      no strict;
     375      # - lets check that the function we are about to call actually exists
     376      if ( defined &{$symbol} )
     377      {
     378        my $extra_arguments = &{$symbol}();
     379        foreach my $argument (@{$extra_arguments})
     380        {
     381          # - record a mapping from each extra arguments to the subclass
     382          #   that supports it. We put the hyphen on here to make comparing
     383          #   with command line arguments even easier
     384          $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
     385          # - and them add them as acceptable arguments to import.pl
     386          push(@{$options->{'args'}}, $argument);
     387        }
     388        $class_count++;
     389      }
     390      else
     391      {
     392        print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
     393      }
    325394    }
    326 
    327     # If $language has been specified, load the appropriate resource bundle
    328     # (Otherwise, the default resource bundle will be loaded automatically)
    329     if ($language && $language =~ /\S/) {
    330     &gsprintf::load_language_specific_resource_bundle($language);
    331     }
    332 
    333     if ($xml) {
    334         &PrintUsage::print_xml_usage($options);
    335     print "\n";
    336     return;
    337     }
    338 
    339     if ($gli) { # the gli wants strings to be in UTF-8
    340     &gsprintf::output_strings_in_UTF8;
    341     }
    342 
    343     # now check that we had exactly one leftover arg, which should be
    344     # the collection name. We don't want to do this earlier, cos
    345     # -xml arg doesn't need a collection name
    346     # Or if the user specified -h, then we output the usage also
    347     if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    348     {
    349     &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    350     die "\n";
    351     }
    352    
    353     $textindex = "";
    354     my $close_out = 0;
    355     if ($out !~ /^(STDERR|STDOUT)$/i) {
    356     open (OUT, ">$out") ||
    357         (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die);
    358     $out = "buildcol::OUT";
    359     $close_out = 1;
    360     }
    361     $out->autoflush(1);
    362 
    363     # get and check the collection
    364     if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
    365     &PrintUsage::print_txt_usage($options, "{buildcol.params}");
    366     die "\n";
    367     }
    368 
    369     if ($faillog eq "") {
    370     $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    371     }
    372     # note that we're appending to the faillog here (import.pl clears it each time)
    373     # this could potentially create a situation where the faillog keeps being added
    374     # to over multiple builds (if the import process is being skipped)
    375     open (FAILLOG, ">>$faillog") ||
    376     (&gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog) && die);
    377     $faillog = 'buildcol::FAILLOG';
    378     $faillog->autoflush(1);
    379 
    380     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    381     # Don't know why this didn't already happen, but now collection specific
    382     # classify and plugins directory also added to include path
    383     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/classify");
    384     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins");
    385 
    386     # Read in the collection configuration file.
    387     my ($collectcfg, $buildtype, $orthogonalbuildtypes);
    388     if ((defined $site) && ($site ne "")) { # GS3
    389     $gs_mode = "gs3";
    390     }
    391     $configfilename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    392     $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);
    393    
    394     # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    395     if (!defined($collectcfg->{'infodbtype'}))
    396     {
    397       $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    398     }
    399 
    400     if ($verbosity !~ /\d+/) {
    401     if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
    402         $verbosity = $collectcfg->{'verbosity'};
    403     } else {
    404         $verbosity = 2; # the default
    405     }
    406     }
    407     # we use searchtype for determining buildtype, but for old versions, use buildtype
    408     if (defined $collectcfg->{'buildtype'}) {
    409     $buildtype = $collectcfg->{'buildtype'};
    410     } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
    411     $buildtype = "mgpp";
    412     } else {
    413     $buildtype = "mg"; #mg is the default
    414     }
    415 
    416     if (defined $collectcfg->{'orthogonalbuildtypes'}) {
    417     $orthogonalbuildtypes = $collectcfg->{'orthogonalbuildtypes'};
    418     }
    419 
    420     if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
    421     $archivedir = $collectcfg->{'archivedir'};
    422     }
    423     if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
    424     $cachedir = $collectcfg->{'cachedir'};
    425     }
    426     if (defined $collectcfg->{'builddir'} && $builddir eq "") {
    427     $builddir = $collectcfg->{'builddir'};
    428     }
    429     if ($maxdocs !~ /\-?\d+/) {
    430     if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
    431         $maxdocs = $collectcfg->{'maxdocs'};
    432     } else {
    433         $maxdocs = -1; # the default
    434     }
    435     }
    436     if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/) {
    437     $maxnumeric = $collectcfg->{'maxnumeric'};
    438     }
    439    
    440     if ($maxnumeric < 4 || $maxnumeric > 512) {
    441     $maxnumeric = 4;
    442     }
    443    
    444     if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
    445     $debug = 1;
    446     }
    447     if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
    448     if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
    449         $mode = $collectcfg->{'mode'};
    450     } else {
    451         $mode = "all"; # the default
    452     }
    453     }
    454     if (defined $collectcfg->{'index'} && $indexname eq "") {
    455     $indexname = $collectcfg->{'index'};
    456     }
    457     if (defined $collectcfg->{'no_text'} && $no_text == 0) {
    458     if ($collectcfg->{'no_text'} =~ /^true$/i) {
    459         $no_text = 1;
    460     }
    461     }
    462     if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) {
    463     if ($collectcfg->{'no_strip_html'} =~ /^true$/i) {
    464         $no_strip_html = 1;
    465     }
    466     }
    467     if (defined $collectcfg->{'store_metadata_coverage'} && $store_metadata_coverage == 0) {
    468     if ($collectcfg->{'store_metadata_coverage'} =~ /^true$/i) {
    469         $store_metadata_coverage = 1;
    470     }
    471     }
    472     if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) {
    473     if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) {
    474         $remove_empty_classifications = 1;
    475     }
    476     }
    477    
    478     if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
    479     $textindex = $collectcfg->{'textcompress'};
    480     }
    481     if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
    482     $gli = 1;
    483     }
    484 
    485     if ($sections_index_document_metadata !~ /\S/ && defined $collectcfg->{'sections_index_document_metadata'}) {
    486     $sections_index_document_metadata = $collectcfg->{'sections_index_document_metadata'};
    487     }
    488    
    489     if ($sections_index_document_metadata !~ /^(never|always|unless_section_metadata_exists)$/) {
    490     $sections_index_document_metadata = "never";
    491     }
    492    
    493     ($removeold, $keepold, $incremental, $incremental_mode)
    494     = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
    495                            $incremental, "building",
    496                            $collectcfg);
    497  
    498     $gli = 0 unless defined $gli;
    499    
    500     # New argument to track whether build is incremental
    501     $incremental = 0 unless defined $incremental;
    502 
    503     print STDERR "<Build>\n" if $gli;
    504 
    505     #set the text index
    506     if (($buildtype eq "mgpp") || ($buildtype eq "lucene") || ($buildtype eq "solr")) {
    507     if ($textindex eq "") {
    508         $textindex = "text";
    509     }
    510     }
    511     else {
    512     $textindex = "section:text";
    513     }
    514 
    515     # fill in the default archives and building directories if none
    516     # were supplied, turn all \ into / and remove trailing /
    517 
    518     my ($realarchivedir, $realbuilddir);
    519     # Modified so that the archivedir, if provided as an argument, is made
    520     # absolute if it isn't already
    521     if ($archivedir eq "")
    522       {
    523         $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
    524       }
    525     else
    526       {
    527         $archivedir = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $archivedir);
    528       }
    529     # End Mod
    530     $archivedir =~ s/[\\\/]+/\//g;
    531     $archivedir =~ s/\/$//;
    532 
    533     if ($builddir eq "") {
    534     $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building");
    535     if ($incremental) {
    536         &gsprintf($out, "{buildcol.incremental_default_builddir}\n");
    537     }
    538     }
    539     $builddir =~ s/[\\\/]+/\//g;
    540     $builddir =~ s/\/$//;
    541 
    542     # update the archive cache if needed
    543     if ($cachedir) {
    544     &gsprintf($out, "{buildcol.updating_archive_cache}\n")
    545         if ($verbosity >= 1);
    546 
    547     $cachedir =~ s/[\\\/]+$//;
    548     $cachedir .= "/collect/$collection" unless
    549         $cachedir =~ /collect\/$collection/;
    550 
    551     $realarchivedir = "$cachedir/archives";
    552     $realbuilddir = "$cachedir/building";
    553     &util::mk_all_dir ($realarchivedir);
    554     &util::mk_all_dir ($realbuilddir);
    555     &util::cachedir ($archivedir, $realarchivedir, $verbosity);
    556 
    557     } else {
    558     $realarchivedir = $archivedir;
    559     $realbuilddir = $builddir;
    560     }
    561 
    562     # build it in realbuilddir
    563     &util::mk_all_dir ($realbuilddir);
    564 
    565     my ($buildertype, $builderdir,  $builder);
    566     # if a builder class has been created for this collection, use it
    567     # otherwise, use the mg or mgpp builder
    568     if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") {
    569     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib";
    570     $buildertype = "custombuilder";
    571     } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") {
    572     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    573     $buildertype = "custombuilder";
    574     } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
    575     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
    576     $buildertype = "${collection}builder";
    577     } else {   
    578 
    579     $builderdir = undef;
    580     if ($buildtype ne "") {
    581         # caters for extension-based build types, such as 'solr'
    582         $buildertype = $buildtype."builder";
    583     }
    584     else {
    585         # Default to mgpp
    586         $buildertype = "mgppbuilder";
    587     }
    588     }
    589     # check for extension specific builders
    590     # (that will then be run after main builder.pm
    591 
    592     my @builderdir_list = ($builderdir);
    593     my @buildertype_list = ($buildertype);
    594    
    595     if (defined $orthogonalbuildtypes) {
    596     foreach my $obt (@$orthogonalbuildtypes) {
    597 
    598         push(@builderdir_list,undef); # rely on @INC to find it
    599         push(@buildertype_list,$obt."Builder");
    600     }
    601     }
    602 
    603     # Set up array of the main builder.pm, followed by any ones
    604     # from the extension folders
    605    
    606     my $num_builders = scalar(@buildertype_list);
    607     my @builders = ();
    608 
    609     for (my $i=0; $i<$num_builders; $i++) {
    610     my $this_builder;
    611     my $this_buildertype = $buildertype_list[$i];
    612     my $this_builderdir  = $builderdir_list[$i];
    613 
    614     if ((defined $this_builderdir) && ($this_builderdir ne "")) {
    615         require "$this_builderdir/$this_buildertype.pm";
    616     }
    617     else {
    618         require "$this_buildertype.pm";
    619     }
    620 
    621     eval("\$this_builder = new $this_buildertype(\$site, \$collection, " .
    622          "\$realarchivedir, \$realbuilddir, \$verbosity, " .
    623          "\$maxdocs, \$debug, \$keepold, \$incremental, \$incremental_mode, " .
    624          "\$remove_empty_classifications, " .
    625          "\$out, \$no_text, \$faillog, \$gli)");
    626     die "$@" if $@;
    627 
    628     push(@builders,$this_builder);
    629     }
    630    
    631     # Init phase for builders
    632     for (my $i=0; $i<$num_builders; $i++) {
    633     my $this_buildertype = $buildertype_list[$i];
    634     my $this_builderdir  = $builderdir_list[$i];
    635     my $this_builder     = $builders[$i];
    636 
    637     $this_builder->init();
    638     $this_builder->set_maxnumeric($maxnumeric);
    639    
    640     if (($this_buildertype eq "mgppbuilder") && $no_strip_html) {
    641         $this_builder->set_strip_html(0);
    642     }
    643 
    644     if ($sections_index_document_metadata ne "never") {
    645         $this_builder->set_sections_index_document_metadata($sections_index_document_metadata);
    646     }
    647    
    648     if ($store_metadata_coverage) {
    649         $this_builder->set_store_metadata_coverage(1);
    650     }
    651     }
    652        
    653     # Run the requested passes
    654 
    655     if ($mode =~ /^all$/i) {
    656 
    657     # 'map' modifies the elements of the original array, so calling
    658     # methods -- as done below -- will cause (by default) @builders
    659     # to be changed to whatever these functions return (which is *not*
    660     # what we want -- we want to leave the values unchanged)
    661     # => Use 'local' (dynamic scoping) to give each 'map' call its
    662     #    own local copy This could also be done with:
    663     #      (my $new =$_)->method(); $new
    664     #    but is a bit more cumbersome to write
    665    
    666     map { local $_=$_; $_->compress_text($textindex); } @builders;
    667     map { local $_=$_; $_->build_indexes($indexname); } @builders;
    668     map { local $_=$_; $_->make_infodatabase(); }  @builders;
    669     map { local $_=$_; $_->collect_specific(); } @builders;
    670     } elsif ($mode =~ /^compress_text$/i) {
    671     map { local $_=$_; $_->compress_text($textindex); } @builders;
    672     } elsif ($mode =~ /^build_index$/i) {
    673     map { local $_=$_; $_->build_indexes($indexname); } @builders;
    674     } elsif ($mode =~ /^infodb$/i) {
    675     map { local $_=$_; $_->make_infodatabase(); } @builders;
    676     } else {
    677     (&gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode) && die);
    678     }
    679 
    680     if (!$debug) {
    681     map {local $_=$_; $_->make_auxiliary_files(); } @builders;
    682     }
    683     map {local $_=$_; $_->deinit(); } @builders;
    684    
    685     if (($realbuilddir ne $builddir) && !$debug) {
    686     &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
    687         if ($verbosity >= 1);
    688     &util::rm_r ($builddir);
    689     &util::cp_r ($realbuilddir, $builddir);
    690     }
    691 
    692 
    693     # for RSS support: Need rss-items.rdf file in index folder
    694     #  check if a file called rss-items.rdf exists in archives, then copy it into the building folder
    695     #  so that when building is moved to index, this file will then also be in index as desired
    696     my $collection_dir = &util::resolve_collection_dir($collectdir, $collection, $site);
    697     my $rss_items_rdf_file = &util::filename_cat($archivedir, "rss-items.rdf");
    698     if(defined $builddir && -d $builddir && -f $rss_items_rdf_file) {
    699     &gsprintf($out, "{buildcol.copying_rss_items_rdf}\n") if ($verbosity >= 1);
    700     &util::cp ($rss_items_rdf_file, $builddir);
    701     }
    702 
    703     # if buildcol.pl was run with -activate, need to run activate.pl
    704     # now that building's complete 
    705     if($activate) {
    706    
    707         #my $quoted_argv = join(" ", map { "\"$_\"" } @ARGV);
    708        
    709         my @activate_argv = ();
    710         push(@activate_argv,"-collectdir",$collectdir) if($collectdir);
    711         push(@activate_argv,"-builddir",$builddir) if($builddir);
    712         push(@activate_argv,"-site",$site) if($site);
    713         push(@activate_argv,"-verbosity",$verbosity) if($verbosity);
    714         push(@activate_argv,"-removeold") if($removeold);
    715         push(@activate_argv,"-keepold") if($keepold);
    716         push(@activate_argv,"-incremental") if($incremental);
    717         my $quoted_argv = join(" ", map { "\"$_\"" } @activate_argv);
    718        
    719         my $activatecol_cmd = "\"".&util::get_perl_exec()."\" -S activate.pl $quoted_argv \"$collection\"";
    720        
    721         my $activatecol_status = system($activatecol_cmd)/256;
    722 
    723         if ($activatecol_status != 0) {
    724             print STDERR "Error: Failed to run: $activatecol_cmd\n";
    725             print STDERR "       $!\n" if ($! ne "");
    726             exit(-1);
    727         }
    728     }
    729    
    730     close OUT if $close_out;
    731     close FAILLOG;
    732 
    733     print STDERR "</Build>\n" if $gli;
     395  }
     396  return $class_count;
    734397}
    735 
    736 
    737 
     398# _scanForSubclasses()
  • main/trunk/greenstone2/bin/script/import.pl

    r26536 r27305  
    6767}
    6868
     69# Pragma
    6970use strict;
     71use warnings;
     72
     73# Modules
     74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
     75
     76# Greenstone Modules
     77use FileUtils;
    7078use inexport;
    71 
    72 my $oidtype_list =
     79use util;
     80
     81my $oidtype_list =
    7382    [ { 'name' => "hash",
    7483        'desc' => "{import.OIDtype.hash}" },
     
    129138    'type' => "string",
    130139    # parsearg left "" as default
    131     #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
     140    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
    132141    'deft' => "",
    133142    'reqd' => "no",
     
    154163    'type' => "string",
    155164    # parsearg left "" as default
    156     #'deft' => &util::filename_cat("&lt;collectdir&gt;", "colname", "etc", "fail.log"),
     165    #'deft' => &FileUtils::filenameConcatenate("&lt;collectdir&gt;", "colname", "etc", "fail.log"),
    157166    'deft' => "",
    158167    'reqd' => "no",
     
    267276        'args' => $arguments };
    268277
    269 
    270 
    271 sub main 
     278my $function_to_inexport_subclass_mappings = {};
     279
     280sub main
    272281{
    273     my $inexport = new inexport("import",\@ARGV,$options);
    274    
    275     my $collection = $inexport->get_collection();
    276 
    277     if (defined $collection) {
    278     my ($config_filename,$collect_cfg)
    279         = $inexport->read_collection_cfg($collection,$options);   
    280 
    281     $inexport->set_collection_options($collect_cfg);
    282    
    283     my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);
    284    
    285     $inexport->generate_statistics($pluginfo);
     282  # Dynamically include arguments from any subclasses of inexport we find
     283  # in the extensions directory
     284  if (defined $ENV{'GSDLEXTS'})
     285  {
     286    &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
     287  }
     288  if (defined $ENV{'GSDL3EXTS'})
     289  {
     290    &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
     291  }
     292
     293  # Loop through arguments, checking to see if any depend on a specific
     294  # subclass of InExport. Note that we load the first subclass we encounter
     295  # so only support a single 'override' ATM.
     296  my $inexport_subclass;
     297  foreach my $argument (@ARGV)
     298  {
     299    # proper arguments start with a hyphen
     300    if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
     301    {
     302      my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
     303      if (!defined $inexport_subclass)
     304      {
     305        $inexport_subclass = $required_inexport_subclass;
     306      }
     307      # Oh noes! The user has included specific arguments from two different
     308      # inexport subclasses... this isn't supported
     309      elsif ($inexport_subclass ne $required_inexport_subclass)
     310      {
     311        print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
     312        exit;
     313      }
    286314    }
     315  }
     316
     317  my $inexport;
     318  if (defined $inexport_subclass)
     319  {
     320    print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
     321    require $inexport_subclass . '.pm';
     322    $inexport = new $inexport_subclass("import",\@ARGV,$options);
     323  }
     324  # We don't have a overridden inexport, or the above command failed somehow
     325  # so load the base inexport class
     326  if (!defined $inexport)
     327  {
     328    $inexport = new inexport("import",\@ARGV,$options);
     329  }
     330
     331  my $collection = $inexport->get_collection();
     332
     333  if (defined $collection)
     334  {
     335    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);
     336
     337    $inexport->set_collection_options($collect_cfg);
     338
     339    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);
     340
     341    $inexport->generate_statistics($pluginfo);
     342  }
     343
     344  $inexport->deinit();
    287345}
    288 
     346# main()
     347
     348# @function _scanForSubclasses()
     349# @param $dir The extension directory to look within
     350# @param $exts A list of the available extensions (as a colon separated string)
     351# @return The number of subclasses of InExport found as an Integer
     352sub _scanForSubclasses
     353{
     354  my ($dir, $exts) = @_;
     355  my $inexport_class_count = 0;
     356  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
     357  my @extensions = split(/:/, $exts);
     358  foreach my $e (@extensions)
     359  {
     360    # - any subclass of InExport must be prefixed with the name of the ext
     361    my $package_name = $e . 'inexport';
     362    $package_name =~ s/[^a-z]//gi; # package names have limited characters
     363    my $inexport_filename = $package_name . '.pm';
     364    my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
     365    # see if we have a subclass of InExport lurking in that extension folder
     366    if (-f $inexport_path)
     367    {
     368      # - note we load the filename (with pm) unlike normal modules
     369      require $inexport_filename;
     370      # - make call to the newly created package
     371      my $symbol = qualify('getSupportedArguments', $package_name);
     372      # - strict prevents strings being used as function calls, so temporarily
     373      #   disable that pragma
     374      no strict;
     375      # - lets check that the function we are about to call actually exists
     376      if ( defined &{$symbol} )
     377      {
     378        my $extra_arguments = &{$symbol}();
     379        foreach my $argument (@{$extra_arguments})
     380        {
     381          # - record a mapping from each extra arguments to the inexport class
     382          #   that supports it. We put the hyphen on here to make comparing
     383          #   with command line arguments even easier
     384          $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
     385          # - and them add them as acceptable arguments to import.pl
     386          push(@{$options->{'args'}}, $argument);
     387        }
     388        $inexport_class_count++;
     389      }
     390      else
     391      {
     392        print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
     393      }
     394    }
     395  }
     396  return $inexport_class_count;
     397}
     398# _scanForInExportModules()
    289399
    290400&main();
Note: See TracChangeset for help on using the changeset viewer.