Changeset 27305

Show
Ignore:
Timestamp:
06.05.2013 15:27:37 (6 years ago)
Author:
jmt12
Message:

Add code to allow importing and building to load overriding versions of inexport.pm and buildcolutils.pm from extensions at runtime. When an extension provides a possible override, Greenstone will dynamically detect and add additional options (visible in the --help). When a user specifies one of these options the appropriate inexport/buildcolutils subclass will be loaded

Location:
main/trunk/greenstone2/bin/script
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/buildcol.pl

    r26976 r27305  
    44# 
    55# buildcol.pl -- 
     6# 
    67# A component of the Greenstone digital library software 
    7 # from the New Zealand Digital Library Project at the  
     8# from the New Zealand Digital Library Project at the 
    89# University of Waikato, New Zealand. 
    910# 
     
    2627########################################################################### 
    2728 
    28  
    2929# This program will build a particular collection. 
    30  
    3130package buildcol; 
    3231 
    33 BEGIN { 
    34     die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 
    35     die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; 
    36     unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); 
    37     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 
    38     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath"); 
    39     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); 
    40     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify"); 
    41  
    42     if (defined $ENV{'GSDL-RUN-SETUP'}) { 
    43     require util; 
    44     &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'}); 
     32# Environment 
     33BEGIN 
     34{ 
     35  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 
     36  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; 
     37  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib'); 
     38  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan'); 
     39  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath'); 
     40  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins'); 
     41  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify'); 
     42 
     43  if (defined $ENV{'GSDL-RUN-SETUP'}) 
     44  { 
     45    require util; 
     46    &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'}); 
     47  } 
     48 
     49  if (defined $ENV{'GSDLEXTS'}) 
     50  { 
     51    my @extensions = split(/:/, $ENV{'GSDLEXTS'}); 
     52    foreach my $e (@extensions) 
     53    { 
     54      my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e; 
     55 
     56      unshift(@INC, $ext_prefix . '/perllib'); 
     57      unshift(@INC, $ext_prefix . '/perllib/cpan'); 
     58      unshift(@INC, $ext_prefix . '/perllib/plugins'); 
     59      unshift(@INC, $ext_prefix . '/perllib/classify'); 
    4560    } 
    46      
    47     if (defined $ENV{'GSDLEXTS'}) { 
    48     my @extensions = split(/:/,$ENV{'GSDLEXTS'}); 
    49     foreach my $e (@extensions) { 
    50         my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e"; 
    51  
    52         unshift (@INC, "$ext_prefix/perllib"); 
    53         unshift (@INC, "$ext_prefix/perllib/cpan"); 
    54         unshift (@INC, "$ext_prefix/perllib/plugins"); 
    55         unshift (@INC, "$ext_prefix/perllib/classify"); 
    56     } 
     61  } 
     62  if (defined $ENV{'GSDL3EXTS'}) 
     63  { 
     64    my @extensions = split(/:/, $ENV{'GSDL3EXTS'}); 
     65    foreach my $e (@extensions) 
     66    { 
     67      my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e; 
     68 
     69      unshift(@INC, $ext_prefix . '/perllib'); 
     70      unshift(@INC, $ext_prefix . '/perllib/cpan'); 
     71      unshift(@INC, $ext_prefix . '/perllib/plugins'); 
     72      unshift(@INC, $ext_prefix . '/perllib/classify'); 
    5773    } 
    58     if (defined $ENV{'GSDL3EXTS'}) { 
    59     my @extensions = split(/:/,$ENV{'GSDL3EXTS'}); 
    60     foreach my $e (@extensions) { 
    61         my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e"; 
    62  
    63         unshift (@INC, "$ext_prefix/perllib"); 
    64         unshift (@INC, "$ext_prefix/perllib/cpan"); 
    65         unshift (@INC, "$ext_prefix/perllib/plugins"); 
    66         unshift (@INC, "$ext_prefix/perllib/classify"); 
    67     } 
    68     } 
    69  
     74  } 
    7075} 
    7176 
    72 use colcfg; 
    73 use dbutil; 
    74 use util; 
    75 use scriptutil; 
    76 use FileHandle; 
    77 use gsprintf; 
    78 use printusage; 
    79 use parse2; 
    80  
     77# Pragma 
    8178use strict; 
    8279no strict 'refs'; # allow filehandles to be variables and vice versa 
    8380no strict 'subs'; # allow barewords (eg STDERR) as function arguments 
    8481 
    85  
     82# Modules 
     83use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12] 
     84 
     85# Greenstone Modules 
     86use buildcolutils; 
     87use FileUtils; 
     88use util; 
     89 
     90# Globals 
     91# - build up arguments list/control 
    8692my $mode_list = 
    8793    [ { 'name' => "all", 
     
    94100        'desc' => "{buildcol.mode.infodb}" } ]; 
    95101 
    96 my $sec_index_list =  
     102my $sec_index_list = 
    97103    [ {'name' => "never", 
    98104       'desc' => "{buildcol.sections_index_document_metadata.never}" }, 
     
    127133    'type' => "string", 
    128134    # parsearg left "" as default 
    129     #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 
     135    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"), 
    130136    'reqd' => "no", 
    131137        'hiddengli' => "yes" }, 
     
    145151    'type' => "string", 
    146152    # parsearg left "" as default 
    147     #'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"), 
     153    #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"), 
    148154    'reqd' => "no", 
    149155    'modegli' => "3" }, 
     
    243249    'reqd' => "no", 
    244250    'hiddengli' => "yes" }, 
     251      { 'name' => "indexname", 
     252    'desc' => "{buildcol.index}", 
     253    'type' => "string", 
     254    'reqd' => "no", 
     255    'modegli' => "3" }, 
     256      { 'name' => "indexlevel", 
     257    'desc' => "{buildcol.indexlevel}", 
     258    'type' => "string", 
     259    'reqd' => "no", 
     260    'modegli' => "3" }, 
    245261      ]; 
    246262 
     
    249265        'args' => $arguments }; 
    250266 
    251  
    252 # globals 
    253 my $collection; 
    254 my $configfilename; 
    255 my $out; 
    256  
    257 # used to signify "gs2"(default) or "gs3" 
    258 my $gs_mode = "gs2"; 
    259  
    260 ## @method gsprintf() 
    261 #  Print a string to the screen after looking it up from a locale dependant 
    262 #  strings file. This function is losely based on the idea of resource 
    263 #  bundles as used in Java. 
    264 # 
    265 #  @param  $error The STDERR stream. 
    266 #  @param  $text The string containing GS keys that should be replaced with 
    267 #                their locale dependant equivilents. 
    268 #  @param  $out The output stream. 
    269 #  @return The locale-based string to output. 
    270 # 
    271 sub gsprintf() 
    272 { 
    273     return &gsprintf::gsprintf(@_); 
    274 } 
    275 ## gsprintf() ## 
    276  
     267# The hash maps between argument and the buildcolutils subclass supporting that 
     268# argument - allowing for extensions to override the normal buildcolutils as 
     269# necessary 
     270my $function_to_subclass_mappings = {}; 
     271 
     272# Lets get the party rolling... or ball started... hmmm 
    277273&main(); 
    278274 
    279 ## @method main() 
    280 # 
    281 #  [Parses up and validates the arguments to the build process before creating 
    282 #  the appropriate build process to do the actual work - John] 
    283 # 
    284 #  @note Added true incremental support - John Thompson, DL Consulting Ltd. 
    285 #  @note There were several bugs regarding using directories other than  
    286 #        "import" or "archives" during import and build quashed. - John  
    287 #        Thompson, DL Consulting Ltd. 
    288 # 
    289 #  @param  $incremental If true indicates this build should not regenerate all 
    290 #                       the index and metadata files, and should instead just 
    291 #                       append the information found in the archives directory 
    292 #                       to the existing files. If this requires some complex 
    293 #                       work so as to correctly insert into a classifier so be 
    294 #                       it. Of course none of this is done here - instead the 
    295 #                       incremental argument is passed to the document 
    296 #                       processor. 
    297 # 
     275exit; 
     276 
    298277sub main 
    299278{ 
    300     # command line args 
    301     my ($verbosity, $archivedir, $cachedir, $builddir, $site, $maxdocs,  
    302     $debug, $mode, $indexname, $removeold, $keepold,  
    303     $incremental, $incremental_mode, 
    304     $remove_empty_classifications, 
    305     $collectdir, $build, $type, $textindex, 
    306     $no_strip_html, $store_metadata_coverage, 
    307     $no_text, $faillog, $gli, $index, $language, 
    308     $sections_index_document_metadata, $maxnumeric, $activate); 
    309      
    310     my $xml = 0; 
    311     my $hashParsingResult = {}; 
    312     # general options available to all plugins 
    313     my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options"); 
    314  
    315     # If parse returns -1 then something has gone wrong 
    316     if ($intArgLeftinAfterParsing == -1) 
     279  # Dynamically include arguments from any subclasses of buildcolutils we find 
     280  # in the extensions directory 
     281  if (defined $ENV{'GSDLEXTS'}) 
     282  { 
     283    &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'}); 
     284  } 
     285  if (defined $ENV{'GSDL3EXTS'}) 
     286  { 
     287    &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'}); 
     288  } 
     289 
     290  # Loop through arguments, checking to see if any depend on a specific 
     291  # subclass of buildcolutils. Note that we load the first subclass we 
     292  # encounter so only support a single 'override' ATM. 
     293  my $subclass; 
     294  foreach my $argument (@ARGV) 
     295  { 
     296    # proper arguments start with a hyphen 
     297    if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument}) 
    317298    { 
    318     &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 
    319     die "\n"; 
     299      my $required_subclass = $function_to_subclass_mappings->{$argument}; 
     300      if (!defined $subclass) 
     301      { 
     302        $subclass = $required_subclass; 
     303      } 
     304      # Oh noes! The user has included specific arguments from two different 
     305      # subclasses... this isn't supported 
     306      elsif ($subclass ne $required_subclass) 
     307      { 
     308        print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n"; 
     309        exit; 
     310      } 
    320311    } 
    321      
    322     foreach my $strVariable (keys %$hashParsingResult) 
     312  } 
     313 
     314  my $buildcolutils; 
     315  if (defined $subclass) 
     316  { 
     317    print "* Loading overriding buildcolutils module: " . $subclass . "\n"; 
     318    require $subclass . '.pm'; 
     319    $buildcolutils = new $subclass(\@ARGV, $options); 
     320  } 
     321  # We don't have a overridden buildcolutils, or the above command failed 
     322  # somehow so load the base class 
     323  if (!defined $buildcolutils) 
     324  { 
     325    $buildcolutils = new buildcolutils(\@ARGV, $options); 
     326  } 
     327 
     328  my $collection = $buildcolutils->get_collection(); 
     329  if (defined $collection) 
     330  { 
     331    my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options); 
     332    $buildcolutils->set_collection_options($collect_cfg); 
     333 
     334    my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg); 
     335    $buildcolutils->build_collection($builders_ref); 
     336    $buildcolutils->build_auxiliary_files($builders_ref); 
     337    $buildcolutils->complete_builders($builders_ref); 
     338 
     339    # The user may have requested the collection be activated 
     340    $buildcolutils->activate_collection(); 
     341  } 
     342 
     343  # Cleanup 
     344  $buildcolutils->deinit(); 
     345} 
     346# main() 
     347 
     348# @function _scanForSubclasses() 
     349# @param $dir The extension directory to look within 
     350# @param $exts A list of the available extensions (as a colon separated string) 
     351# @return The number of subclasses of buildcolutils found as an Integer 
     352sub _scanForSubclasses 
     353{ 
     354  my ($dir, $exts) = @_; 
     355  my $class_count = 0; 
     356  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext"); 
     357  my @extensions = split(/:/, $exts); 
     358  foreach my $e (@extensions) 
     359  { 
     360    # - any subclass must be prefixed with the name of the ext 
     361    my $package_name = $e . 'buildcolutils'; 
     362    $package_name =~ s/[^a-z]//gi; # package names have limited characters 
     363    my $file_name = $package_name . '.pm'; 
     364    my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name); 
     365    # see if we have a subclass lurking in that extension folder 
     366    if (&FileUtils::fileExists($file_path)) 
    323367    { 
    324     eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}"; 
     368      # - note we load the filename (with pm) unlike normal modules 
     369      require $file_name; 
     370      # - make call to the newly created package 
     371      my $symbol = qualify('getSupportedArguments', $package_name); 
     372      # - strict prevents strings being used as function calls, so temporarily 
     373      #   disable that pragma 
     374      no strict; 
     375      # - lets check that the function we are about to call actually exists 
     376      if ( defined &{$symbol} ) 
     377      { 
     378        my $extra_arguments = &{$symbol}(); 
     379        foreach my $argument (@{$extra_arguments}) 
     380        { 
     381          # - record a mapping from each extra arguments to the subclass 
     382          #   that supports it. We put the hyphen on here to make comparing 
     383          #   with command line arguments even easier 
     384          $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name; 
     385          # - and them add them as acceptable arguments to import.pl 
     386          push(@{$options->{'args'}}, $argument); 
     387        } 
     388        $class_count++; 
     389      } 
     390      else 
     391      { 
     392        print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n"; 
     393      } 
    325394    } 
    326  
    327     # If $language has been specified, load the appropriate resource bundle 
    328     # (Otherwise, the default resource bundle will be loaded automatically) 
    329     if ($language && $language =~ /\S/) { 
    330     &gsprintf::load_language_specific_resource_bundle($language); 
    331     } 
    332  
    333     if ($xml) { 
    334         &PrintUsage::print_xml_usage($options); 
    335     print "\n"; 
    336     return; 
    337     } 
    338  
    339     if ($gli) { # the gli wants strings to be in UTF-8 
    340     &gsprintf::output_strings_in_UTF8;  
    341     } 
    342  
    343     # now check that we had exactly one leftover arg, which should be  
    344     # the collection name. We don't want to do this earlier, cos  
    345     # -xml arg doesn't need a collection name 
    346     # Or if the user specified -h, then we output the usage also 
    347     if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) 
    348     { 
    349     &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 
    350     die "\n"; 
    351     } 
    352      
    353     $textindex = ""; 
    354     my $close_out = 0; 
    355     if ($out !~ /^(STDERR|STDOUT)$/i) { 
    356     open (OUT, ">$out") || 
    357         (&gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out) && die); 
    358     $out = "buildcol::OUT"; 
    359     $close_out = 1; 
    360     } 
    361     $out->autoflush(1); 
    362  
    363     # get and check the collection 
    364     if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") { 
    365     &PrintUsage::print_txt_usage($options, "{buildcol.params}"); 
    366     die "\n"; 
    367     } 
    368  
    369     if ($faillog eq "") { 
    370     $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 
    371     } 
    372     # note that we're appending to the faillog here (import.pl clears it each time) 
    373     # this could potentially create a situation where the faillog keeps being added  
    374     # to over multiple builds (if the import process is being skipped) 
    375     open (FAILLOG, ">>$faillog") || 
    376     (&gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog) && die); 
    377     $faillog = 'buildcol::FAILLOG'; 
    378     $faillog->autoflush(1); 
    379  
    380     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 
    381     # Don't know why this didn't already happen, but now collection specific 
    382     # classify and plugins directory also added to include path 
    383     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/classify"); 
    384     unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins"); 
    385  
    386     # Read in the collection configuration file. 
    387     my ($collectcfg, $buildtype, $orthogonalbuildtypes);  
    388     if ((defined $site) && ($site ne "")) { # GS3 
    389     $gs_mode = "gs3"; 
    390     } 
    391     $configfilename = &colcfg::get_collect_cfg_name($out, $gs_mode); 
    392     $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode); 
    393      
    394     # If the infodbtype value wasn't defined in the collect.cfg file, use the default 
    395     if (!defined($collectcfg->{'infodbtype'})) 
    396     { 
    397       $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type(); 
    398     } 
    399  
    400     if ($verbosity !~ /\d+/) { 
    401     if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 
    402         $verbosity = $collectcfg->{'verbosity'}; 
    403     } else { 
    404         $verbosity = 2; # the default 
    405     } 
    406     } 
    407     # we use searchtype for determining buildtype, but for old versions, use buildtype 
    408     if (defined $collectcfg->{'buildtype'}) { 
    409     $buildtype = $collectcfg->{'buildtype'}; 
    410     } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) { 
    411     $buildtype = "mgpp"; 
    412     } else { 
    413     $buildtype = "mg"; #mg is the default 
    414     } 
    415  
    416     if (defined $collectcfg->{'orthogonalbuildtypes'}) { 
    417     $orthogonalbuildtypes = $collectcfg->{'orthogonalbuildtypes'}; 
    418     } 
    419  
    420     if (defined $collectcfg->{'archivedir'} && $archivedir eq "") { 
    421     $archivedir = $collectcfg->{'archivedir'}; 
    422     } 
    423     if (defined $collectcfg->{'cachedir'} && $cachedir eq "") { 
    424     $cachedir = $collectcfg->{'cachedir'}; 
    425     } 
    426     if (defined $collectcfg->{'builddir'} && $builddir eq "") { 
    427     $builddir = $collectcfg->{'builddir'}; 
    428     } 
    429     if ($maxdocs !~ /\-?\d+/) { 
    430     if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 
    431         $maxdocs = $collectcfg->{'maxdocs'}; 
    432     } else { 
    433         $maxdocs = -1; # the default 
    434     } 
    435     } 
    436     if (defined $collectcfg->{'maxnumeric'} && $collectcfg->{'maxnumeric'} =~ /\d+/) { 
    437     $maxnumeric = $collectcfg->{'maxnumeric'}; 
    438     }  
    439      
    440     if ($maxnumeric < 4 || $maxnumeric > 512) { 
    441     $maxnumeric = 4; 
    442     } 
    443      
    444     if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 
    445     $debug = 1; 
    446     } 
    447     if ($mode !~ /^(all|compress_text|build_index|infodb)$/) { 
    448     if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) { 
    449         $mode = $collectcfg->{'mode'}; 
    450     } else { 
    451         $mode = "all"; # the default 
    452     } 
    453     } 
    454     if (defined $collectcfg->{'index'} && $indexname eq "") { 
    455     $indexname = $collectcfg->{'index'}; 
    456     } 
    457     if (defined $collectcfg->{'no_text'} && $no_text == 0) { 
    458     if ($collectcfg->{'no_text'} =~ /^true$/i) { 
    459         $no_text = 1; 
    460     } 
    461     } 
    462     if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) { 
    463     if ($collectcfg->{'no_strip_html'} =~ /^true$/i) { 
    464         $no_strip_html = 1; 
    465     } 
    466     } 
    467     if (defined $collectcfg->{'store_metadata_coverage'} && $store_metadata_coverage == 0) { 
    468     if ($collectcfg->{'store_metadata_coverage'} =~ /^true$/i) { 
    469         $store_metadata_coverage = 1; 
    470     } 
    471     } 
    472     if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) { 
    473     if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) { 
    474         $remove_empty_classifications = 1; 
    475     } 
    476     } 
    477      
    478     if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) { 
    479     $textindex = $collectcfg->{'textcompress'}; 
    480     } 
    481     if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 
    482     $gli = 1; 
    483     } 
    484  
    485     if ($sections_index_document_metadata !~ /\S/ && defined $collectcfg->{'sections_index_document_metadata'}) { 
    486     $sections_index_document_metadata = $collectcfg->{'sections_index_document_metadata'}; 
    487     } 
    488      
    489     if ($sections_index_document_metadata !~ /^(never|always|unless_section_metadata_exists)$/) { 
    490     $sections_index_document_metadata = "never"; 
    491     } 
    492      
    493     ($removeold, $keepold, $incremental, $incremental_mode)  
    494     = &scriptutil::check_removeold_and_keepold($removeold, $keepold,  
    495                            $incremental, "building",  
    496                            $collectcfg); 
    497   
    498     $gli = 0 unless defined $gli; 
    499      
    500     # New argument to track whether build is incremental 
    501     $incremental = 0 unless defined $incremental; 
    502  
    503     print STDERR "<Build>\n" if $gli; 
    504  
    505     #set the text index 
    506     if (($buildtype eq "mgpp") || ($buildtype eq "lucene") || ($buildtype eq "solr")) { 
    507     if ($textindex eq "") { 
    508         $textindex = "text"; 
    509     } 
    510     } 
    511     else { 
    512     $textindex = "section:text"; 
    513     } 
    514  
    515     # fill in the default archives and building directories if none 
    516     # were supplied, turn all \ into / and remove trailing / 
    517  
    518     my ($realarchivedir, $realbuilddir); 
    519     # Modified so that the archivedir, if provided as an argument, is made 
    520     # absolute if it isn't already 
    521     if ($archivedir eq "") 
    522       { 
    523         $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 
    524       } 
    525     else 
    526       { 
    527         $archivedir = &util::make_absolute($ENV{'GSDLCOLLECTDIR'}, $archivedir); 
    528       } 
    529     # End Mod 
    530     $archivedir =~ s/[\\\/]+/\//g; 
    531     $archivedir =~ s/\/$//; 
    532  
    533     if ($builddir eq "") { 
    534     $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building"); 
    535     if ($incremental) { 
    536         &gsprintf($out, "{buildcol.incremental_default_builddir}\n"); 
    537     } 
    538     } 
    539     $builddir =~ s/[\\\/]+/\//g; 
    540     $builddir =~ s/\/$//; 
    541  
    542     # update the archive cache if needed 
    543     if ($cachedir) { 
    544     &gsprintf($out, "{buildcol.updating_archive_cache}\n") 
    545         if ($verbosity >= 1); 
    546  
    547     $cachedir =~ s/[\\\/]+$//; 
    548     $cachedir .= "/collect/$collection" unless  
    549         $cachedir =~ /collect\/$collection/; 
    550  
    551     $realarchivedir = "$cachedir/archives"; 
    552     $realbuilddir = "$cachedir/building"; 
    553     &util::mk_all_dir ($realarchivedir); 
    554     &util::mk_all_dir ($realbuilddir); 
    555     &util::cachedir ($archivedir, $realarchivedir, $verbosity); 
    556  
    557     } else { 
    558     $realarchivedir = $archivedir; 
    559     $realbuilddir = $builddir; 
    560     } 
    561  
    562     # build it in realbuilddir 
    563     &util::mk_all_dir ($realbuilddir); 
    564  
    565     my ($buildertype, $builderdir,  $builder); 
    566     # if a builder class has been created for this collection, use it 
    567     # otherwise, use the mg or mgpp builder 
    568     if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") { 
    569     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib"; 
    570     $buildertype = "custombuilder"; 
    571     } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") { 
    572     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
    573     $buildertype = "custombuilder"; 
    574     } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") { 
    575     $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
    576     $buildertype = "${collection}builder"; 
    577     } else {     
    578  
    579     $builderdir = undef; 
    580     if ($buildtype ne "") { 
    581         # caters for extension-based build types, such as 'solr' 
    582         $buildertype = $buildtype."builder"; 
    583     } 
    584     else { 
    585         # Default to mgpp 
    586         $buildertype = "mgppbuilder"; 
    587     } 
    588     } 
    589     # check for extension specific builders  
    590     # (that will then be run after main builder.pm 
    591  
    592     my @builderdir_list = ($builderdir); 
    593     my @buildertype_list = ($buildertype); 
    594      
    595     if (defined $orthogonalbuildtypes) { 
    596     foreach my $obt (@$orthogonalbuildtypes) { 
    597  
    598         push(@builderdir_list,undef); # rely on @INC to find it 
    599         push(@buildertype_list,$obt."Builder"); 
    600     } 
    601     } 
    602  
    603     # Set up array of the main builder.pm, followed by any ones 
    604     # from the extension folders 
    605      
    606     my $num_builders = scalar(@buildertype_list); 
    607     my @builders = (); 
    608  
    609     for (my $i=0; $i<$num_builders; $i++) { 
    610     my $this_builder; 
    611     my $this_buildertype = $buildertype_list[$i]; 
    612     my $this_builderdir  = $builderdir_list[$i]; 
    613  
    614     if ((defined $this_builderdir) && ($this_builderdir ne "")) { 
    615         require "$this_builderdir/$this_buildertype.pm"; 
    616     } 
    617     else { 
    618         require "$this_buildertype.pm"; 
    619     } 
    620  
    621     eval("\$this_builder = new $this_buildertype(\$site, \$collection, " . 
    622          "\$realarchivedir, \$realbuilddir, \$verbosity, " . 
    623          "\$maxdocs, \$debug, \$keepold, \$incremental, \$incremental_mode, " . 
    624          "\$remove_empty_classifications, " . 
    625          "\$out, \$no_text, \$faillog, \$gli)"); 
    626     die "$@" if $@; 
    627  
    628     push(@builders,$this_builder); 
    629     } 
    630      
    631     # Init phase for builders 
    632     for (my $i=0; $i<$num_builders; $i++) { 
    633     my $this_buildertype = $buildertype_list[$i]; 
    634     my $this_builderdir  = $builderdir_list[$i]; 
    635     my $this_builder     = $builders[$i]; 
    636  
    637     $this_builder->init(); 
    638     $this_builder->set_maxnumeric($maxnumeric); 
    639      
    640     if (($this_buildertype eq "mgppbuilder") && $no_strip_html) { 
    641         $this_builder->set_strip_html(0); 
    642     } 
    643  
    644     if ($sections_index_document_metadata ne "never") { 
    645         $this_builder->set_sections_index_document_metadata($sections_index_document_metadata); 
    646     } 
    647      
    648     if ($store_metadata_coverage) { 
    649         $this_builder->set_store_metadata_coverage(1); 
    650     } 
    651     } 
    652          
    653     # Run the requested passes 
    654  
    655     if ($mode =~ /^all$/i) { 
    656  
    657     # 'map' modifies the elements of the original array, so calling 
    658     # methods -- as done below -- will cause (by default) @builders 
    659     # to be changed to whatever these functions return (which is *not* 
    660     # what we want -- we want to leave the values unchanged)  
    661     # => Use 'local' (dynamic scoping) to give each 'map' call its 
    662     #    own local copy This could also be done with:  
    663     #      (my $new =$_)->method(); $new  
    664     #    but is a bit more cumbersome to write 
    665      
    666     map { local $_=$_; $_->compress_text($textindex); } @builders; 
    667     map { local $_=$_; $_->build_indexes($indexname); } @builders; 
    668     map { local $_=$_; $_->make_infodatabase(); }  @builders; 
    669     map { local $_=$_; $_->collect_specific(); } @builders; 
    670     } elsif ($mode =~ /^compress_text$/i) { 
    671     map { local $_=$_; $_->compress_text($textindex); } @builders; 
    672     } elsif ($mode =~ /^build_index$/i) { 
    673     map { local $_=$_; $_->build_indexes($indexname); } @builders; 
    674     } elsif ($mode =~ /^infodb$/i) { 
    675     map { local $_=$_; $_->make_infodatabase(); } @builders; 
    676     } else { 
    677     (&gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode) && die); 
    678     } 
    679  
    680     if (!$debug) { 
    681     map {local $_=$_; $_->make_auxiliary_files(); } @builders; 
    682     } 
    683     map {local $_=$_; $_->deinit(); } @builders; 
    684      
    685     if (($realbuilddir ne $builddir) && !$debug) { 
    686     &gsprintf($out, "{buildcol.copying_back_cached_build}\n") 
    687         if ($verbosity >= 1); 
    688     &util::rm_r ($builddir); 
    689     &util::cp_r ($realbuilddir, $builddir); 
    690     } 
    691  
    692  
    693     # for RSS support: Need rss-items.rdf file in index folder 
    694     #  check if a file called rss-items.rdf exists in archives, then copy it into the building folder 
    695     #  so that when building is moved to index, this file will then also be in index as desired 
    696     my $collection_dir = &util::resolve_collection_dir($collectdir, $collection, $site); 
    697     my $rss_items_rdf_file = &util::filename_cat($archivedir, "rss-items.rdf"); 
    698     if(defined $builddir && -d $builddir && -f $rss_items_rdf_file) { 
    699     &gsprintf($out, "{buildcol.copying_rss_items_rdf}\n") if ($verbosity >= 1); 
    700     &util::cp ($rss_items_rdf_file, $builddir); 
    701     } 
    702  
    703     # if buildcol.pl was run with -activate, need to run activate.pl  
    704     # now that building's complete   
    705     if($activate) { 
    706      
    707         #my $quoted_argv = join(" ", map { "\"$_\"" } @ARGV); 
    708          
    709         my @activate_argv = (); 
    710         push(@activate_argv,"-collectdir",$collectdir) if($collectdir); 
    711         push(@activate_argv,"-builddir",$builddir) if($builddir); 
    712         push(@activate_argv,"-site",$site) if($site); 
    713         push(@activate_argv,"-verbosity",$verbosity) if($verbosity); 
    714         push(@activate_argv,"-removeold") if($removeold); 
    715         push(@activate_argv,"-keepold") if($keepold); 
    716         push(@activate_argv,"-incremental") if($incremental); 
    717         my $quoted_argv = join(" ", map { "\"$_\"" } @activate_argv); 
    718          
    719         my $activatecol_cmd = "\"".&util::get_perl_exec()."\" -S activate.pl $quoted_argv \"$collection\""; 
    720          
    721         my $activatecol_status = system($activatecol_cmd)/256; 
    722  
    723         if ($activatecol_status != 0) { 
    724             print STDERR "Error: Failed to run: $activatecol_cmd\n"; 
    725             print STDERR "       $!\n" if ($! ne ""); 
    726             exit(-1); 
    727         } 
    728     } 
    729      
    730     close OUT if $close_out; 
    731     close FAILLOG; 
    732  
    733     print STDERR "</Build>\n" if $gli; 
     395  } 
     396  return $class_count; 
    734397} 
    735  
    736  
    737  
     398# _scanForSubclasses() 
  • main/trunk/greenstone2/bin/script/import.pl

    r26536 r27305  
    6767} 
    6868 
     69# Pragma 
    6970use strict; 
     71use warnings; 
     72 
     73# Modules 
     74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12] 
     75 
     76# Greenstone Modules 
     77use FileUtils; 
    7078use inexport; 
    71  
    72 my $oidtype_list =  
     79use util; 
     80 
     81my $oidtype_list = 
    7382    [ { 'name' => "hash", 
    7483        'desc' => "{import.OIDtype.hash}" }, 
     
    129138    'type' => "string", 
    130139    # parsearg left "" as default 
    131     #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 
     140    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"), 
    132141    'deft' => "", 
    133142    'reqd' => "no", 
     
    154163    'type' => "string", 
    155164    # parsearg left "" as default 
    156     #'deft' => &util::filename_cat("&lt;collectdir&gt;", "colname", "etc", "fail.log"), 
     165    #'deft' => &FileUtils::filenameConcatenate("&lt;collectdir&gt;", "colname", "etc", "fail.log"), 
    157166    'deft' => "", 
    158167    'reqd' => "no", 
     
    267276        'args' => $arguments }; 
    268277 
    269  
    270  
    271 sub main  
     278my $function_to_inexport_subclass_mappings = {}; 
     279 
     280sub main 
    272281{ 
    273     my $inexport = new inexport("import",\@ARGV,$options); 
    274      
    275     my $collection = $inexport->get_collection(); 
    276  
    277     if (defined $collection) { 
    278     my ($config_filename,$collect_cfg)  
    279         = $inexport->read_collection_cfg($collection,$options);     
    280  
    281     $inexport->set_collection_options($collect_cfg); 
    282      
    283     my $pluginfo = $inexport->process_files($config_filename,$collect_cfg); 
    284      
    285     $inexport->generate_statistics($pluginfo); 
     282  # Dynamically include arguments from any subclasses of inexport we find 
     283  # in the extensions directory 
     284  if (defined $ENV{'GSDLEXTS'}) 
     285  { 
     286    &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'}); 
     287  } 
     288  if (defined $ENV{'GSDL3EXTS'}) 
     289  { 
     290    &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'}); 
     291  } 
     292 
     293  # Loop through arguments, checking to see if any depend on a specific 
     294  # subclass of InExport. Note that we load the first subclass we encounter 
     295  # so only support a single 'override' ATM. 
     296  my $inexport_subclass; 
     297  foreach my $argument (@ARGV) 
     298  { 
     299    # proper arguments start with a hyphen 
     300    if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument}) 
     301    { 
     302      my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument}; 
     303      if (!defined $inexport_subclass) 
     304      { 
     305        $inexport_subclass = $required_inexport_subclass; 
     306      } 
     307      # Oh noes! The user has included specific arguments from two different 
     308      # inexport subclasses... this isn't supported 
     309      elsif ($inexport_subclass ne $required_inexport_subclass) 
     310      { 
     311        print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n"; 
     312        exit; 
     313      } 
    286314    } 
     315  } 
     316 
     317  my $inexport; 
     318  if (defined $inexport_subclass) 
     319  { 
     320    print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n"; 
     321    require $inexport_subclass . '.pm'; 
     322    $inexport = new $inexport_subclass("import",\@ARGV,$options); 
     323  } 
     324  # We don't have a overridden inexport, or the above command failed somehow 
     325  # so load the base inexport class 
     326  if (!defined $inexport) 
     327  { 
     328    $inexport = new inexport("import",\@ARGV,$options); 
     329  } 
     330 
     331  my $collection = $inexport->get_collection(); 
     332 
     333  if (defined $collection) 
     334  { 
     335    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options); 
     336 
     337    $inexport->set_collection_options($collect_cfg); 
     338 
     339    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg); 
     340 
     341    $inexport->generate_statistics($pluginfo); 
     342  } 
     343 
     344  $inexport->deinit(); 
    287345} 
    288  
     346# main() 
     347 
     348# @function _scanForSubclasses() 
     349# @param $dir The extension directory to look within 
     350# @param $exts A list of the available extensions (as a colon separated string) 
     351# @return The number of subclasses of InExport found as an Integer 
     352sub _scanForSubclasses 
     353{ 
     354  my ($dir, $exts) = @_; 
     355  my $inexport_class_count = 0; 
     356  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext"); 
     357  my @extensions = split(/:/, $exts); 
     358  foreach my $e (@extensions) 
     359  { 
     360    # - any subclass of InExport must be prefixed with the name of the ext 
     361    my $package_name = $e . 'inexport'; 
     362    $package_name =~ s/[^a-z]//gi; # package names have limited characters 
     363    my $inexport_filename = $package_name . '.pm'; 
     364    my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename); 
     365    # see if we have a subclass of InExport lurking in that extension folder 
     366    if (-f $inexport_path) 
     367    { 
     368      # - note we load the filename (with pm) unlike normal modules 
     369      require $inexport_filename; 
     370      # - make call to the newly created package 
     371      my $symbol = qualify('getSupportedArguments', $package_name); 
     372      # - strict prevents strings being used as function calls, so temporarily 
     373      #   disable that pragma 
     374      no strict; 
     375      # - lets check that the function we are about to call actually exists 
     376      if ( defined &{$symbol} ) 
     377      { 
     378        my $extra_arguments = &{$symbol}(); 
     379        foreach my $argument (@{$extra_arguments}) 
     380        { 
     381          # - record a mapping from each extra arguments to the inexport class 
     382          #   that supports it. We put the hyphen on here to make comparing 
     383          #   with command line arguments even easier 
     384          $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name; 
     385          # - and them add them as acceptable arguments to import.pl 
     386          push(@{$options->{'args'}}, $argument); 
     387        } 
     388        $inexport_class_count++; 
     389      } 
     390      else 
     391      { 
     392        print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n"; 
     393      } 
     394    } 
     395  } 
     396  return $inexport_class_count; 
     397} 
     398# _scanForInExportModules() 
    289399 
    290400&main();