Changeset 10158


Ignore:
Timestamp:
2005-06-23T11:23:42+12:00 (19 years ago)
Author:
davidb
Message:

*builder.pm packages (principally lucenebuilder.pm, which inherits from
mgppbuilder) upgraded to support incremental building.

Location:
trunk/gsdl/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/lucenebuilder.pm

    r9910 r10158  
    5858
    5959    eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " .
    60      "\$source_dir, \$build_dir, \$verbosity, \$outhandle)");
     60     "\$source_dir, \$build_dir, \$keepold, \$verbosity, \$outhandle)");
    6161    die "$@" if $@;
    6262
     
    217217    my $lucene_passes_sections = $llevel;
    218218
     219    my $opt_create_index = ($self->{'keepold'}) ? "" : "-create";
     220
    219221    my $osextra = "";
    220222    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
    221223    $build_dir =~ s@/@\\@g;
    222224    } else {
    223     $osextra = " -d /";
    224225    if ($outhandle ne "STDERR") {
    225226        # so lucene_passes doesn't print to stderr if we redirect output
     
    275276    } else {
    276277    if (!-e "$lucene_passes_exe" ||
    277         !open (PIPEOUT, "| $full_lucene_passes_exe index $lucene_passes_sections \"$build_dir\" \"$indexdir\"   $osextra")) {
     278        !open (PIPEOUT, "| $full_lucene_passes_exe $opt_create_index index $lucene_passes_sections \"$build_dir\" \"$indexdir\"   $osextra")) {
    278279        print STDERR "<FatalError name='NoRunLucenePasses'/>\n</Stage>\n" if $self->{'gli'};
    279280        die "lucenebuilder::build_index - couldn't run $lucene_passes_exe\n";
  • trunk/gsdl/perllib/mgbuilder.pm

    r9938 r10158  
    196196
    197197    eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " .
    198      "\$source_dir, \$build_dir, \$verbosity, \$outhandle)");
     198     "\$source_dir, \$build_dir, \$keepold, \$verbosity, \$outhandle)");
    199199    die "$@" if $@;
    200200
     
    922922sub deinit {
    923923    my $self = shift (@_);
     924
     925    &plugin::deinit($self->{'pluginfo'},$self->{'buildproc'});
    924926}
    925927
  • trunk/gsdl/perllib/mgppbuilder.pm

    r9938 r10158  
    264264
    265265    eval("\$self->{'buildproc'} = new $buildproctype(\$collection, " .
    266      "\$source_dir, \$build_dir, \$verbosity, \$outhandle)");
     266     "\$source_dir, \$build_dir, \$keepold, \$verbosity, \$outhandle)");
    267267    die "$@" if $@;
    268268
     
    864864    #$self->{'buildproc'}->set_indexfieldmap ($self->{'indexfieldmap'});
    865865
    866     $self->{'buildproc'}->reset();
     866    # make_infodatabase does not support incremental build
     867    # => full reset needed
     868    $self->{'buildproc'}->zero_reset();
    867869
    868870    # do the collection info
     
    938940    }
    939941    print $handle $subcoll_entry;
    940      # now add language meta
     942
     943    # now add language meta
    941944    my $lang_entry = "";
    942945    foreach my $lang (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
     
    950953    }
    951954    print $handle $lang_entry;
    952    #end the collection entry
    953     print $handle "\n" . ('-' x 70) . "\n";
    954    
    955    
     955    # end the collection entry
     956    print $handle "\n" . ('-' x 70) . "\n";       
    956957   
    957958    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
     
    10231024}
    10241025
    1025 # at the end of building, we have an indexfieldmap with all teh mappings, plus
    1026 # some extras, and indexmap with any indexes in it that weren't specified in the index definition.
    1027 # we want to make an ordered list of fields that are indexed, and a list of mappings that are used. this will be used for the build.cfg file, and for collection meta definition
    1028 # we store these in a build.cfg bit
     1026# at the end of building, we have an indexfieldmap with all the mappings,
     1027# plus some extras, and indexmap with any indexes in it that weren't
     1028# specified in the index definition.  we want to make an ordered list of
     1029# fields that are indexed, and a list of mappings that are used. this will
     1030# be used for the build.cfg file, and for collection meta definition. we
     1031# store these in a build.cfg bit
    10291032sub make_final_field_list {
    10301033    my $self = shift (@_);
    10311034   
    10321035    $self->{'build_cfg'} = {};
    1033 
     1036   
    10341037    # store the indexfieldmap information
    10351038    my @indexfieldmap = ();
     
    10371040    my $specifiedfields = {};
    10381041    my @specifiedfieldorder = ();
    1039     # go through the index definition and add each thing to a map, so we can easily check if it is already specified - when doing the metadata, we print out all the individual fields, but some may already be specified in the index definition, so we dont want to add those again.
     1042
     1043    # go through the index definition and add each thing to a map, so we
     1044    # can easily check if it is already specified - when doing the
     1045    # metadata, we print out all the individual fields, but some may
     1046    # already be specified in the index definition, so we don't want to add
     1047    # those again.
     1048
    10401049    foreach my $field (@{$self->{'collect_cfg'}->{'indexes'}}) {
    10411050    # remove subcoll stuff
     
    10731082    }
    10741083    }
     1084
    10751085    $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap;
    10761086    $self->{'build_cfg'}->{'indexfields'} = \@indexfields;
    1077 
    1078 
    1079 }
    1080 
    1081 
    1082 # recreate the field list from the build.cfg file, look first in building, then in index to find it. if there is no build.cfg, we cant do the field list (there is unlikely to be any index anyway.)
     1087}
     1088
     1089
     1090# recreate the field list from the build.cfg file, look first in building,
     1091# then in index to find it. if there is no build.cfg, we can't do the field
     1092# list (there is unlikely to be any index anyway.)
    10831093sub read_final_field_list {
    10841094    my $self = shift (@_);
     
    11021112    }
    11031113    }
     1114
    11041115    my $buildcfg = &colcfg::read_build_cfg( $buildconfigfile);
     1116
    11051117    if (defined $buildcfg->{'indexfields'}) {
    11061118    foreach my $field (@{$buildcfg->{'indexfields'}}) {
     
    11081120    }
    11091121    }
     1122
    11101123    if (defined $buildcfg->{'indexfieldmap'}) {
    11111124    foreach my $field (@{$buildcfg->{'indexfieldmap'}}) {
     
    11151128    }
    11161129    }       
    1117    
     1130
    11181131    $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap;
    11191132    $self->{'build_cfg'}->{'indexfields'} = \@indexfields;
    1120    
    1121 }
     1133}
     1134
    11221135sub make_auxiliary_files {
    11231136    my $self = shift (@_);
     
    11291142    $build_cfg = $self->{'build_cfg'};
    11301143    }
    1131     #my %build_cfg = ();
    11321144   
    11331145    my $outhandle =  $self->{'outhandle'};
     
    11591171    # store the number of documents and number of bytes
    11601172    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
     1173    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections();
    11611174    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
    11621175
     
    11841197    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
    11851198
    1186     #$build_cfg->{'notbuilt'} = $self->{'notbuilt'};
    11871199    my @notbuilt = ();
    11881200    foreach my $nb (keys %{$self->{'notbuilt'}}) {
     
    11931205    # write out the build information
    11941206    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
    1195                  '^(builddate|buildtype|numdocs|numbytes|textlevel|indexstem)$',
     1207                 '^(builddate|buildtype|numdocs|numsections|numbytes|textlevel|indexstem)$',
    11961208                             '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields|indexlevels|levelmap)$');
    11971209
     
    12011213sub deinit {
    12021214    my $self = shift (@_);
     1215   
     1216    &plugin::deinit($self->{'pluginfo'},$self->{'buildproc'});
    12031217}
    12041218
     
    12211235
    12221236    if ($num_processed_bytes < 50 && ($indexing_text || !$self->{'no_text'})) {
    1223     print $outhandle "***************\n";
    1224     if ($indexing_text) {
    1225         print $outhandle "WARNING: There is very little or no text to process for $index\n";
    1226     } elsif (!$self->{'no_text'}) {
    1227         print $outhandle "WARNING: There is very little or no text to compress\n";
    1228     }     
    1229     print $outhandle "         Was this your intention?\n";
    1230     print $outhandle "***************\n";
     1237   
     1238    if ($self->{'keepold'}) {
     1239        if ($num_processed_bytes == 0) {
     1240        if ($indexing_text) {
     1241            print $outhandle "No additional text was added to $index\n";
     1242        } elsif (!$self->{'no_text'}) {
     1243            print $outhandle "No additional text was compressed\n";
     1244        }   
     1245        }   
     1246    }
     1247    else {
     1248        print $outhandle "***************\n";
     1249        if ($indexing_text) {
     1250        print $outhandle "WARNING: There is very little or no text to process for $index\n";
     1251        } elsif (!$self->{'no_text'}) {
     1252        print $outhandle "WARNING: There is very little or no text to compress\n";
     1253        }     
     1254        print $outhandle "         Was this your intention?\n";
     1255        print $outhandle "***************\n";
     1256    }
     1257
    12311258    }
    12321259
Note: See TracChangeset for help on using the changeset viewer.