Changeset 4794 for trunk/gsdl/perllib


Ignore:
Timestamp:
2003-06-25T11:11:52+12:00 (21 years ago)
Author:
kjdon
Message:

The mgpp indexes/fields now use collectionmeta for their display names. If no collectionmeta is defined for a field, the metadata name is used instead, except for allfields and text, which use a macro.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/mgppbuilder.pm

    r4768 r4794  
    3535
    3636BEGIN {
    37     # set autoflush on for STDERR and STDOUT so that mg
     37    # set autoflush on for STDERR and STDOUT so that mgpp
    3838    # doesn't get out of sync with plugins
    3939    STDOUT->autoflush(1);
     
    8282              'People'=>'PE',
    8383              'PE'=>1,
    84               'AllFields'=>'ZZ',
     84              'allfields'=>'ZZ',
    8585              'ZZ'=>1,
    86               'TextOnly'=>'TX',
     86              'text'=>'TX',
    8787              'TX'=>1,
    8888              'AND'=>1,
     
    381381
    382382    # create the mapping between the index descriptions
    383     # and their directory names
     383    # and their directory names (includes subcolls and langs)
    384384    $self->{'index_mapping'} = $self->create_index_mapping ($indexes);
    385385
     
    440440    # store the mapping orders as well as the maps
    441441    # also put index, subcollection and language fields into the mapping thing -
    442     # (the full index name (eg document:text:subcol:lang) is not used on
     442    # (the full index name (eg text:subcol:lang) is not used on
    443443    # the query page) -these are used for collectionmeta later on
    444444    if (!defined $mapping{'indexmap'}{"$fields"}) {
     
    621621    }
    622622   
    623     # set up the document processor
     623    # set up the document processor
    624624    $self->{'buildproc'}->set_output_handle ($handle);
    625625    $self->{'buildproc'}->set_mode ('text');
     
    686686    system ("mgpp_stem_idx$exe -b 4096 -s3 -f \"$fullindexprefix\" $osextra");
    687687
    688    
     688    #define the final field lists
     689    $self->make_final_field_list();
     690   
    689691    # remove unwanted files
    690692    my $tmpdir = &util::filename_cat ($self->{'build_dir'}, $indexdir);
     
    701703    }
    702704    closedir (DIR);
    703   }
     705    }
    704706}   
    705707
     
    725727
    726728    # define the indexed field mapping if not already done so (ie if infodb called separately from build_index)
    727     if (scalar(keys %{$self->{'buildproc'}->{'indexfieldmap'}}) == 0) {
    728     #check build.cfg to see if indexfields have been filled in
    729     $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "building/build.cfg");
    730     if (-e $buildconfigfile) {
    731         $buildcfg = &colcfg::read_build_cfg( $buildconfigfile);
    732         if (defined $buildcfg->{'indexfields'}) {
    733         foreach $field (@{$buildcfg->{'indexfields'}}) {
    734             $self->{'buildproc'}->{'indexfields'}->{$field} = 1;
    735         }
    736         }
    737         if (defined $buildcfg->{'indexfieldmap'}) {
    738         foreach $field (@{$buildcfg->{'indexfieldmap'}}) {
    739             ($f, $v) = $field =~ /^(.*)\-\>(.*)$/;
    740             $self->{'buildproc'}->{'indexfieldmap'}->{$f} = $v;
    741         }
    742         }       
    743     }
    744     }
    745    
     729    if (!defined $self->{'build_cfg'}) {
     730    $self->read_final_field_list();
     731    }
    746732    print $outhandle "\n*** creating the info database and processing associated files\n"
    747733    if ($self->{'verbosity'} >= 1);
     
    772758    $self->{'buildproc'}->reset();
    773759
     760    # do the collection info
     761    print $handle "[collection]\n";
     762   
     763    # first do the collection meta stuff - everything without a dot
     764    my $collmetadefined = 0;
    774765    if (defined $self->{'collect_cfg'}->{'collectionmeta'}) {
    775    
    776     if (!defined $self->{'index_mapping'}) {
    777         $self->{'index_mapping'} =
    778         $self->create_index_mapping ($self->{'collect_cfg'}->{'indexes'});
    779     }
    780    
    781     print $handle "[collection]\n";
    782 
     766    $collmetadefined = 1;
    783767    foreach $cmeta (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}})) {
    784         my $defaultfound=0;
    785         my $first=1;
    786         my $metadata_entry = "";
    787         my $default="";
    788         my $cmetamap = "";
    789         if ($cmeta =~ s/^\.//) {
    790         if (defined $self->{'index_mapping'}->{$cmeta}) {
    791             $cmetamap = $self->{'index_mapping'}->{$cmeta};
    792             $cmeta = ".$cmeta";
    793         }
    794         else {
    795             print $outhandle "mgbuilder: warning bad collectionmeta option '$cmeta' - ignored\n";
    796             next; #ignore this one
    797         }
    798         }
    799         else {
    800         $cmetamap = $cmeta; # just using the same name
    801         }
    802         #iterate through the languages
    803         foreach $lang (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}})) {
    804         if ($first) {
    805             $first=0;
    806             #set the default default to the first entry
    807             $default=$self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{$lang};
    808         }
    809         if ($lang =~ /default/) {
    810             $defaultfound=1;
    811             #the default entry goes first
    812             $metadata_entry = "<$cmetamap>" .
    813             $self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{'default'} . "\n" . $metadata_entry;
    814         }
    815         else {
    816             my ($l) = $lang =~ /^\[l=(\w*)\]$/;
    817             if ($l) {         
    818             $metadata_entry .= "<$cmetamap:$l>" .
    819                 $self->{'collect_cfg'}->{'collectionmeta'}->{$cmeta}->{$lang} . "\n";
    820             }
    821         }
    822         }
    823         #if we haven't found a default, put one in
    824         if (!$defaultfound) {
    825         $metadata_entry = "<$cmetamap>$default\n" . $metadata_entry;
    826         }
     768        next if ($cmeta =~ /^\./); # for now, ignore ones with dots
     769        my ($metadata_entry) = $self->create_language_db_map($cmeta, $cmeta);
    827770        #write the entry to the file
    828771        print $handle $metadata_entry;
    829772       
    830     }
    831 
    832     #add the indexfieldmap macros to [collection]
    833     # eg <TI>Title
    834     #    <SU>Subject
    835     # these may be overidden for other langs if add to macro files
    836     $field_entry="";
    837     foreach $longfield (keys %{$self->{'buildproc'}->{'indexfieldmap'}}){
    838         $shortfield = $self->{'buildproc'}->{'indexfieldmap'}->{$longfield};
    839         next if $shortfield eq 1;
    840         $field_entry .= "<$shortfield>$longfield\n";
    841     }
    842     print $handle $field_entry;
     773    } # foreach collmeta key
     774    }
     775    #add the indexfieldmap macros to [collection]
     776    # eg <TI>Title
     777    #    <SU>Subject
     778    # these now come from collection meta. if that is not defined, uses the metadata name
     779    $field_entry="";
     780    foreach $longfield (@{$self->{'build_cfg'}->{'indexfields'}}){
     781    print $outhandle "doing long field $longfield\n";
     782    $shortfield = $self->{'buildproc'}->{'indexfieldmap'}->{$longfield};
     783    next if $shortfield eq 1;
    843784   
    844     print $handle "\n" . ('-' x 70) . "\n";
    845    
    846     }
     785    # we need to check if some coll meta has been defined
     786    my $collmeta = ".$longfield";
     787    if ($collmetadefined && defined $self->{'collect_cfg'}->{'collectionmeta'}->{$collmeta}) {
     788        print $outhandle "coll meta $collmeta defined\n";
     789        $metadata_entry = $self->create_language_db_map($collmeta, $shortfield);
     790        $field_entry .= $metadata_entry;
     791    } else { #use the metadata names, or the text macros for allfields and textonly
     792        if ($longfield eq "allfields") {
     793        $field_entry .= "<$shortfield>_query:textallfields_\n";
     794        } elsif ($longfield eq "text") {
     795        $field_entry .= "<$shortfield>_query:texttextonly_\n";
     796        } else {
     797        $field_entry .= "<$shortfield>$longfield\n";
     798        }
     799    }
     800    }
     801    print $handle $field_entry;
     802   
     803    #end the collection entry
     804    print $handle "\n" . ('-' x 70) . "\n";
     805   
     806   
    847807   
    848808    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
     
    867827}
    868828
     829sub create_language_db_map {
     830    my $self = shift (@_);
     831    my ($metaname, $mapname) = @_;
     832    my $outhandle =  $self->{'outhandle'};
     833    my $defaultfound=0;
     834    my $first=1;
     835    my $metadata_entry = "";
     836    my $default="";
     837    print $outhandle "crate for meta $metaname\n";
     838    #iterate through the languages
     839    foreach $lang (keys (%{$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}})) {
     840    print $outhandle "lang=$lang\n";
     841    if ($first) {
     842        $first=0;
     843        #set the default default to the first entry
     844        $default=$self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{$lang};
     845        print $outhandle "defualt = $default\n";
     846    }
     847    if ($lang =~ /default/) {
     848        $defaultfound=1;
     849        #the default entry goes first
     850        $metadata_entry = "<$mapname>" .
     851        $self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{'default'} . "\n" . $metadata_entry;
     852    }
     853    else {
     854        my ($l) = $lang =~ /^\[l=(\w*)\]$/;
     855        if ($l) {         
     856        $metadata_entry .= "<$mapname:$l>" .
     857            $self->{'collect_cfg'}->{'collectionmeta'}->{$metaname}->{$lang} . "\n";
     858        }
     859    }
     860    } #foreach lang
     861    #if we haven't found a default, put one in
     862    if (!$defaultfound) {
     863    $metadata_entry = "<$mapname>$default\n" . $metadata_entry;
     864    }
     865    return $metadata_entry;
     866   
     867}
    869868sub collect_specific {
    870869    my $self = shift (@_);
    871870}
    872871
    873 sub make_auxiliary_files {
    874     my $self = shift (@_);
    875     my ($index);
    876     my %build_cfg = ();
    877 
    878     my $outhandle =  $self->{'outhandle'};
    879     print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
    880 
    881     # get the text directory
    882     &util::mk_all_dir ($self->{'build_dir'});
    883 
    884     # store the build date
    885     $build_cfg->{'builddate'} = time;
    886     $build_cfg->{'buildtype'} = "mgpp";
    887 
    888     # store the number of documents and number of bytes
    889     $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
    890     $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
    891 
    892     # store the mapping between the index names and the directory names
    893     my @indexmap = ();
    894     foreach $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
    895     push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}");
    896     }
    897     $build_cfg->{'indexmap'} = \@indexmap;
    898 
    899     my @subcollectionmap = ();
    900     foreach $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
    901     push (@subcollectionmap, "$subcollection\-\>" .
    902           $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection});
    903     }
    904     $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);
    905 
    906     my @languagemap = ();
    907     foreach $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
    908     push (@languagemap, "$language\-\>" .
    909           $self->{'index_mapping'}->{'languagemap'}->{$language});
    910     }
    911     $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
    912 
    913     $build_cfg->{'notbuilt'} = $self->{'notbuilt'};
     872# at the end of building, we have an indexfieldmap with all the mappings, plus
     873# some extras, and indexmap with any indexes in it that weren't specified in the index definition.
     874# we want to make an ordered list of fields that are indexed, and a list of mappings that are used. this will be used for the build.cfg file, and for collection meta definition
     875# we store these in a build.cfg bit
     876sub make_final_field_list {
     877    my $self = shift (@_);
     878   
     879    $self->{'build_cfg'} = {};
    914880
    915881    # store the indexfieldmap information
     
    938904
    939905    } elsif ($field eq 'text') {
    940         push (@indexfieldmap, "TextOnly\-\>TX");
    941         push (@indexfields, "TextOnly");
     906        push (@indexfieldmap, "text\-\>TX");
     907        push (@indexfields, "text");
    942908    } elsif ($field eq 'allfields') {
    943         push (@indexfieldmap, "AllFields\-\>ZZ");
    944         push (@indexfields, "AllFields");
     909        push (@indexfieldmap, "allfields\-\>ZZ");
     910        push (@indexfields, "allfields");
    945911    } else {
    946912        push (@indexfieldmap, "$field\-\>$self->{'buildproc'}->{'indexfieldmap'}->{$field}");
     
    948914       
    949915    }
    950     #if (defined $self->{'buildproc'}->{'indexfields'}->{'TextOnly'}) {
    951     #push (@indexfieldmap, "TextOnly\-\>TX");
    952     #}
    953     #foreach $field (sort keys %{$self->{'buildproc'}->{'indexfields'}}) {
    954     #next if $field eq "TextOnly";
    955     #push (@indexfieldmap, "$field\-\>$self->{'buildproc'}->{'indexfieldmap'}->{$field}");
    956     }
    957 
    958     $build_cfg->{'indexfieldmap'} = \@indexfieldmap;
    959     $build_cfg->{'indexfields'} = \@indexfields;
    960 
    961     #store the indexed field information
    962     #foreach $field (sort keys %{$self->{'buildproc'}->{'indexfields'}}) {
    963    
    964     #push (@{$build_cfg->{'indexfields'}}, $field);
    965     #}
     916    }
     917    $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap;
     918    $self->{'build_cfg'}->{'indexfields'} = \@indexfields;
     919
     920
     921}
     922
     923
     924# recreate the field list from the build.cfg file, look first in building, then in index to find it. if there is no build.cfg, we can't do the field list (there is unlikely to be any index anyway.)
     925sub read_final_field_list {
     926    my $self = shift (@_);
     927    $self->{'build_cfg'} = {};
     928    my @indexfieldmap = ();
     929    my @indexfields = ();
     930   
     931    if (scalar(keys %{$self->{'buildproc'}->{'indexfieldmap'}}) == 0) {
     932    # set the default mapping
     933    $self->{'buildproc'}->set_indexfieldmap ($self->{'indexfieldmap'});
     934    }
     935    # we read the stuff in from the build.cfg file - if its there
     936    $buildconfigfile = &util::filename_cat($self->{'build_dir'}, "build.cfg");
     937   
     938    if (!-e $buildconfigfile) {
     939    # try the index dir - but do we know where it is?? try here
     940    $buildconfigfile  = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", "build.cfg");
     941    if (!-e $buildconfigfile) {
     942        #we can't find a config file - just ignore the field list
     943        return;
     944    }
     945    }
     946    $buildcfg = &colcfg::read_build_cfg( $buildconfigfile);
     947    if (defined $buildcfg->{'indexfields'}) {
     948    foreach $field (@{$buildcfg->{'indexfields'}}) {
     949        push (@indexfields, "$field");
     950    }
     951    }
     952    if (defined $buildcfg->{'indexfieldmap'}) {
     953    foreach $field (@{$buildcfg->{'indexfieldmap'}}) {
     954        push (@indexfieldmap, "$field");
     955        ($f, $v) = $field =~ /^(.*)\-\>(.*)$/;
     956        $self->{'buildproc'}->{'indexfieldmap'}->{$f} = $v;
     957    }
     958    }       
     959   
     960    $self->{'build_cfg'}->{'indexfieldmap'} = \@indexfieldmap;
     961    $self->{'build_cfg'}->{'indexfields'} = \@indexfields;
     962   
     963}
     964sub make_auxiliary_files {
     965    my $self = shift (@_);
     966    my ($index);
     967   
     968    my $build_cfg = {};
     969    # this already includes indexfieldmap and indexfields
     970    if (defined $self->{'build_cfg'}) {
     971    $build_cfg = $self->{'build_cfg'};
     972    }
     973    #my %build_cfg = ();
     974   
     975    my $outhandle =  $self->{'outhandle'};
     976    print $outhandle "\n*** creating auxiliary files \n" if ($self->{'verbosity'} >= 1);
     977
     978    # get the text directory
     979    &util::mk_all_dir ($self->{'build_dir'});
     980
     981    # store the build date
     982    $build_cfg->{'builddate'} = time;
     983    $build_cfg->{'buildtype'} = "mgpp"; #do we need this??
     984   
     985    # store the number of documents and number of bytes
     986    $build_cfg->{'numdocs'} = $self->{'buildproc'}->get_num_docs();
     987    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
     988
     989    # store the mapping between the index names and the directory names
     990    my @indexmap = ();
     991    foreach $index (@{$self->{'index_mapping'}->{'indexmaporder'}}) {
     992    push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}");
     993    }
     994    $build_cfg->{'indexmap'} = \@indexmap;
     995
     996    my @subcollectionmap = ();
     997    foreach $subcollection (@{$self->{'index_mapping'}->{'subcollectionmaporder'}}) {
     998    push (@subcollectionmap, "$subcollection\-\>" .
     999          $self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection});
     1000    }
     1001    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);
     1002
     1003    my @languagemap = ();
     1004    foreach $language (@{$self->{'index_mapping'}->{'languagemaporder'}}) {
     1005    push (@languagemap, "$language\-\>" .
     1006          $self->{'index_mapping'}->{'languagemap'}->{$language});
     1007    }
     1008    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
     1009
     1010    $build_cfg->{'notbuilt'} = $self->{'notbuilt'};
     1011
    9661012    # write out the build information
    9671013    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
    9681014                 '^(builddate|buildtype|numdocs|numbytes)$',
    9691015                             '^(indexmap|subcollectionmap|languagemap|indexfieldmap|notbuilt|indexfields)$');
    970 
     1016   
    9711017}
    9721018
Note: See TracChangeset for help on using the changeset viewer.