Ignore:
Timestamp:
2007-12-20T21:53:14+13:00 (16 years ago)
Author:
davidb
Message:

Changes to allow statistic calculations for metadata coverage, i.e. for a given document, which metadata set prefixes are used, which fields within those prefixes are used, and how many times. This is then aggregated over all the documents, and the summary is stored as collection-level metadata.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/basebuilder.pm

    r14930 r14934  
    368368    $self->{'buildproc'}->zero_reset();
    369369
     370    $self->{'buildproc'}->{'mdprefix_fields'} = {};
     371
    370372    if ($self->{'keepold'}) {
    371373    # create flat classify structure, ready for new docs to be added
     
    377379
    378380   
     381    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
     382           "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
     383
    379384    # this has changed to only output collection meta if its
    380385    # not in the config file
    381386    $self->output_collection_meta($handle);
    382     &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
    383            "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});
    384387   
    385388    # output classification information
     
    562565}
    563566
    564 # default is to output an empty [collection] entry
     567
     568
     569sub output_collection_meta_start {
     570    my $self = shift(@_);
     571    my ($handle) = @_;
     572   
     573    print $handle "[collection]\n";
     574
     575
     576
     577sub output_collection_meta_sets {
     578    my $self = shift(@_);
     579    my ($handle) = @_;
     580
     581    my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'};
     582
     583    foreach my $prefix (keys %$mdprefix_fields)
     584    {
     585    print $handle "<metadataset>$prefix\n";
     586
     587    foreach my $field (keys %{$mdprefix_fields->{$prefix}})
     588    {
     589        my $val = $mdprefix_fields->{$prefix}->{$field};
     590
     591        print $handle "<metadatalist-$prefix>$field\n";
     592        print $handle "<metadatafreq-$prefix-$field>$val\n";
     593    }
     594
     595    }
     596
     597
     598
     599sub output_collection_meta_end {
     600    my $self = shift(@_);
     601    my ($handle) = @_;
     602   
     603    print $handle ('-' x 70) . "\n";;
     604
     605
     606
     607
     608# default is to output the metadata sets (prefixes) used in collection
     609
    565610sub output_collection_meta {
    566611    my $self = shift(@_);
    567612    my ($handle) = @_;
    568    
    569     print $handle "[collection]\n". ('-' x 70) . "\n";;
     613
     614    $self->output_collection_meta_start($handle);
     615    $self->output_collection_meta_sets($handle);
     616    $self->output_collection_meta_end($handle);
    570617
    571618
Note: See TracChangeset for help on using the changeset viewer.