Changeset 8892


Ignore:
Timestamp:
2005-01-12T13:45:04+13:00 (19 years ago)
Author:
davidb
Message:

Addition of new minus option to BasPlug: -associate_ext.

This new option allows a collectoin designer to cause files
with the same file root but different extension to be handled
as one document. A comma separated list is given of filename
extensions that should be associated with the primary version
(i.e. the file that is being processed by the Plugin that carries
this option).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r8818 r8892  
    3232$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
    3333
     34use File::Basename;
     35
    3436use Kea;
    3537use parsargv;
     
    6769    'type' => "regexp",
    6870    'deft' => "",
     71    'reqd' => "no" },
     72      { 'name' => "associate_ext",
     73    'desc' => "{BasPlug.associate_ext}",
     74    'type' => "string",
    6975    'reqd' => "no" },
    7076      { 'name' => "input_encoding",
     
    315321             q^process_exp/.*/^, \$self->{'process_exp'},
    316322             q^block_exp/.*/^, \$self->{'block_exp'},
     323             q^associate_ext/.*/^, \$self->{'associate_ext'},
    317324             q^extract_language^, \$self->{'extract_language'},
    318325             q^extract_acronyms^, \$self->{'extract_acronyms'},
     
    338345    die "\n";
    339346    }
     347
     348    my $associate_ext = $self->{'associate_ext'};
     349    if ((defined $associate_ext) && ($associate_ext ne "")) {
     350    my @exts = split(/,/,$associate_ext);
     351   
     352    my %associate_ext_lookup = ();
     353    foreach my $e (@exts) {
     354        $assoicate_ext_lookup{$e} = 1;
     355    }
     356
     357    $self->{'associate_ext_lookup'} = \%associate_ext_lookup;
     358    }
     359
     360    $self->{'shared_fileroot'} = {};
    340361    $self->{'file_blocks'} = {};
    341362
     
    409430    my $self = shift (@_); 
    410431    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
     432
     433    # Keep track of filenames with same root but different extensions
     434    # Used to support -assoicate_ext
     435
     436    my $associate_ext = $self->{'associate_ext'};
     437    if ((defined $associate_ext) && ($associate_ext ne "")) {
     438
     439    my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/);
     440    if ((defined $file_prefix) && (defined $file_ext)) {
     441       
     442        my $shared_fileroot = $self->{'shared_fileroot'};
     443        if (!defined $shared_fileroot->{$file_prefix}) {
     444        my $file_prefix_rec = { 'tie_to' => undef, 'exts' => {} };
     445        $shared_fileroot->{$file_prefix} = $file_prefix_rec;
     446        }
     447       
     448        my $file_prefix_rec = $shared_fileroot->{$file_prefix};
     449
     450        my $process_exp = $self->{'process_exp'};
     451
     452        if ($file =~ m/$self->{'process_exp'}/) {
     453        # This is the document the others should be tied to
     454        $file_prefix_rec->{'tie_to'} = $file_ext;
     455        }
     456        else {
     457        $file_prefix_rec->{'exts'}->{$file_ext} = 1;
     458        }
     459    }
     460    }
     461
    411462    return undef;
    412463}
     464
     465sub tie_to_filename
     466{
     467    my $self = shift (@_); 
     468 
     469    my ($file_ext,$file_prefix_rec) = @_;
     470
     471    if (defined $file_prefix_rec) {
     472    my $tie_to = $file_prefix_rec->{'tie_to'};
     473    if (defined $tie_to) {
     474        if ($tie_to eq $file_ext) {
     475        return 1;
     476        }
     477    }
     478    }
     479
     480    return 0;
     481}
     482
     483sub tie_to_assoc_file
     484{
     485    my $self = shift (@_);   
     486    my ($file_ext,$file_prefix_rec) = @_;
     487
     488    if (defined $file_prefix_rec) {
     489    my $tie_to = $file_prefix_rec->{'tie_to'};
     490    if (defined $tie_to) {
     491
     492        my $exts = $file_prefix_rec->{'exts'};
     493
     494        my $has_file_ext = $exts->{$file_ext};
     495       
     496        if ($has_file_ext) {
     497        return 1;
     498        }
     499    }
     500    }
     501
     502    return 0;
     503}
     504
     505
     506sub associate_with
     507{
     508    my $self = shift (@_);   
     509    my ($file, $filename, $metadata) = @_;
     510
     511    my $associate_ext = $self->{'associate_ext'};
     512
     513    return 0 if (!$associate_ext);
     514
     515    # If file, see if matches with "tie_to" doc or is one of the
     516    # associated filename extensions.
     517
     518    my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/);
     519    if ((defined $file_prefix) && (defined $file_ext)) {
     520
     521    my $file_prefix_rec = $self->{'shared_fileroot'}->{$file_prefix};
     522
     523    if ($self->tie_to_filename($file_ext,$file_prefix_rec)) {
     524
     525        # Set up gsdlassocfile_tobe
     526
     527        my $exts = $file_prefix_rec->{'exts'};
     528       
     529        if (!defined $metadata->{'gsdlassocfile_tobe'}) {
     530        $metadata->{'gsdlassocfile_tobe'} = [];
     531        }
     532
     533        my $assoc_tobe = $metadata->{'gsdlassocfile_tobe'};
     534           
     535        my ($full_prefix) = ($filename =~ m/^(.*)\..*?$/);
     536        foreach my $e (keys %$exts) {       
     537        my $assoc_file = "$full_prefix.$e";
     538        my $mime_type = ""; # let system auto detect this
     539        push(@$assoc_tobe,"$assoc_file:$mime_type:");
     540        }
     541    }
     542    elsif ($self->tie_to_assoc_file($file_ext,$file_prefix_rec)) {
     543        # a form of smart block
     544       
     545        return 1;
     546    }
     547    }
     548
     549    return 0;
     550}
     551
    413552
    414553# The BasPlug read() function. This function does all the right things
     
    442581    $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    443582
    444 
    445     if ($smart_block){
     583    if ($self->associate_with($file,$filename,$metadata)) {
     584    # a form of smart block
     585    $self->{'num_blocked'} ++;
     586    return 0; # blocked
     587    }
     588
     589    if ($smart_block) {
    446590    if (defined $self->{'file_blocks'}->{$filename} && $self->{'file_blocks'}->{$filename} == 1){
     591        $self->{'num_blocked'} ++;
    447592        return 0; # blocked
    448593    }
     
    769914        my $equiv_form = "";
    770915        foreach my $gaf (@{$metadata->{$field}}) {
    771         #print STDERR "**** gsdlassocfile_tobe = $gaf\n";
    772916        my ($full_filename,$mimetype) = ($gaf =~ m/^(.*):(.*):$/);
    773917        my ($tail_filename) = ($full_filename =~ /^.*[\/\\](.+?)$/);
Note: See TracChangeset for help on using the changeset viewer.