Changeset 11122


Ignore:
Timestamp:
01/27/06 00:21:47 (15 years ago)
Author:
davidb
Message:

Introduction of the -associate_tail_re option to BasPlug. This is a generalisation
of the -associate_ext option (in fact, associate_ext is now mapped into the
equivalent associate_tail_re expression). To work properly, plugins that use the
secondary plugin mechanism (such as PDFPlug) need to pass the
associate_tail_re value on to their secondary plugins too.

Location:
trunk/gsdl/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r11089 r11122  
    105105    'type' => "string",
    106106    'reqd' => "no" },
     107      { 'name' => "associate_tail_re",
     108    'desc' => "{BasPlug.associate_tail_re}",
     109    'type' => "string",
     110    'reqd' => "no" },
    107111      { 'name' => "input_encoding",
    108112    'desc' => "{BasPlug.input_encoding}",
     
    413417    my $associate_ext = $self->{'associate_ext'};
    414418    if ((defined $associate_ext) && ($associate_ext ne "")) {
    415     my @exts = split(/,/,$associate_ext);
    416 
    417     my %associate_ext_lookup = ();
    418     foreach my $e (@exts) {
    419         $associate_ext_lookup{$e} = 1;
    420     }
    421 
    422     $self->{'associate_ext_lookup'} = \%associate_ext_lookup;
     419
     420    my $associate_tail_re = $self->{'associate_tail_re'};
     421    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     422        my $outhandle = $self->{'outhandle'};
     423        print $outhandle "Warning: can only specify 'associate_ext' or 'associate_tail_re'\n";
     424        print $outhandle "         defaulting to 'associate_tail_re'\n";
     425    }
     426    else {
     427        my @exts = split(/,/,$associate_ext);
     428
     429        my @exts_bracketed = map { $_ = "(?:\\.$_)" } @exts;
     430        my $associate_tail_re = join("|",@exts_bracketed);
     431        $self->{'associate_tail_re'} = $associate_tail_re;
     432    }
     433
     434    delete $self->{'associate_ext'};
    423435    }
    424436
     
    545557    return;
    546558}
    547    
     559
     560sub root_ext_split
     561{
     562    my $self = shift (@_);
     563    my ($filename,$tail_re) = @_;
     564   
     565    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
     566
     567    if ((!defined $file_prefix) || (!defined $file_ext)) {
     568    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
     569    }
     570
     571    return ($file_prefix,$file_ext);
     572}
     573
    548574sub metadata_read {
    549575    my $self = shift (@_); 
    550576    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    551577    # Keep track of filenames with same root but different extensions
    552     # Used to support -associate_ext
    553 
    554     my $associate_ext = $self->{'associate_ext'};
    555     if ((defined $associate_ext) && ($associate_ext ne "")) {
    556 
    557     my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/);
     578    # Used to support -associate_ext and the more generalised
     579    # -associate_tail_re
     580
     581    my $associate_tail_re = $self->{'associate_tail_re'};
     582    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     583
     584    my ($file_prefix,$file_ext)
     585        = $self->root_ext_split($file,$associate_tail_re);
     586
    558587    if ((defined $file_prefix) && (defined $file_ext)) {
    559        
     588
    560589        my $shared_fileroot = $self->{'shared_fileroot'};
    561590        if (!defined $shared_fileroot->{$file_prefix}) {
    562         my $file_prefix_rec = { 'tie_to' => undef, 'exts' => {} };
     591        my $file_prefix_rec = { 'tie_to'  => undef,
     592                        'exts'    => {} };
    563593        $shared_fileroot->{$file_prefix} = $file_prefix_rec;
    564594        }
     
    568598        my $process_exp = $self->{'process_exp'};
    569599
    570         if ($file =~ m/$self->{'process_exp'}/) {
     600        if ($file =~ m/$process_exp/) {
    571601        # This is the document the others should be tied to
    572602        $file_prefix_rec->{'tie_to'} = $file_ext;
    573603        }
    574604        else {
    575         if (defined $self->{'associate_ext_lookup'}->{$file_ext}) {
     605        if ($file_ext =~ m/$associate_tail_re$/) {
    576606            $file_prefix_rec->{'exts'}->{$file_ext} = 1;
    577607        }
    578608        }
    579     }
    580     }
    581    
     609
     610    }
     611    }
     612
    582613    # now check whether we are actually processing this
    583614    my $filename = $file;
     
    630661
    631662        my $has_file_ext = $exts->{$file_ext};
    632        
     663
    633664        if ($has_file_ext) {
    634665        return 1;
     
    646677    my ($file, $filename, $metadata) = @_;
    647678
    648     my $associate_ext = $self->{'associate_ext'};
    649 
    650 
    651     return 0 if (!$associate_ext);
     679    my $associate_tail_re = $self->{'associate_tail_re'};
     680    return 0 if (!$associate_tail_re);
    652681
    653682    # If file, see if matches with "tie_to" doc or is one of the
    654683    # associated filename extensions.
    655684
    656     my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/);
     685    my ($file_prefix,$file_ext) = $self->root_ext_split($file,$associate_tail_re);
     686
    657687    if ((defined $file_prefix) && (defined $file_ext)) {
    658688
     
    673703        my ($full_prefix) = ($filename =~ m/^(.*)\..*?$/);
    674704        foreach my $e (keys %$exts) {       
    675         my $assoc_file = "$full_prefix.$e";
     705        my $assoc_file = "$full_prefix$e";
     706        print STDERR "  $self->{'plugin_type'}: Associating $file_prefix$e with $file_prefix_rec->{'tie_to'} version\n";
    676707        my $mime_type = ""; # let system auto detect this
    677708        push(@$assoc_tobe,"$assoc_file:$mime_type:");
    678709        }
     710
    679711    }
    680712    elsif ($self->tie_to_assoc_file($file_ext,$file_prefix_rec)) {
    681         # a form of smart block
    682        
     713
     714
     715        # a form of smart block     
    683716        return 1;
    684717    }
     
    724757    }
    725758    }
     759
    726760    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    727761    return (undef,undef); # can't recognise
     
    812846    # include any metadata passed in from previous plugins
    813847    # note that this metadata is associated with the top level section
     848
     849    my $associate_tail_re = $self->{'associate_tail_re'};
    814850
    815851    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
     
    11561192    my ($doc_obj, $cursection, $metadata) = @_;
    11571193
     1194    my $associate_tail_re = $self->{'associate_tail_re'};
     1195
    11581196    foreach my $field (keys(%$metadata)) {
    11591197    # $metadata->{$field} may be an array reference
     
    11731211                 
    11741212        $doc_obj->associate_file($full_filename,$tail_filename,$mimetype);
    1175        
     1213
     1214        # work out extended tail extension (i.e. matching tail re)
     1215
     1216        my ($file_prefix,$file_extended_ext)
     1217            = $self->root_ext_split($tail_filename,$associate_tail_re);
     1218        my ($pre_doc_ext) = ($file_extended_ext =~ m/^(.*)\..*$/);
     1219
    11761220        my ($doc_ext) = ($tail_filename =~ m/^.*\.(.*)$/);
    11771221        my $start_doclink = "<a href=\"_httpcollection_/index/assoc/{Or}{[parent(Top):archivedir],[archivedir]}/$tail_filename\">";
     
    11791223        my $end_doclink = "</a>";
    11801224
    1181         $equiv_form .= " $start_doclink\{If\}{$srcicon,$srcicon,$doc_ext\}$end_doclink";       
     1225        my $assoc_form = "$start_doclink\{If\}{$srcicon,$srcicon,$doc_ext\}$end_doclink";
     1226
     1227        if (defined $pre_doc_ext) {
     1228            # for metadata such as [mp3._edited] [mp3._full] ...
     1229            $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.$pre_doc_ext", $assoc_form);
     1230        }
     1231
     1232        # for multiple metadata such as [mp3.assoclink]
     1233        $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.assoclink", $assoc_form);
     1234       
     1235        $equiv_form .= " $assoc_form"; 
    11821236        }
    11831237        $doc_obj->add_utf8_metadata ($cursection, "equivlink", $equiv_form);
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r10889 r11122  
    146146    my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    147147    my $text_options = $secondary_plugin_options->{'TEXTPlug'};
     148    my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
    148149   
    149150    if ($self->{'input_encoding'} eq "auto") {
     
    179180    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    180181    push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     182
     183    my $associate_tail_re = $self->{'associate_tail_re'};
     184    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     185    push(@$html_options, "-associate_tail_re", $associate_tail_re);
     186    push(@$text_options, "-associate_tail_re", $associate_tail_re);
     187    push(@$pagedimg_options, "-associate_tail_re", $associate_tail_re);
     188    }
     189
    181190 
    182191    $self = bless $self, $class;
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10769 r11122  
    152152    my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    153153    my $text_options = $secondary_plugin_options->{'TextPlug'};
    154    
     154    my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};   
    155155    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this
    156156    push(@$html_options,"-input_encoding", "utf8");
     157
    157158    if ($self->{'input_encoding'} eq "auto") {
    158159    $self->{'input_encoding'} = "utf8";
     
    167168    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    168169    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     170
     171    my $associate_tail_re = $self->{'associate_tail_re'};
     172    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     173    push(@$html_options, "-associate_tail_re", $associate_tail_re);
     174    push(@$text_options, "-associate_tail_re", $associate_tail_re);
     175    push(@$structhtml_options, "-associate_tail_re", $associate_tail_re);
     176    }
    169177       
    170178    $self = bless $self, $class;
Note: See TracChangeset for help on using the changeset viewer.