Changeset 11122

Show
Ignore:
Timestamp:
27.01.2006 00:21:47 (14 years ago)
Author:
davidb
Message:

Introduction of the -associate_tail_re option to BasPlug. This is a generalisation
of associate_ext (in fact, associate_ext is now mapped into the equivalent
associate_tail_re expression). To work properly, plugins that use the
secondary plugin mechanism (such as PDFPlug) need to pass the
associate_tail_re value on to their secondary plugins too.

Location:
trunk/gsdl/perllib/plugins
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r11089 r11122  
    105105    'type' => "string", 
    106106    'reqd' => "no" }, 
     107      { 'name' => "associate_tail_re", 
     108    'desc' => "{BasPlug.associate_tail_re}", 
     109    'type' => "string", 
     110    'reqd' => "no" }, 
    107111      { 'name' => "input_encoding", 
    108112    'desc' => "{BasPlug.input_encoding}", 
     
    413417    my $associate_ext = $self->{'associate_ext'}; 
    414418    if ((defined $associate_ext) && ($associate_ext ne "")) { 
    415     my @exts = split(/,/,$associate_ext); 
    416  
    417     my %associate_ext_lookup = (); 
    418     foreach my $e (@exts) { 
    419         $associate_ext_lookup{$e} = 1; 
    420     } 
    421  
    422     $self->{'associate_ext_lookup'} = \%associate_ext_lookup; 
     419 
     420    my $associate_tail_re = $self->{'associate_tail_re'}; 
     421    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
     422        my $outhandle = $self->{'outhandle'}; 
     423        print $outhandle "Warning: can only specify 'associate_ext' or 'associate_tail_re'\n"; 
     424        print $outhandle "         defaulting to 'associate_tail_re'\n"; 
     425    } 
     426    else { 
     427        my @exts = split(/,/,$associate_ext); 
     428 
     429        my @exts_bracketed = map { $_ = "(?:\\.$_)" } @exts; 
     430        my $associate_tail_re = join("|",@exts_bracketed); 
     431        $self->{'associate_tail_re'} = $associate_tail_re; 
     432    } 
     433 
     434    delete $self->{'associate_ext'}; 
    423435    } 
    424436 
     
    545557    return; 
    546558} 
    547      
     559 
     560sub root_ext_split 
     561{ 
     562    my $self = shift (@_); 
     563    my ($filename,$tail_re) = @_; 
     564     
     565    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/); 
     566 
     567    if ((!defined $file_prefix) || (!defined $file_ext)) { 
     568    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/); 
     569    } 
     570 
     571    return ($file_prefix,$file_ext); 
     572} 
     573 
    548574sub metadata_read { 
    549575    my $self = shift (@_);   
    550576    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 
    551577    # Keep track of filenames with same root but different extensions 
    552     # Used to support -associate_ext 
    553  
    554     my $associate_ext = $self->{'associate_ext'}; 
    555     if ((defined $associate_ext) && ($associate_ext ne "")) { 
    556  
    557     my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/); 
     578    # Used to support -associate_ext and the more generalised 
     579    # -associate_tail_re 
     580 
     581    my $associate_tail_re = $self->{'associate_tail_re'}; 
     582    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
     583 
     584    my ($file_prefix,$file_ext)  
     585        = $self->root_ext_split($file,$associate_tail_re); 
     586 
    558587    if ((defined $file_prefix) && (defined $file_ext)) { 
    559          
     588 
    560589        my $shared_fileroot = $self->{'shared_fileroot'}; 
    561590        if (!defined $shared_fileroot->{$file_prefix}) { 
    562         my $file_prefix_rec = { 'tie_to' => undef, 'exts' => {} }; 
     591        my $file_prefix_rec = { 'tie_to'  => undef,  
     592                        'exts'    => {} }; 
    563593        $shared_fileroot->{$file_prefix} = $file_prefix_rec; 
    564594        } 
     
    568598        my $process_exp = $self->{'process_exp'}; 
    569599 
    570         if ($file =~ m/$self->{'process_exp'}/) { 
     600        if ($file =~ m/$process_exp/) { 
    571601        # This is the document the others should be tied to 
    572602        $file_prefix_rec->{'tie_to'} = $file_ext; 
    573603        } 
    574604        else { 
    575         if (defined $self->{'associate_ext_lookup'}->{$file_ext}) { 
     605        if ($file_ext =~ m/$associate_tail_re$/) { 
    576606            $file_prefix_rec->{'exts'}->{$file_ext} = 1; 
    577607        } 
    578608        } 
    579     } 
    580     } 
    581      
     609 
     610    } 
     611    } 
     612 
    582613    # now check whether we are actually processing this 
    583614    my $filename = $file; 
     
    630661 
    631662        my $has_file_ext = $exts->{$file_ext}; 
    632          
     663 
    633664        if ($has_file_ext) { 
    634665        return 1; 
     
    646677    my ($file, $filename, $metadata) = @_; 
    647678 
    648     my $associate_ext = $self->{'associate_ext'}; 
    649  
    650  
    651     return 0 if (!$associate_ext); 
     679    my $associate_tail_re = $self->{'associate_tail_re'}; 
     680    return 0 if (!$associate_tail_re); 
    652681 
    653682    # If file, see if matches with "tie_to" doc or is one of the 
    654683    # associated filename extensions. 
    655684 
    656     my ($file_prefix,$file_ext) = ($file =~ m/^(.*)\.(.*?)$/); 
     685    my ($file_prefix,$file_ext) = $self->root_ext_split($file,$associate_tail_re); 
     686 
    657687    if ((defined $file_prefix) && (defined $file_ext)) { 
    658688 
     
    673703        my ($full_prefix) = ($filename =~ m/^(.*)\..*?$/); 
    674704        foreach my $e (keys %$exts) {        
    675         my $assoc_file = "$full_prefix.$e"; 
     705        my $assoc_file = "$full_prefix$e"; 
     706        print STDERR "  $self->{'plugin_type'}: Associating $file_prefix$e with $file_prefix_rec->{'tie_to'} version\n"; 
    676707        my $mime_type = ""; # let system auto detect this 
    677708        push(@$assoc_tobe,"$assoc_file:$mime_type:");  
    678709        } 
     710 
    679711    } 
    680712    elsif ($self->tie_to_assoc_file($file_ext,$file_prefix_rec)) { 
    681         # a form of smart block 
    682          
     713 
     714 
     715        # a form of smart block      
    683716        return 1; 
    684717    } 
     
    724757    } 
    725758    } 
     759 
    726760    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 
    727761    return (undef,undef); # can't recognise 
     
    812846    # include any metadata passed in from previous plugins  
    813847    # note that this metadata is associated with the top level section 
     848 
     849    my $associate_tail_re = $self->{'associate_tail_re'}; 
    814850 
    815851    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata); 
     
    11561192    my ($doc_obj, $cursection, $metadata) = @_; 
    11571193 
     1194    my $associate_tail_re = $self->{'associate_tail_re'}; 
     1195 
    11581196    foreach my $field (keys(%$metadata)) { 
    11591197    # $metadata->{$field} may be an array reference 
     
    11731211                   
    11741212        $doc_obj->associate_file($full_filename,$tail_filename,$mimetype); 
    1175          
     1213 
     1214        # work out extended tail extension (i.e. matching tail re) 
     1215 
     1216        my ($file_prefix,$file_extended_ext)  
     1217            = $self->root_ext_split($tail_filename,$associate_tail_re); 
     1218        my ($pre_doc_ext) = ($file_extended_ext =~ m/^(.*)\..*$/); 
     1219 
    11761220        my ($doc_ext) = ($tail_filename =~ m/^.*\.(.*)$/); 
    11771221        my $start_doclink = "<a href=\"_httpcollection_/index/assoc/{Or}{[parent(Top):archivedir],[archivedir]}/$tail_filename\">"; 
     
    11791223        my $end_doclink = "</a>"; 
    11801224 
    1181         $equiv_form .= " $start_doclink\{If\}{$srcicon,$srcicon,$doc_ext\}$end_doclink";         
     1225        my $assoc_form = "$start_doclink\{If\}{$srcicon,$srcicon,$doc_ext\}$end_doclink"; 
     1226 
     1227        if (defined $pre_doc_ext) { 
     1228            # for metadata such as [mp3._edited] [mp3._full] ... 
     1229            $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.$pre_doc_ext", $assoc_form);  
     1230        } 
     1231 
     1232        # for multiple metadata such as [mp3.assoclink] 
     1233        $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.assoclink", $assoc_form);  
     1234         
     1235        $equiv_form .= " $assoc_form";   
    11821236        } 
    11831237        $doc_obj->add_utf8_metadata ($cursection, "equivlink", $equiv_form);  
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r10889 r11122  
    146146    my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
    147147    my $text_options = $secondary_plugin_options->{'TEXTPlug'}; 
     148    my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 
    148149    
    149150    if ($self->{'input_encoding'} eq "auto") { 
     
    179180    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    180181    push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
     182 
     183    my $associate_tail_re = $self->{'associate_tail_re'}; 
     184    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
     185    push(@$html_options, "-associate_tail_re", $associate_tail_re); 
     186    push(@$text_options, "-associate_tail_re", $associate_tail_re); 
     187    push(@$pagedimg_options, "-associate_tail_re", $associate_tail_re); 
     188    } 
     189 
    181190  
    182191    $self = bless $self, $class; 
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10769 r11122  
    152152    my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
    153153    my $text_options = $secondary_plugin_options->{'TextPlug'}; 
    154      
     154    my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};     
    155155    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this 
    156156    push(@$html_options,"-input_encoding", "utf8"); 
     157 
    157158    if ($self->{'input_encoding'} eq "auto") { 
    158159    $self->{'input_encoding'} = "utf8"; 
     
    167168    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
    168169    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
     170 
     171    my $associate_tail_re = $self->{'associate_tail_re'}; 
     172    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
     173    push(@$html_options, "-associate_tail_re", $associate_tail_re); 
     174    push(@$text_options, "-associate_tail_re", $associate_tail_re); 
     175    push(@$structhtml_options, "-associate_tail_re", $associate_tail_re); 
     176    } 
    169177        
    170178    $self = bless $self, $class;