Ignore:
Timestamp:
2005-07-06T15:27:45+12:00 (19 years ago)
Author:
kjdon
Message:

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/W3ImgPlug.pm

    r9853 r10218  
    225225
    226226sub new {
    227     my $class = shift (@_);
    228     my $self = new HTMLPlug ($class, @_);
    229     $self->{'plugin_type'} = "W3ImgPlug";
    230     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    231     my $option_list = $self->{'option_list'};
    232     push( @{$option_list}, $options );
    233 
    234     if (!parsargv::parse(\@_,
    235              q^aggressiveness/\d/3^, \$self->{'aggressiveness'},
    236              q^index_pages^, \$self->{'index_pages'},
    237              q^no_cache_images^, \$self->{'no_cache_images'},
    238              q^min_size/\d*/2000^, \$self->{'min_img_filesize'},
    239              q^min_width/\d*/50^, \$self->{'min_img_width'},
    240              q^min_height/\d*/50^, \$self->{'min_img_height'},
    241              q^thumb_size/\d*/100^, \$self->{'thumbnail_size'},
    242              q^convert_params/.*/ ^, \$self->{'img_convert_param'},
    243              q^max_near_text/\d*/400^, \$self->{'maxtext'},
    244              q^min_near_text/\d*/10^, \$self->{'mintext'},
    245              q^smallpage_threshold/\d*/2048^, \$self->{'smallpage_threshold'},
    246              q^textrefs_threshold/\d*/2^, \$self->{'textref_threshold'},
    247              q^caption_length/\d*/80^, \$self->{'caption_len'},
    248              q^neartext_length/\d*/300^, \$self->{'neartext_len'},
    249              q^document_text^, \$self->{'document_text'},
    250              "allow_extra_options"
    251              )) {
    252 
    253     print STDERR "\nIncorrect options passed to W3ImgPlug, check your collect.cfg configuration file\n";
    254     $self->print_txt_usage("");  # Use default resource bundle
    255     die "\n";
    256     }
    257    
     227    my ($class) = shift (@_);
     228    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
     229    push(@$pluginlist, $class);
     230
     231    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     232    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     233
     234    my $self = (defined $hashArgOptLists)? new HTMLPlug($pluginlist,$inputargs,$hashArgOptLists): new HTMLPlug($pluginlist,$inputargs);
     235
    258236    # init class variables
    259237    $self->{'textref'} = undef; # init by read_file fn
     
    399377    }
    400378
    401     if ( $self->{'neartext_len'} > $self->{'maxtext'} ) {
    402     $self->{'maxtext'} = $self->{'neartext_len'} * 1.33;
    403     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'maxtext'}\n";
     379    if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) {
     380    $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33;
     381    print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    404382    }
    405     if ( $self->{'caption_len'} > $self->{'maxtext'} ) {
    406     $self->{'maxtext'} = $self->{'caption_len'} * 1.33;
    407     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'maxtext'}\n";
     383    if ( $self->{'caption_length'} > $self->{'max_near_text'} ) {
     384    $self->{'max_near_text'} = $self->{'caption_length'} * 1.33;
     385    print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    408386    }
    409387
     
    492470    } else { ($crcid) = `cksum $filepath` =~ /^(\d+)/; }
    493471    $thumbfp = "$tndir/tn_$crcid.jpg";
    494     `convert -flatten -filter Hanning $self->{'img_convert_param'} -geometry "$self->{'thumbnail_size'}x$self->{'thumbnail_size'}>" $filepath $thumbfp` unless -e $thumbfp;
     472    `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp;
    495473    if ( ! (-e $thumbfp) ) {
    496474    print STDERR "W3ImgPlug: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
     
    532510        # textual references
    533511        if ( $aggr  == 5 || $aggr >= 7) {
    534         if ( length($relreltext) > ($self->{'caption_len'} * 2) )  {
     512        if ( length($relreltext) > ($self->{'caption_length'} * 2) )  {
    535513            $reltext .= $self->get_textrefs($relreltext, $textref, $prevpos, $imgs->{$id}{'pos'}, $nextpos); }
    536514        else {
     
    610588
    611589    # extract larger context
    612     $maxtext = $self->{'maxtext'};
     590    $maxtext = $self->{'max_near_text'};
    613591    $startpos = $pos - ($maxtext * 4);
    614592    $context_size = $maxtext*10;
     
    663641    }
    664642    foreach $sentence ( keys %sentences ) {
    665     if ($sentences{$sentence} < $self->{'textref_threshold'}) {
     643    if ($sentences{$sentence} < $self->{'textrefs_threshold'}) {
    666644        delete $sentences{$sentence};
    667645    }
     
    681659    $startpos, $context, $context_size);
    682660   
    683     $mintext = $self->{'mintext'};
    684     $goodlen = $self->{'caption_len'};
     661    $mintext = $self->{'min_near_text'};
     662    $goodlen = $self->{'caption_length'};
    685663
    686664    # extract a context to extract near text from (faster)
    687     $context_size = $self->{'maxtext'}*3;
     665    $context_size = $self->{'max_near_text'}*3;
    688666    $startpos = $pos - ($context_size / 2);
    689667    if ($startpos < $prevpos ) { $startpos = $prevpos }
     
    762740    # if bound_tag too far from the image, then prob not caption
    763741    # (note: have to allow for tags, so multiply by 3)
    764     if ( $etag && length($nt[0]) < ($self->{'caption_len'} * 3) ) {
     742    if ( $etag && length($nt[0]) < ($self->{'caption_length'} * 3) ) {
    765743    if ( $nt[0] =~ /<\/$etag>/si ) {
    766744        # the whole caption is above the image: <tag>text</tag><img>
    767745        ($nt[0]) =~ /<(?:$etag)[\s]?.*?>(.*?)<\/$etag>/is;
    768746        $nt[0] = $self->strip_tags($nt[0]);
    769         if ( length($nt[0]) > $self->{'mintext'} ) {
     747        if ( length($nt[0]) > $self->{'min_near_text'} ) {
    770748        $gotcap = 1;
    771749        $nt[1] = "";
     
    776754        ($nt[1]) = $nt[1] =~ /(.*?)<\/$etag>/si;
    777755        $nt[0] = $self->strip_tags($nt[0] . $nt[1]);
    778         if ( length($nt[0]) > $self->{'mintext'} ) {
     756        if ( length($nt[0]) > $self->{'min_near_text'} ) {
    779757        $gotcap = 2;
    780758        $nt[1] = "";
     
    818796    $bestlen[0] = $bestlen[1] = 0; $bestlen[2] = $bdist = 999999;
    819797    $best[0] = $best[1] = $best[2] = "";
    820     $maxtext = $self->{'maxtext'};
    821     $mintext = $self->{'mintext'};
    822     $goodlen = $self->{'neartext_len'};
     798    $maxtext = $self->{'max_near_text'};
     799    $mintext = $self->{'min_near_text'};
     800    $goodlen = $self->{'neartext_length'};
    823801
    824802    # extract a context to extract near text from (faster)
     
    945923    # either side of the tag (by word boundary)
    946924    return "" if ( ! exists $imgs->{$fp}{'rawpos'} );
    947     $startpos = $imgs->{$fp}{'rawpos'} - (($self->{'maxtext'} / 2) + 20);
     925    $startpos = $imgs->{$fp}{'rawpos'} - (($self->{'max_near_text'} / 2) + 20);
    948926    if ( $startpos < 0 ) { $startpos = 0 }
    949     $rawtext = substr $self->{'plaintext'}, $startpos, $self->{'maxtext'} + 20;
     927    $rawtext = substr $self->{'plaintext'}, $startpos, $self->{'max_near_text'} + 20;
    950928    $rawtext =~ s/\s\s/ /g;
    951929
     
    1010988    }
    1011989    $filesize = (-s $abspath);
    1012     if ( $filesize >= $self->{'min_img_filesize'}
    1013         && ( $width >= $self->{'min_img_width'} )
    1014         && ( $height >= $self->{'min_img_height'} ) ) {
     990    if ( $filesize >= $self->{'min_size'}
     991        && ( $width >= $self->{'min_width'} )
     992        && ( $height >= $self->{'min_height'} ) ) {
    1015993       
    1016994       $imgs->{$filepath}{'exists'} = 1;
Note: See TracChangeset for help on using the changeset viewer.