Changeset 10218 for trunk/gsdl/perllib/plugins/W3ImgPlug.pm
- Timestamp:
- 2005-07-06T15:27:45+12:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/W3ImgPlug.pm
r9853 r10218 225 225 226 226 sub new { 227 my $class = shift (@_); 228 my $self = new HTMLPlug ($class, @_); 229 $self->{'plugin_type'} = "W3ImgPlug"; 230 # 14-05-02 To allow for proper inheritance of arguments - John Thompson 231 my $option_list = $self->{'option_list'}; 232 push( @{$option_list}, $options ); 233 234 if (!parsargv::parse(\@_, 235 q^aggressiveness/\d/3^, \$self->{'aggressiveness'}, 236 q^index_pages^, \$self->{'index_pages'}, 237 q^no_cache_images^, \$self->{'no_cache_images'}, 238 q^min_size/\d*/2000^, \$self->{'min_img_filesize'}, 239 q^min_width/\d*/50^, \$self->{'min_img_width'}, 240 q^min_height/\d*/50^, \$self->{'min_img_height'}, 241 q^thumb_size/\d*/100^, \$self->{'thumbnail_size'}, 242 q^convert_params/.*/ ^, \$self->{'img_convert_param'}, 243 q^max_near_text/\d*/400^, \$self->{'maxtext'}, 244 q^min_near_text/\d*/10^, \$self->{'mintext'}, 245 q^smallpage_threshold/\d*/2048^, \$self->{'smallpage_threshold'}, 246 q^textrefs_threshold/\d*/2^, \$self->{'textref_threshold'}, 247 q^caption_length/\d*/80^, \$self->{'caption_len'}, 248 q^neartext_length/\d*/300^, \$self->{'neartext_len'}, 249 q^document_text^, \$self->{'document_text'}, 250 "allow_extra_options" 251 )) { 252 253 print STDERR "\nIncorrect options passed to W3ImgPlug, check your collect.cfg configuration file\n"; 254 $self->print_txt_usage(""); # Use default resource bundle 255 die "\n"; 256 } 257 227 my ($class) = shift (@_); 228 my ($pluginlist,$inputargs,$hashArgOptLists) = @_; 229 push(@$pluginlist, $class); 230 231 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 232 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 233 234 my $self = (defined $hashArgOptLists)? 
new HTMLPlug($pluginlist,$inputargs,$hashArgOptLists): new HTMLPlug($pluginlist,$inputargs); 235 258 236 # init class variables 259 237 $self->{'textref'} = undef; # init by read_file fn … … 399 377 } 400 378 401 if ( $self->{'neartext_len '} > $self->{'maxtext'} ) {402 $self->{'max text'} = $self->{'neartext_len'} * 1.33;403 print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max text'}\n";379 if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) { 380 $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33; 381 print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 404 382 } 405 if ( $self->{'caption_len '} > $self->{'maxtext'} ) {406 $self->{'max text'} = $self->{'caption_len'} * 1.33;407 print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max text'}\n";383 if ( $self->{'caption_length'} > $self->{'max_near_text'} ) { 384 $self->{'max_near_text'} = $self->{'caption_length'} * 1.33; 385 print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 408 386 } 409 387 … … 492 470 } else { ($crcid) = `cksum $filepath` =~ /^(\d+)/; } 493 471 $thumbfp = "$tndir/tn_$crcid.jpg"; 494 `convert -flatten -filter Hanning $self->{' img_convert_param'} -geometry "$self->{'thumbnail_size'}x$self->{'thumbnail_size'}>" $filepath $thumbfp` unless -e $thumbfp;472 `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp; 495 473 if ( ! (-e $thumbfp) ) { 496 474 print STDERR "W3ImgPlug: 'convert' failed. 
Check ImageMagicK binaries are installed and working correctly\n"; return 0; … … 532 510 # textual references 533 511 if ( $aggr == 5 || $aggr >= 7) { 534 if ( length($relreltext) > ($self->{'caption_len '} * 2) ) {512 if ( length($relreltext) > ($self->{'caption_length'} * 2) ) { 535 513 $reltext .= $self->get_textrefs($relreltext, $textref, $prevpos, $imgs->{$id}{'pos'}, $nextpos); } 536 514 else { … … 610 588 611 589 # extract larger context 612 $maxtext = $self->{'max text'};590 $maxtext = $self->{'max_near_text'}; 613 591 $startpos = $pos - ($maxtext * 4); 614 592 $context_size = $maxtext*10; … … 663 641 } 664 642 foreach $sentence ( keys %sentences ) { 665 if ($sentences{$sentence} < $self->{'textref _threshold'}) {643 if ($sentences{$sentence} < $self->{'textrefs_threshold'}) { 666 644 delete $sentences{$sentence}; 667 645 } … … 681 659 $startpos, $context, $context_size); 682 660 683 $mintext = $self->{'min text'};684 $goodlen = $self->{'caption_len '};661 $mintext = $self->{'min_near_text'}; 662 $goodlen = $self->{'caption_length'}; 685 663 686 664 # extract a context to extract near text from (faster) 687 $context_size = $self->{'max text'}*3;665 $context_size = $self->{'max_near_text'}*3; 688 666 $startpos = $pos - ($context_size / 2); 689 667 if ($startpos < $prevpos ) { $startpos = $prevpos } … … 762 740 # if bound_tag too far from the image, then prob not caption 763 741 # (note: have to allow for tags, so multiply by 3 764 if ( $etag && length($nt[0]) < ($self->{'caption_len '} * 3) ) {742 if ( $etag && length($nt[0]) < ($self->{'caption_length'} * 3) ) { 765 743 if ( $nt[0] =~ /<\/$etag>/si ) { 766 744 # the whole caption is above the image: <tag>text</tag><img> 767 745 ($nt[0]) =~ /<(?:$etag)[\s]?.*?>(.*?)<\/$etag>/is; 768 746 $nt[0] = $self->strip_tags($nt[0]); 769 if ( length($nt[0]) > $self->{'min text'} ) {747 if ( length($nt[0]) > $self->{'min_near_text'} ) { 770 748 $gotcap = 1; 771 749 $nt[1] = ""; … … 776 754 ($nt[1]) = $nt[1] =~ 
/(.*?)<\/$etag>/si; 777 755 $nt[0] = $self->strip_tags($nt[0] . $nt[1]); 778 if ( length($nt[0]) > $self->{'min text'} ) {756 if ( length($nt[0]) > $self->{'min_near_text'} ) { 779 757 $gotcap = 2; 780 758 $nt[1] = ""; … … 818 796 $bestlen[0] = $bestlen[1] = 0; $bestlen[2] = $bdist = 999999; 819 797 $best[0] = $best[1] = $best[2] = ""; 820 $maxtext = $self->{'max text'};821 $mintext = $self->{'min text'};822 $goodlen = $self->{'neartext_len '};798 $maxtext = $self->{'max_near_text'}; 799 $mintext = $self->{'min_near_text'}; 800 $goodlen = $self->{'neartext_length'}; 823 801 824 802 # extract a context to extract near text from (faster) … … 945 923 # either side of the tag (by word boundary) 946 924 return "" if ( ! exists $imgs->{$fp}{'rawpos'} ); 947 $startpos = $imgs->{$fp}{'rawpos'} - (($self->{'max text'} / 2) + 20);925 $startpos = $imgs->{$fp}{'rawpos'} - (($self->{'max_near_text'} / 2) + 20); 948 926 if ( $startpos < 0 ) { $startpos = 0 } 949 $rawtext = substr $self->{'plaintext'}, $startpos, $self->{'max text'} + 20;927 $rawtext = substr $self->{'plaintext'}, $startpos, $self->{'max_near_text'} + 20; 950 928 $rawtext =~ s/\s\s/ /g; 951 929 … … 1010 988 } 1011 989 $filesize = (-s $abspath); 1012 if ( $filesize >= $self->{'min_ img_filesize'}1013 && ( $width >= $self->{'min_ img_width'} )1014 && ( $height >= $self->{'min_ img_height'} ) ) {990 if ( $filesize >= $self->{'min_size'} 991 && ( $width >= $self->{'min_width'} ) 992 && ( $height >= $self->{'min_height'} ) ) { 1015 993 1016 994 $imgs->{$filepath}{'exists'} = 1;
Note: See TracChangeset for help on using the changeset viewer.