Changeset 10254 for trunk/gsdl/perllib/plugins/W3ImgPlug.pm
- Timestamp: 2005-07-19T16:27:51+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/W3ImgPlug.pm
r10218 r10254 117 117 use util; 118 118 use parsargv; 119 use strict 'subs'; 119 use strict; # 'subs'; 120 no strict 'refs'; # allow filehandles to be variables and viceversa 120 121 121 122 sub BEGIN { 122 @ ISA = qw( HTMLPlug );123 @W3ImgPlug::ISA = qw( HTMLPlug ); 123 124 } 124 125 … … 423 424 $doc_obj = new doc ("", "indexed_doc"); 424 425 $section = $doc_obj->get_top_section(); 425 $prevpos = ( $i == 0 ? 0 : $imgs->{$fplist[$i - 1]}{'pos'});426 $nextpos = ( $i >= ($nimgs -1) ? -1 : $imgs->{$fplist[$i + 1]}{'pos'} );426 my $prevpos = ( $i == 0 ? 0 : $imgs->{$fplist[$i - 1]}{'pos'}); 427 my $nextpos = ( $i >= ($nimgs -1) ? -1 : $imgs->{$fplist[$i + 1]}{'pos'} ); 427 428 428 429 $self->extract_image_info($imgtag, $filepath, $textref, $doc_obj, $section, $tndir, $prevpos, $nextpos); … … 459 460 $orig_fp =~ s/\\/\//g; 460 461 $filepath = "$self->{'htpath'}/$filepath"; 461 ($onlyfn) = $filename =~ /([^\\\/]*)$/;462 my ($onlyfn) = $filename =~ /([^\\\/]*)$/; 462 463 ($fn, $ext) = $onlyfn =~ /(.*)\.(.*)/; 463 464 $fn = lc $fn; $ext = lc $ext; … … 601 602 if ( $self->{'aggressiveness'} == 5 && ! 
$self->{'smallpage'} ) { 602 603 @pagemeta = $self->get_page_metadata($textref); 603 foreach $value ( @pagemeta ) {604 foreach my $value ( @pagemeta ) { 604 605 $context .= "$value."; # make each into psuedo-sentence 605 606 } … … 611 612 612 613 # extract general references 613 foreach $rw ( @refwords ) {614 foreach my $rw ( @refwords ) { 614 615 while ( $context =~ /[\.\?\!\,](.*?$rw\W.*?[\.\?\!\,])/ig ) { 615 $sentence = $1;616 my $sentence = $1; 616 617 $sentence =~ s/\s+/ /g; 617 618 $sentences{$sentence}+=2; … … 621 622 my ($fignum) = $context =~ /[\.\?\!].*?(?:figure|table)s?[\-\_\ \.](\d+\w*)\W.*?[\.\?\!]/ig; 622 623 if ( $fignum ) { 623 foreach $rw ( @refwords ) {624 foreach my $rw ( @refwords ) { 624 625 while ( $context =~ /[\.\?\!](.*?(figure|table)[\-\_\ \.]$fignum\W.*?[\.\?\!])/ig ) { 625 $sentence = $1;626 my $sentence = $1; 626 627 $sentence =~ s/\s+/ /g; 627 628 $sentences{$sentence}+=4; … … 632 633 # sentences with occurances of important words 633 634 @relwords = $reltext =~ /([a-zA-Z]{4,})/g; # take out small words 634 foreach $word ( @relwords ) {635 foreach my $word ( @relwords ) { 635 636 if ( $self->{'stopwords'}{$word} ) { next } # skip stop words 636 637 while ( $context =~ /([^\.\?\!]*?$word\W.*?[\.\?\!])/ig ) { 637 $sentence = $1;638 my $sentence = $1; 638 639 $sentence =~ s/\s+/ /g; 639 640 $sentences{$sentence}++; 640 641 } 641 642 } 642 foreach $sentence ( keys %sentences ) {643 foreach my $sentence ( keys %sentences ) { 643 644 if ($sentences{$sentence} < $self->{'textrefs_threshold'}) { 644 645 delete $sentences{$sentence}; … … 656 657 sub extract_caption_text { 657 658 my ($self, $tag, $textref, $prevpos, $pos, $nextpos) = (@_); 658 my (@neartext, $len, $hdelim, $ goodlen,659 my (@neartext, $len, $hdelim, $mintext, $goodlen, 659 660 $startpos, $context, $context_size); 660 661 … … 1003 1004 } 1004 1005 $num = 0; 1005 foreach $i ( keys %{$imgs} ) {1006 foreach my $i ( keys %{$imgs} ) { 1006 1007 if ( $imgs->{$i}{'pos'} ) { 1007 1008 
$num++;
Note: See TracChangeset for help on using the changeset viewer.