greenstone.org greenstone wiki greenstone trac planet greenstone

Changeset 17727

Show
Ignore:
Timestamp:
2008-11-06 11:35:21 (2 months ago)
Author:
kjdon
Message:

W3ImagePlugin renamed to HTMLImagePlugin

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • gsdl/trunk/perllib/plugins/HTMLImagePlugin.pm

    r16392 r17727  
    11########################################################################### 
    22# 
    3 # W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents 
     3# HTMLImagePlugin.pm -- Context-based image indexing plugin for HTML documents 
    44# 
    55# A component of the Greenstone digital library software 
     
    3939#  collection builds at the import stage. 
    4040# 
    41 #  W3ImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also  
     41#  HTMLImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also  
    4242#  if required). It can be used in place of HTMLPlugin to index both 
    4343#  pages and their images. 
     
    5555#    ImageMagick can be downloaded from the website above. 
    5656#    Make sure the system path includes the ImageMagick binaries 
    57 #    before using W3ImagePlugin. 
     57#    before using HTMLImagePlugin. 
    5858# 
    5959#    NOTE: NT/2000/XP contain a filesystem utility 'convert.exe'  
     
    9797#   ... 
    9898# 
    99 #   plugin W3ImagePlugin -index_pages -aggressiveness 6 
     99#   plugin HTMLImagePlugin -index_pages -aggressiveness 6 
    100100# 
    101101#   ... 
     
    110110# 
    111111  
    112 package W3ImagePlugin; 
     112package HTMLImagePlugin; 
    113113 
    114114use HTMLPlugin; 
     
    120120 
    121121sub BEGIN { 
    122     @W3ImagePlugin::ISA = qw( HTMLPlugin ); 
     122    @HTMLImagePlugin::ISA = qw( HTMLPlugin ); 
    123123} 
    124124 
     
    218218        'reqd' => "no" } ]; 
    219219 
    220 my $options = { 'name'     => "W3ImagePlugin", 
     220my $options = { 'name'     => "HTMLImagePlugin", 
    221221                'desc'     => "{W3ImagePlugin.desc}", 
    222222                'abstract' => "no", 
     
    281281 
    282282# get complex configuration options from configuration files 
    283 # -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+) 
     283# -- $GSDLCOLLECTION/etc/HTMLImagePlugin.cfg (tag sets for aggr 2+) 
    284284# -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+) 
    285285 
    286 # If there's no W3ImagePlugin.cfg file we'll use the following default values 
     286# If there's no HTMLImagePlugin.cfg file we'll use the following default values 
    287287my $defaultcfg = ' 
    288288<delimitertagset> 
     
    323323    my ($filepath); 
    324324 
    325     print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n" 
     325    print {$self->{'outhandle'}} "HTMLImagePlugin: Initialising\n" 
    326326        if $self->{'verbosity'} > 1; 
    327     # etc/W3ImagePlugin.cfg (XML) 
     327    # etc/HTMLImagePlugin.cfg (XML) 
    328328    # tag sets for captions and neartext 
    329329    if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) { 
     
    332332        my ($cfg, @tagsets, $tagset, $type, @delims); 
    333333 
    334         $filepath = "$collpath/etc/W3ImagePlugin.cfg"; 
     334        $filepath = "$collpath/etc/HTMLImagePlugin.cfg"; 
    335335        if ( open CFG, "<$filepath" ) { 
    336336            while (<CFG>) { $cfg .= $_ } 
     
    354354        # output a warning if there seem to be no delimiters 
    355355        if ( scalar(@{$self->{'cdelims'}} == 0)) { 
    356             print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n"; 
     356            print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no caption delimiters found in $filepath\n"; 
    357357        } 
    358358        if ( scalar(@{$self->{'delims'}} == 0)) { 
    359             print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 
     359            print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 
    360360        } 
    361361    } 
     
    373373            close STOPWORDS; 
    374374        } else { 
    375             print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 
     375            print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 
    376376        } 
    377377         
     
    380380    if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) { 
    381381        $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33; 
    382         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
     382        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
    383383    }  
    384384    if ( $self->{'caption_length'} > $self->{'max_near_text'} ) { 
    385385        $self->{'max_near_text'} = $self->{'caption_length'} * 1.33; 
    386         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
     386        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
    387387    } 
    388388 
     
    420420            ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is ); 
    421421            if (! defined($imgtag)) { $imgtag = $filepath } 
    422             print $outhandle "W3ImagePlugin: extracting $filepath\n" 
     422            print $outhandle "HTMLImagePlugin: extracting $filepath\n" 
    423423                if ( $self->{'verbosity'} > 1 ); 
    424424            $doc_obj = new doc ("", "indexed_doc"); 
     
    434434        return $numdocs; 
    435435    } else { 
    436         print $outhandle "W3ImagePlugin: No images from $file indexed\n" 
     436        print $outhandle "HTMLImagePlugin: No images from $file indexed\n" 
    437437            if ( $self->{'verbosity'} > 2 ); 
    438438        return 1; 
     
    473473    `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp; 
    474474    if ( ! (-e $thumbfp) ) { 
    475         print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;  
     475        print STDERR "HTMLImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;  
    476476    } 
    477477     
     
    854854    } elsif ( $bestlen[$best1] < $mintext ) { 
    855855        # use plain text extraction if tags failed (e.g. usable tag outside context) 
    856         print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n"  
     856        print {$self->{'outhandle'}} "HTMLImagePlugin: Fallback to plain-text extraction for $tag\n"  
    857857            if $self->{'verbosity'} > 2; 
    858858        $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>"; 
     
    986986            `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m; 
    987987        if (! ($width && $height)) {  
    988             print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next; 
     988            print STDERR "HTMLImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next; 
    989989        } 
    990990        $filesize = (-s $abspath); 
     
    999999           $imgs->{$filepath}{'filesize'} = $filesize; 
    10001000       } else { 
    1001            print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"  
     1001           print {$self->{'outhandle'}} "HTMLImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"  
    10021002               if $self->{'verbosity'} > 2; 
    10031003       }