Changeset 17727

Show
Ignore:
Timestamp:
06.11.2008 11:35:21 (11 years ago)
Author:
kjdon
Message:

W3ImagePlugin renamed to HTMLImagePlugin

Files:
1 moved

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/HTMLImagePlugin.pm

    r16392 r17727  
    11########################################################################### 
    22# 
    3 # W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents 
     3# HTMLImagePlugin.pm -- Context-based image indexing plugin for HTML documents 
    44# 
    55# A component of the Greenstone digital library software 
     
    3939#  collection builds at the import stage. 
    4040# 
    41 #  W3ImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also  
     41#  HTMLImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also  
    4242#  if required). It can be used in place of HTMLPlugin to index both 
    4343#  pages and their images. 
     
    5555#    ImageMagick can be downloaded from the website above. 
    5656#    Make sure the system path includes the ImageMagick binaries 
    57 #    before using W3ImagePlugin. 
     57#    before using HTMLImagePlugin. 
    5858# 
    5959#    NOTE: NT/2000/XP contain a filesystem utility 'convert.exe'  
     
    9797#   ... 
    9898# 
    99 #   plugin W3ImagePlugin -index_pages -aggressiveness 6 
     99#   plugin HTMLImagePlugin -index_pages -aggressiveness 6 
    100100# 
    101101#   ... 
     
    110110# 
    111111  
    112 package W3ImagePlugin; 
     112package HTMLImagePlugin; 
    113113 
    114114use HTMLPlugin; 
     
    120120 
    121121sub BEGIN { 
    122     @W3ImagePlugin::ISA = qw( HTMLPlugin ); 
     122    @HTMLImagePlugin::ISA = qw( HTMLPlugin ); 
    123123} 
    124124 
     
    218218    'reqd' => "no" } ]; 
    219219 
    220 my $options = { 'name'     => "W3ImagePlugin", 
     220my $options = { 'name'     => "HTMLImagePlugin", 
    221221        'desc'     => "{W3ImagePlugin.desc}", 
    222222        'abstract' => "no", 
     
    281281 
    282282# get complex configuration options from configuration files 
    283 # -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+) 
     283# -- $GSDLCOLLECTION/etc/HTMLImagePlugin.cfg (tag sets for aggr 2+) 
    284284# -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+) 
    285285 
    286 # If there's no W3ImagePlugin.cfg file we'll use the following default values 
     286# If there's no HTMLImagePlugin.cfg file we'll use the following default values 
    287287my $defaultcfg = ' 
    288288<delimitertagset> 
     
    323323    my ($filepath); 
    324324 
    325     print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n" 
     325    print {$self->{'outhandle'}} "HTMLImagePlugin: Initialising\n" 
    326326    if $self->{'verbosity'} > 1; 
    327     # etc/W3ImagePlugin.cfg (XML) 
     327    # etc/HTMLImagePlugin.cfg (XML) 
    328328    # tag sets for captions and neartext 
    329329    if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) { 
     
    332332    my ($cfg, @tagsets, $tagset, $type, @delims); 
    333333 
    334     $filepath = "$collpath/etc/W3ImagePlugin.cfg"; 
     334    $filepath = "$collpath/etc/HTMLImagePlugin.cfg"; 
    335335    if ( open CFG, "<$filepath" ) { 
    336336        while (<CFG>) { $cfg .= $_ } 
     
    354354    # output a warning if there seem to be no delimiters 
    355355    if ( scalar(@{$self->{'cdelims'}} == 0)) { 
    356         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n"; 
     356        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no caption delimiters found in $filepath\n"; 
    357357    } 
    358358    if ( scalar(@{$self->{'delims'}} == 0)) { 
    359         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 
     359        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 
    360360    } 
    361361    } 
     
    373373        close STOPWORDS; 
    374374    } else { 
    375         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 
     375        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 
    376376    } 
    377377     
     
    380380    if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) { 
    381381    $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33; 
    382     print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
     382    print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
    383383    }  
    384384    if ( $self->{'caption_length'} > $self->{'max_near_text'} ) { 
    385385    $self->{'max_near_text'} = $self->{'caption_length'} * 1.33; 
    386     print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
     386    print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 
    387387    } 
    388388 
     
    420420        ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is ); 
    421421        if (! defined($imgtag)) { $imgtag = $filepath } 
    422         print $outhandle "W3ImagePlugin: extracting $filepath\n" 
     422        print $outhandle "HTMLImagePlugin: extracting $filepath\n" 
    423423        if ( $self->{'verbosity'} > 1 ); 
    424424        $doc_obj = new doc ("", "indexed_doc"); 
     
    434434    return $numdocs; 
    435435    } else { 
    436     print $outhandle "W3ImagePlugin: No images from $file indexed\n" 
     436    print $outhandle "HTMLImagePlugin: No images from $file indexed\n" 
    437437        if ( $self->{'verbosity'} > 2 ); 
    438438    return 1; 
     
    473473    `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp; 
    474474    if ( ! (-e $thumbfp) ) { 
    475     print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;  
     475    print STDERR "HTMLImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;  
    476476    } 
    477477     
     
    854854    } elsif ( $bestlen[$best1] < $mintext ) { 
    855855    # use plain text extraction if tags failed (e.g. usable tag outside context) 
    856     print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n"  
     856    print {$self->{'outhandle'}} "HTMLImagePlugin: Fallback to plain-text extraction for $tag\n"  
    857857        if $self->{'verbosity'} > 2; 
    858858    $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>"; 
     
    986986        `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m; 
    987987    if (! ($width && $height)) {  
    988         print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next; 
     988        print STDERR "HTMLImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next; 
    989989    } 
    990990    $filesize = (-s $abspath); 
     
    999999       $imgs->{$filepath}{'filesize'} = $filesize; 
    10001000       } else { 
    1001        print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"  
     1001       print {$self->{'outhandle'}} "HTMLImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"  
    10021002           if $self->{'verbosity'} > 2; 
    10031003       }