Ignore:
Timestamp:
2008-11-06T11:35:21+13:00 (15 years ago)
Author:
kjdon
Message:

W3ImagePlugin renamed to HTMLImagePlugin

File:
1 moved

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/HTMLImagePlugin.pm

    r17723 r17727  
    11###########################################################################
    22#
    3 # W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents
     3# HTMLImagePlugin.pm -- Context-based image indexing plugin for HTML documents
    44#
    55# A component of the Greenstone digital library software
     
    3939#  collection builds at the import stage.
    4040#
    41 #  W3ImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also
     41#  HTMLImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also
    4242#  if required). It can be used in place of HTMLPlugin to index both
    4343#  pages and their images.
     
    5555#    ImageMagick can be downloaded from the website above.
    5656#    Make sure the system path includes the ImageMagick binaries
    57 #    before using W3ImagePlugin.
     57#    before using HTMLImagePlugin.
    5858#
    5959#    NOTE: NT/2000/XP contain a filesystem utility 'convert.exe'
     
    9797#   ...
    9898#
    99 #   plugin W3ImagePlugin -index_pages -aggressiveness 6
     99#   plugin HTMLImagePlugin -index_pages -aggressiveness 6
    100100#
    101101#   ...
     
    110110#
    111111 
    112 package W3ImagePlugin;
     112package HTMLImagePlugin;
    113113
    114114use HTMLPlugin;
     
    120120
    121121sub BEGIN {
    122     @W3ImagePlugin::ISA = qw( HTMLPlugin );
     122    @HTMLImagePlugin::ISA = qw( HTMLPlugin );
    123123}
    124124
     
    218218    'reqd' => "no" } ];
    219219
    220 my $options = { 'name'     => "W3ImagePlugin",
     220my $options = { 'name'     => "HTMLImagePlugin",
    221221        'desc'     => "{W3ImagePlugin.desc}",
    222222        'abstract' => "no",
     
    281281
    282282# get complex configuration options from configuration files
    283 # -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+)
     283# -- $GSDLCOLLECTION/etc/HTMLImagePlugin.cfg (tag sets for aggr 2+)
    284284# -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+)
    285285
    286 # If there's no W3ImagePlugin.cfg file we'll use the following default values
     286# If there's no HTMLImagePlugin.cfg file we'll use the following default values
    287287my $defaultcfg = '
    288288<delimitertagset>
     
    323323    my ($filepath);
    324324
    325     print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n"
     325    print {$self->{'outhandle'}} "HTMLImagePlugin: Initialising\n"
    326326    if $self->{'verbosity'} > 1;
    327     # etc/W3ImagePlugin.cfg (XML)
     327    # etc/HTMLImagePlugin.cfg (XML)
    328328    # tag sets for captions and neartext
    329329    if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) {
     
    332332    my ($cfg, @tagsets, $tagset, $type, @delims);
    333333
    334     $filepath = "$collpath/etc/W3ImagePlugin.cfg";
     334    $filepath = "$collpath/etc/HTMLImagePlugin.cfg";
    335335    if ( open CFG, "<$filepath" ) {
    336336        while (<CFG>) { $cfg .= $_ }
     
    354354    # output a warning if there seem to be no delimiters
    355355    if ( scalar(@{$self->{'cdelims'}} == 0)) {
    356         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n";
     356        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no caption delimiters found in $filepath\n";
    357357    }
    358358    if ( scalar(@{$self->{'delims'}} == 0)) {
    359         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n";
     359        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no neartext delimiters found in $filepath\n";
    360360    }
    361361    }
     
    373373        close STOPWORDS;
    374374    } else {
    375         print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n";
     375        print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n";
    376376    }
    377377   
     
    380380    if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) {
    381381    $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33;
    382     print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
     382    print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    383383    }
    384384    if ( $self->{'caption_length'} > $self->{'max_near_text'} ) {
    385385    $self->{'max_near_text'} = $self->{'caption_length'} * 1.33;
    386     print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
     386    print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    387387    }
    388388
     
    420420        ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is );
    421421        if (! defined($imgtag)) { $imgtag = $filepath }
    422         print $outhandle "W3ImagePlugin: extracting $filepath\n"
     422        print $outhandle "HTMLImagePlugin: extracting $filepath\n"
    423423        if ( $self->{'verbosity'} > 1 );
    424424        $doc_obj = new doc ("", "indexed_doc");
     
    434434    return $numdocs;
    435435    } else {
    436     print $outhandle "W3ImagePlugin: No images from $file indexed\n"
     436    print $outhandle "HTMLImagePlugin: No images from $file indexed\n"
    437437        if ( $self->{'verbosity'} > 2 );
    438438    return 1;
     
    473473    `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp;
    474474    if ( ! (-e $thumbfp) ) {
    475     print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
     475    print STDERR "HTMLImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
    476476    }
    477477   
     
    854854    } elsif ( $bestlen[$best1] < $mintext ) {
    855855    # use plain text extraction if tags failed (e.g. usable tag outside context)
    856     print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n"
     856    print {$self->{'outhandle'}} "HTMLImagePlugin: Fallback to plain-text extraction for $tag\n"
    857857        if $self->{'verbosity'} > 2;
    858858    $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>";
     
    986986        `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m;
    987987    if (! ($width && $height)) {
    988         print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
     988        print STDERR "HTMLImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
    989989    }
    990990    $filesize = (-s $abspath);
     
    999999       $imgs->{$filepath}{'filesize'} = $filesize;
    10001000       } else {
    1001        print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
     1001       print {$self->{'outhandle'}} "HTMLImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
    10021002           if $self->{'verbosity'} > 2;
    10031003       }
Note: See TracChangeset for help on using the changeset viewer.