Changeset 17727 for gsdl/trunk
- Timestamp:
- 2008-11-06T11:35:21+13:00 (15 years ago)
- File:
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/HTMLImagePlugin.pm
r17723 r17727 1 1 ########################################################################### 2 2 # 3 # W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents3 # HTMLImagePlugin.pm -- Context-based image indexing plugin for HTML documents 4 4 # 5 5 # A component of the Greenstone digital library software … … 39 39 # collection builds at the import stage. 40 40 # 41 # W3ImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also41 # HTMLImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also 42 42 # if required). It can be used in place of HTMLPlugin to index both 43 43 # pages and their images. … … 55 55 # ImageMagick can be downloaded from the website above. 56 56 # Make sure the system path includes the ImageMagick binaries 57 # before using W3ImagePlugin.57 # before using HTMLImagePlugin. 58 58 # 59 59 # NOTE: NT/2000/XP contain a filesystem utility 'convert.exe' … … 97 97 # ... 98 98 # 99 # plugin W3ImagePlugin -index_pages -aggressiveness 699 # plugin HTMLImagePlugin -index_pages -aggressiveness 6 100 100 # 101 101 # ... 
… … 110 110 # 111 111 112 package W3ImagePlugin;112 package HTMLImagePlugin; 113 113 114 114 use HTMLPlugin; … … 120 120 121 121 sub BEGIN { 122 @ W3ImagePlugin::ISA = qw( HTMLPlugin );122 @HTMLImagePlugin::ISA = qw( HTMLPlugin ); 123 123 } 124 124 … … 218 218 'reqd' => "no" } ]; 219 219 220 my $options = { 'name' => " W3ImagePlugin",220 my $options = { 'name' => "HTMLImagePlugin", 221 221 'desc' => "{W3ImagePlugin.desc}", 222 222 'abstract' => "no", … … 281 281 282 282 # get complex configuration options from configuration files 283 # -- $GSDLCOLLECTION/etc/ W3ImagePlugin.cfg (tag sets for aggr 2+)283 # -- $GSDLCOLLECTION/etc/HTMLImagePlugin.cfg (tag sets for aggr 2+) 284 284 # -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+) 285 285 286 # If there's no W3ImagePlugin.cfg file we'll use the following default values286 # If there's no HTMLImagePlugin.cfg file we'll use the following default values 287 287 my $defaultcfg = ' 288 288 <delimitertagset> … … 323 323 my ($filepath); 324 324 325 print {$self->{'outhandle'}} " W3ImagePlugin: Initialising\n"325 print {$self->{'outhandle'}} "HTMLImagePlugin: Initialising\n" 326 326 if $self->{'verbosity'} > 1; 327 # etc/ W3ImagePlugin.cfg (XML)327 # etc/HTMLImagePlugin.cfg (XML) 328 328 # tag sets for captions and neartext 329 329 if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) { … … 332 332 my ($cfg, @tagsets, $tagset, $type, @delims); 333 333 334 $filepath = "$collpath/etc/ W3ImagePlugin.cfg";334 $filepath = "$collpath/etc/HTMLImagePlugin.cfg"; 335 335 if ( open CFG, "<$filepath" ) { 336 336 while (<CFG>) { $cfg .= $_ } … … 354 354 # output a warning if there seem to be no delimiters 355 355 if ( scalar(@{$self->{'cdelims'}} == 0)) { 356 print {$self->{'outhandle'}} " W3ImagePlugin: Warning: no caption delimiters found in $filepath\n";356 print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no caption delimiters found in $filepath\n"; 357 357 } 358 358 if ( 
scalar(@{$self->{'delims'}} == 0)) { 359 print {$self->{'outhandle'}} " W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n";359 print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 360 360 } 361 361 } … … 373 373 close STOPWORDS; 374 374 } else { 375 print {$self->{'outhandle'}} " W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n";375 print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 376 376 } 377 377 … … 380 380 if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) { 381 381 $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33; 382 print {$self->{'outhandle'}} " W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";382 print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 383 383 } 384 384 if ( $self->{'caption_length'} > $self->{'max_near_text'} ) { 385 385 $self->{'max_near_text'} = $self->{'caption_length'} * 1.33; 386 print {$self->{'outhandle'}} " W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";386 print {$self->{'outhandle'}} "HTMLImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 387 387 } 388 388 … … 420 420 ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is ); 421 421 if (! 
defined($imgtag)) { $imgtag = $filepath } 422 print $outhandle " W3ImagePlugin: extracting $filepath\n"422 print $outhandle "HTMLImagePlugin: extracting $filepath\n" 423 423 if ( $self->{'verbosity'} > 1 ); 424 424 $doc_obj = new doc ("", "indexed_doc"); … … 434 434 return $numdocs; 435 435 } else { 436 print $outhandle " W3ImagePlugin: No images from $file indexed\n"436 print $outhandle "HTMLImagePlugin: No images from $file indexed\n" 437 437 if ( $self->{'verbosity'} > 2 ); 438 438 return 1; … … 473 473 `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp; 474 474 if ( ! (-e $thumbfp) ) { 475 print STDERR " W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;475 print STDERR "HTMLImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0; 476 476 } 477 477 … … 854 854 } elsif ( $bestlen[$best1] < $mintext ) { 855 855 # use plain text extraction if tags failed (e.g. usable tag outside context) 856 print {$self->{'outhandle'}} " W3ImagePlugin: Fallback to plain-text extraction for $tag\n"856 print {$self->{'outhandle'}} "HTMLImagePlugin: Fallback to plain-text extraction for $tag\n" 857 857 if $self->{'verbosity'} > 2; 858 858 $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>"; … … 986 986 `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m; 987 987 if (! ($width && $height)) { 988 print STDERR " W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;988 print STDERR "HTMLImagePlugin: ($abspath) 'identify' failed. 
Check ImageMagicK binaries are installed and working correctly\n"; next; 989 989 } 990 990 $filesize = (-s $abspath); … … 999 999 $imgs->{$filepath}{'filesize'} = $filesize; 1000 1000 } else { 1001 print {$self->{'outhandle'}} " W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"1001 print {$self->{'outhandle'}} "HTMLImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n" 1002 1002 if $self->{'verbosity'} > 2; 1003 1003 }
Note: See TracChangeset for help on using the changeset viewer.