Changeset 16644

Show
Ignore:
Timestamp:
04.08.2008 12:48:59 (11 years ago)
Author:
kjdon
Message:

now uses CJKTextSegmenter to add segmentation functionality to text documents

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/AutoExtractMetadata.pm

    r16011 r16644  
     40   40    use DateExtractor;
     41   41    use GISExtractor;
          42  + use CJKTextSegmenter;
          43  +
     42   44
     43   45    sub BEGIN {
     44       -     @AutoExtractMetadata::ISA = ( 'BasePlugin', 'AcronymExtractor', 'KeyphraseExtractor', 'EmailAddressExtractor', 'DateExtractor', 'GISExtractor' );
          46  +     @AutoExtractMetadata::ISA = ( 'BasePlugin', 'AcronymExtractor', 'KeyphraseExtractor', 'EmailAddressExtractor', 'DateExtractor', 'CJKTextSegmenter','GISExtractor' );
     45   47    }
     46   48
     
     76   78        new DateExtractor($pluginlist, $inputargs, $hashArgOptLists);
     77   79        new GISExtractor($pluginlist, $inputargs, $hashArgOptLists);
     78       -
          80  +     new CJKTextSegmenter($pluginlist, $inputargs, $hashArgOptLists);
     79   81        my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     80   82
     
    119  121        $self->extract_date_metadata($doc_obj);
    120  122        $self->extract_gis_metadata($doc_obj);
         123  +     $self->separate_cjk_text($doc_obj);
    121  124
    122  125    }