Changeset 16642

Show
Ignore:
Timestamp:
04.08.2008 12:47:36 (11 years ago)
Author:
kjdon
Message:

separate_cjk option and code moved to CJKTextSegmenter, and used by AutoExtractMetadata

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ReadTextFile.pm

    r16555 r16642  
    3434use encodings; 
    3535use unicode; 
    36 use cnseg; 
    3736use textcat; 
    3837use doc; 
    39 eval "require diagnostics"; # some perl distros (eg mac) don't have this 
    4038use ghtml; 
    4139use gsprintf 'gsprintf'; 
     
    7371    'type' => "string", 
    7472    'deft' => "en", 
    75     'reqd' => "no" }, 
    76      { 'name' => "separate_cjk", 
    77     'desc' => "{ReadTextFile.separate_cjk}", 
    78     'type' => "flag", 
    79     'reqd' => "no"}, 
    80  ]; 
     73    'reqd' => "no" } 
     74      ]; 
    8175 
    8276 
     
    226220    $reader->set_encoding ($encoding); 
    227221    $reader->read_file ($textref); 
    228         #Now segments chinese if the separate_cjk option is set 
    229     if ($self->{'separate_cjk'}) { 
    230         # segment the Chinese words 
    231         $$textref = &cnseg::segment($$textref); 
    232     } 
    233222    } 
    234223    close FILE;