Changeset 2025
- Timestamp: 2001-02-20T15:09:22+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/classify/phind.pm
r2008 r2025 155 155 # Parse classifier arguments 156 156 my $builddir = ""; 157 my $phinddir = "";158 157 if (!parsargv::parse(\@_, 159 158 q^text/.*/section:Title,section:text^, \$self->{'indexes'}, … … 185 184 } 186 185 $self->{'builddir'} = $builddir; 187 $self->{'phinddir'} = &util::filename_cat($builddir, "phind");188 186 189 187 return bless $self, $class; … … 201 199 202 200 # create phind directory 203 my $phinddir = $self->{'phinddir'}; 204 if (-e "$phinddir") { 205 &util::rm_r("$phinddir"); 201 my $phnumber = 1; 202 my $phinddir = &util::filename_cat($builddir, "phind1"); 203 while (-e "$phinddir") { 204 $phnumber++; 205 $phinddir = &util::filename_cat($builddir, "phind$phnumber"); 206 206 } 207 207 &util::mk_dir("$phinddir"); 208 $self->{'phinddir'} = $phinddir; 209 $self->{'phindnumber'} = $phnumber; 208 210 209 211 # open filehandles for documents and text 210 212 my $clausefile = &util::filename_cat("$phinddir", "clauses"); 211 213 &util::rm($clausefile) if (-e $clausefile); 212 open(TEXT, ">$clausefile") || die "Cannot open $clausefile: $!"; 213 $self->{'txthandle'} = TEXT; 214 215 my $txthandle = 'TEXT' . $phnumber; 216 open($txthandle, ">$clausefile") || die "Cannot open $clausefile: $!"; 217 $self->{'txthandle'} = $txthandle; 214 218 215 219 my $docfile = &util::filename_cat("$phinddir", "docs.txt"); 216 220 &util::rm($docfile) if (-e $docfile); 217 open(DOCS, ">$docfile") || die "Cannot open $docfile: $!"; 218 $self->{'dochandle'} = DOCS; 221 222 my $dochandle = 'DOC' . 
$phnumber; 223 open($dochandle, ">$docfile") || die "Cannot open $docfile: $!"; 224 $self->{'dochandle'} = $dochandle; 219 225 220 226 } … … 229 235 sub classify { 230 236 my $self = shift (@_); 231 my ($doc_obj) =@_;237 my $doc_obj = shift @_; 232 238 233 239 my $verbosity = $self->{'verbosity'}; … … 242 248 my $doclanguage = $doc_obj->get_metadata_element ($top_section, "Language"); 243 249 my $phrlanguage = $self->{'language_exp'}; 244 245 print STDERR "+ CLASSIFY - doclanguage: $doclanguage, phrlanguage $phrlanguage \n";246 247 250 return if ($doclanguage && ($doclanguage !~ /$phrlanguage/i)); 248 251 … … 274 277 ($level, $field) = split(/:/, $part); 275 278 die unless ($level && $field); 276 279 277 280 # Extract the text from every section 278 281 # (In phind, document:text and section:text are equivalent) … … 301 304 $section = $doc_obj->get_top_section(); 302 305 while (defined($section)) { 303 $dataref .= $doc_obj->get_metadata($section, $field);304 $data .= join("\n", $$dataref) . "\n";306 $dataref = $doc_obj->get_metadata($section, $field); 307 $data .= join("\n", @$dataref) . "\n"; 305 308 $section = $doc_obj->get_next_section($section); 306 309 } … … 314 317 315 318 } 316 319 317 320 # output the text 318 321 $text =~ tr/\n//s; … … 338 341 if ($verbosity) { 339 342 print $out "\n*** phind.pm generating indexes for ", $self->{'indexes'}, "\n"; 343 print $out "*** in", $self->{'phinddir'}, "\n"; 340 344 } 341 345 … … 354 358 355 359 # Create the phrase file and put phrase numbers in phind/phrases 356 print $out "\nSorting and Renumbering phrases for input to mgpp\n" if $verbosity;360 print $out "\nSorting and renumbering phrases for input to mgpp\n" if $verbosity; 357 361 &renumber_phrases($self); 358 362 … … 416 420 417 421 418 # Insert the classifier into.... what? 422 # Return the information about the classifier that we'll later want to 423 # use to create macros when the Phind classifier document is displayed. 
424 my %classifyinfo = ('thistype'=>'Invisible', 425 'childtype'=>'Phind', 426 'Title'=>$self->{'buttonname'}, 427 'parameters'=>"phindnumber=$self->{'phindnumber'}", 428 'contains'=>[]); 429 419 430 my $collection = $self->{'collection'}; 420 431 my $url = "library?a=p&p=phind&c=$collection"; 421 422 my %classifyinfo = ('thistype'=>'Invisible',423 'childtype'=>'Phind',424 'Title'=>$self->{'buttonname'},425 'contains'=>[]);426 427 432 push (@{$classifyinfo{'contains'}}, {'OID'=>$url}); 433 428 434 return \%classifyinfo; 429 435 } … … 435 441 my ($language_exp, $text) = @_; 436 442 437 print STDERR "+ tokenising in $language_exp\n";438 439 443 if ($language_exp =~ /en/) { 440 444 return &convert_gml_to_tokens_EN($text);
Note: See TracChangeset for help on using the changeset viewer.