Changeset 2487 for trunk/gsdl/perllib/classify/phind.pm
- Timestamp: 2001-06-01T14:51:29+12:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/classify/phind.pm
r2481 r2487 84 84 85 85 86 %wanted_index_files = ('td'=>1, 87 't'=>1, 88 'ti'=>1, 89 'tl'=>1, 90 'tsd'=>1, 91 'idb'=>1, 92 'ib1'=>1, 93 'ib2'=>1, 94 'ib3'=>1, 95 'i'=>1, 96 'il'=>1, 97 'w'=>1, 98 'wa'=>1); 99 100 101 86 102 # Phrase delimiter symbols - these should be abstracted out someplace 87 103 … … 110 126 print $out "Checking Phind phrase browser requirements...\n"; 111 127 112 # Make sure we're not in windows113 if ($ENV{'GSDLOS'} =~ /windows/i) {114 print STDERR "Sorry - Phind currently only works under Unix";115 exit(1);116 }117 118 128 # Ensure the Phind generate scripts are in place 119 129 my $file1 = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "suffix"); 130 $file1 .= ".exe" if $ENV{'GSDLOS'} =~ /^windows$/; 120 131 my $src = &util::filename_cat($ENV{'GSDLHOME'}, "src", "phind", "generate"); 121 132 … … 130 141 # Ensure the Phind CGI script is in place 131 142 $file1 = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "phindcgi"); 143 $file1 .= ".exe" if $ENV{'GSDLOS'} =~ /^windows$/; 132 144 $src = &util::filename_cat($ENV{'GSDLHOME'}, "src", "phind", "host"); 133 145 … … 184 196 } 185 197 $self->{'builddir'} = $builddir; 198 199 $self->{'total'} = 0; 186 200 187 201 return bless $self, $class; … … 251 265 252 266 # record this file 253 my $total++;254 print "file $ total: $file\n" if ($self->{'$verbosity'});267 $self->{'total'} ++; 268 print "file $self->{'total'}: $file\n" if ($self->{'$verbosity'}); 255 269 256 270 … … 335 349 my $self = shift (@_); 336 350 351 close $self->{'dochandle'}; 337 352 close $self->{'txthandle'}; 338 353 my $verbosity = $self->{'verbosity'}; 339 354 my $out = $self->{'outhandle'}; 340 355 my $phinddir = $self->{'phinddir'}; 341 my $exe = &util::get_os_exe (); 356 357 my $osextra = ""; 358 if ($ENV{'GSDLOS'} !~ /^windows$/i) { 359 $osextra = " -d /"; 360 } 342 361 343 362 if ($verbosity) { … … 362 381 print $out "\nSorting and renumbering phrases for input to mgpp\n" if $verbosity; 363 382 
&renumber_phrases($self); 364 365 # Create the mg phrase database366 383 367 384 print $out "\nCreating phrase databases\n"; 368 385 my $mg_input = &util::filename_cat($phinddir, "pdata.txt"); 369 my $mg_stem = "pdata";370 371 &execute("mgpp_passes $exe -d $phinddir-f $mg_stem -T1 $mg_input", $verbosity, $out);372 &execute("mgpp_compression_dict $exe -d $phinddir-f $mg_stem", $verbosity, $out);373 &execute("mgpp_passes $exe -d $phinddir-f $mg_stem -T2 $mg_input", $verbosity, $out);386 my $mg_stem = &util::filename_cat($phinddir, "pdata"); 387 388 &execute("mgpp_passes $osextra -f $mg_stem -T1 $mg_input", $verbosity, $out); 389 &execute("mgpp_compression_dict $osextra -f $mg_stem", $verbosity, $out); 390 &execute("mgpp_passes $osextra -f $mg_stem -T2 $mg_input", $verbosity, $out); 374 391 375 392 # create the mg index of words 376 393 print $out "\nCreating word-level search indexes\n"; 377 394 $mg_input = &util::filename_cat($phinddir, "pword.txt"); 378 $mg_stem = "pword";379 380 &execute("mgpp_passes $exe -d $phinddir-f $mg_stem -T1 -I1 $mg_input", $verbosity, $out);381 &execute("mgpp_compression_dict $exe -d $phinddir-f $mg_stem", $verbosity, $out);382 &execute("mgpp_perf_hash_build $exe -d $phinddir-f $mg_stem", $verbosity, $out);383 &execute("mgpp_passes $exe -d $phinddir-f $mg_stem -T2 -I2 $mg_input", $verbosity, $out);384 &execute("mgpp_weights_build $exe -d $phinddir-f $mg_stem", $verbosity, $out);385 &execute("mgpp_invf_dict $exe -d $phinddir-f $mg_stem", $verbosity, $out);386 387 &execute("mgpp_stem_idx $exe -d $phinddir-f $mg_stem -s 1", $verbosity, $out);388 &execute("mgpp_stem_idx $exe -d $phinddir-f $mg_stem -s 2", $verbosity, $out);389 &execute("mgpp_stem_idx $exe -d $phinddir-f $mg_stem -s 3", $verbosity, $out);395 $mg_stem = &util::filename_cat($phinddir, "pword"); 396 397 &execute("mgpp_passes $osextra -f $mg_stem -T1 -I1 $mg_input", $verbosity, $out); 398 &execute("mgpp_compression_dict $osextra -f $mg_stem", $verbosity, $out); 399 
&execute("mgpp_perf_hash_build $osextra -f $mg_stem", $verbosity, $out); 400 &execute("mgpp_passes $osextra -f $mg_stem -T2 -I2 $mg_input", $verbosity, $out); 401 &execute("mgpp_weights_build $osextra -f $mg_stem", $verbosity, $out); 402 &execute("mgpp_invf_dict $osextra -f $mg_stem", $verbosity, $out); 403 404 &execute("mgpp_stem_idx $osextra -f $mg_stem -s 1", $verbosity, $out); 405 &execute("mgpp_stem_idx $osextra -f $mg_stem -s 2", $verbosity, $out); 406 &execute("mgpp_stem_idx $osextra -f $mg_stem -s 3", $verbosity, $out); 390 407 391 408 # create the mg document information database 392 409 print $out "\nCreating document information databases\n"; 393 410 $mg_input = &util::filename_cat($phinddir, "docs.txt"); 394 $mg_stem = "docs"; 395 396 &execute("mgpp_passes$exe -d $phinddir -f $mg_stem -T1 $mg_input", $verbosity, $out); 397 &execute("mgpp_compression_dict$exe -d $phinddir -f $mg_stem", $verbosity, $out); 398 &execute("mgpp_passes$exe -d $phinddir -f $mg_stem -T2 $mg_input", $verbosity, $out); 399 411 $mg_stem = &util::filename_cat($phinddir, "docs"); 412 413 &execute("mgpp_passes $osextra -f $mg_stem -T1 $mg_input", $verbosity, $out); 414 &execute("mgpp_compression_dict $osextra -f $mg_stem", $verbosity, $out); 415 &execute("mgpp_passes $osextra -f $mg_stem -T2 $mg_input", $verbosity, $out); 400 416 401 417 # Tidy up stray files 402 418 if (!$self->{'untidy'}) { 403 419 print $out "\nCleaning up\n" if ($verbosity > 2); 404 &util::rm("$phinddir/clauses", "$phinddir/clauses.numbers", 405 "$phinddir/clauses.vocab", "$phinddir/clauses.stats", 406 "$phinddir/phrases", "$phinddir/phrases.3", "$phinddir/docs.txt", 407 "$phinddir/pdata.txt", "$phinddir/pword.txt"); 408 my $outfile = 1; 409 while (-e "$phinddir/outPhrase.$outfile") { 410 &util::rm("$phinddir/outPhrase.$outfile"); 411 $outfile++; 412 } 413 } 414 420 opendir (DIR, $phinddir) || die; 421 my @files = readdir DIR; 422 closedir DIR; 423 424 foreach $file (@files) { 425 next if $file =~ /^\.\.?$/; 426 
my ($suffix) = $file =~ /\.([^\.]+)$/; 427 if (!defined $suffix || !defined $wanted_index_files{$suffix}) { 428 # delete it! 429 print $out "deleting $file\n"; # if $verbosity > 2; 430 &util::rm (&util::filename_cat ($phinddir, $file)); 431 } 432 } 433 } 415 434 416 435 # Return the information about the classifier that we'll later want to … … 591 610 my ($command, $verbosity, $outhandle) = @_; 592 611 print $outhandle "Executing: $command\n" if ($verbosity > 2); 612 $! = 0; 593 613 my $status = system($command); 594 614 if ($status != 0) { 595 print STDERR "phind - Error executing $command: $!\n";615 print STDERR "phind - Error executing '$command': $!\n"; 596 616 exit($status); 597 617 } … … 1264 1284 $thesaurusdata{$phindid} = "$symbols:$linkcounter:$relations"; 1265 1285 } 1286 close TH; 1266 1287 1267 1288 # 6. … … 1346 1367 $symbol[$i++] = $_; 1347 1368 } 1348 1369 close V; 1349 1370 1350 1371 # 2.
Note: See TracChangeset for help on using the changeset viewer.