Changeset 2336
- Timestamp:
- 2001-04-24T16:36:21+12:00 (23 years ago)
- Location:
- trunk/gsdl
- Files:
- 4 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/bin/script/buildcol.pl
r1970 r2336 58 58 print STDERR " -keepold will not destroy the current contents of the\n"; 59 59 print STDERR " building directory\n"; 60 print STDERR " -no_text Don't store compressed text. This option is\n"; 61 print STDERR " useful for minimizing the size of the built\n"; 62 print STDERR " indexes if you intend always to display the\n"; 63 print STDERR " original documents at run time (i.e. you won't\n"; 64 print STDERR " be able to retrieve the compressed text version)\n"; 60 65 print STDERR " -allclassifications Don't remove empty classifications\n"; 61 66 print STDERR " -create_images Attempt to create default images for new\n"; … … 79 84 $debug, $mode, $indexname, $keepold, $allclassifications, 80 85 $create_images, $collectdir, $out, $buildtype, $textindex, 81 $no_strip_html );86 $no_strip_html, $no_text); 82 87 if (!parsargv::parse(\@ARGV, 83 88 'verbosity/\d+/2', \$verbosity, … … 89 94 'mode/^(all|compress_text|build_index|infodb)$/all', \$mode, 90 95 'index/.*/', \$indexname, 96 'no_text', \$no_text, 91 97 'keepold', \$keepold, 92 98 'allclassifications', \$allclassifications, … … 136 142 $builddir = $collectcfg->{'builddir'}; 137 143 } 144 if (defined $collectcfg->{'collectdir'} && $collectdir eq "") { 145 $collectdir = $collectcfg->{'collectdir'}; 146 } 147 if (defined $collectcfg->{'no_text'} && $no_text == 0) { 148 if ($collectcfg->{'no_text'} =~ /^true$/) { 149 $no_text = 1; 150 } 151 } 152 if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) { 153 if ($collectcfg->{'allclassifications'} =~ /^true$/) { 154 $allclassifications = 1; 155 } 156 } 138 157 if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) { 139 158 $textindex = $collectcfg->{'textcompress'}; … … 219 238 eval("\$builder = new $buildertype(\$collection, " . 220 239 "\$realarchivedir, \$realbuilddir, \$verbosity, " . 221 "\$maxdocs, \$debug, \$keepold, \$allclassifications, \$out)"); 240 "\$maxdocs, \$debug, \$keepold, \$allclassifications, " . 
241 "\$out, \$no_text)"); 222 242 die "$@" if $@; 223 243 -
trunk/gsdl/perllib/colcfg.pm
r1851 r2336 66 66 return &cfgread::read_cfg_file ($filename, 67 67 q/^(creator|public|beta|defaultindex|importdir|/ . 68 q/archivedir|cachedir|builddir|removeold|textcompress|buildtype)$/, 68 q/archivedir|cachedir|builddir|removeold|/ . 69 q/textcompress|buildtype|collectdir|no_text|allclassifications)$/, 69 70 q/(maintainer|languages|indexsubcollections|/ . 70 71 q/indexes|dontbuild|dontgdbm|mirror|phind|levels)$/, … … 78 79 &cfgread::write_cfg_file($filename, $data, 79 80 q/^(creator|public|beta|defaultindex|importdir|/ . 80 q/archivedir|cachedir|builddir|removeold|textcompress|buildtype)$/, 81 q/archivedir|cachedir|builddir|removeold|/ . 82 q/textcompress|buildtype|collectdir|no_text|allclassifications)$/, 81 83 q/^(maintainer|languages|indexsubcollections|/ . 82 84 q/indexes|dontbuild|dontgdbm|levels)$/, -
trunk/gsdl/perllib/mgbuilder.pm
r1973 r2336 61 61 sub new { 62 62 my ($class, $collection, $source_dir, $build_dir, $verbosity, 63 $maxdocs, $debug, $keepold, $allclassifications, $outhandle) = @_; 63 $maxdocs, $debug, $keepold, $allclassifications, 64 $outhandle, $no_text) = @_; 64 65 65 66 $outhandle = STDERR unless defined $outhandle; 67 $no_text = 0 unless defined $no_text; 66 68 67 69 # create an mgbuilder object … … 75 77 'allclassifications'=>$allclassifications, 76 78 'outhandle'=>$outhandle, 79 'no_text'=>$no_text, 77 80 'notbuilt'=>[] # indexes not built 78 81 }, $class; … … 219 222 } else { 220 223 if (!-e "$mg_passes_exe" || 221 # !open (PIPEOUT, "| \"$mg_passes_exe\" -f \"$fulltextprefix\" -b $maxdocsize -T1 $osextra")) {222 224 !open (PIPEOUT, "| mg_passes$exe -f \"$fulltextprefix\" -b $maxdocsize -T1 $osextra")) { 223 225 die "mgbuilder::compress_text - couldn't run $mg_passes_exe\n"; … … 230 232 $self->{'buildproc'}->set_index ($textindex); 231 233 $self->{'buildproc'}->set_indexing_text (0); 234 if ($self->{'no_text'}) { 235 $self->{'buildproc'}->set_store_text(0); 236 } else { 237 $self->{'buildproc'}->set_store_text(1); 238 } 232 239 $self->{'buildproc'}->reset(); 233 240 &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'}, … … 251 258 die "mgbuilder::compress_text - couldn't run $mg_compression_dict_exe\n"; 252 259 } 253 # system ("\"$mg_compression_dict_exe\" -f \"$fulltextprefix\" -S -H -2 -k 5120 $osextra");254 260 system ("mg_compression_dict$exe -f \"$fulltextprefix\" -S -H -2 -k 5120 $osextra"); 255 261 256 262 # -b $maxdocsize sets the maximum document size to be 12 meg 257 263 if (!-e "$mg_passes_exe" || 258 # !open ($handle, "| \"$mg_passes_exe\" -f \"$fulltextprefix\" -b $maxdocsize -T2 $osextra")) {259 264 !open ($handle, "| mg_passes$exe -f \"$fulltextprefix\" -b $maxdocsize -T2 $osextra")) { 260 265 die "mgbuilder::compress_text - couldn't run $mg_passes_exe\n"; … … 526 531 } else { 527 532 if (!-e "$mg_passes_exe" || 528 # !open (PIPEOUT, "| 
\"$mg_passes_exe\" -f \"$fullindexprefix\" -b $maxdocsize " .529 533 !open (PIPEOUT, "| mg_passes$exe -f \"$fullindexprefix\" -b $maxdocsize " . 530 534 "-$index_level -m 32 -s 0 -G -t 10 -N1 $osextra")) { … … 539 543 $self->{'buildproc'}->set_index ($index, $indexexparr); 540 544 $self->{'buildproc'}->set_indexing_text (1); 545 $self->{'buildproc'}->set_store_text(1); 541 546 542 547 $self->{'buildproc'}->reset(); … … 552 557 die "mgbuilder::build_index - couldn't run $mg_perf_hash_build_exe\n"; 553 558 } 554 # system ("\"$mg_perf_hash_build_exe\" -f \"$fullindexprefix\" $osextra");555 559 system ("mg_perf_hash_build$exe -f \"$fullindexprefix\" $osextra"); 556 560 557 561 if (!-e "$mg_passes_exe" || 558 # !open ($handle, "| \"$mg_passes_exe\" -f \"$fullindexprefix\" -b $maxdocsize " .559 562 !open ($handle, "| mg_passes$exe -f \"$fullindexprefix\" -b $maxdocsize " . 560 563 "-$index_level -c 3 -G -t 10 -N2 $osextra")) { … … 581 584 die "mgbuilder::build_index - couldn't run $mg_weights_build_exe\n"; 582 585 } 583 # system ("\"$mg_weights_build_exe\" -f \"$fullindexprefix\" -t \"$fulltextprefix\" $osextra");584 586 system ("mg_weights_build$exe -f \"$fullindexprefix\" -t \"$fulltextprefix\" $osextra"); 585 587 … … 589 591 die "mgbuilder::build_index - couldn't run $mg_invf_dict_exe\n"; 590 592 } 591 # system ("\"$mg_invf_dict_exe\" -f \"$fullindexprefix\" $osextra");592 593 system ("mg_invf_dict$exe -f \"$fullindexprefix\" $osextra"); 593 594 … … 598 599 die "mgbuilder::build_index - couldn't run $mg_stem_idx_exe\n"; 599 600 } 600 # system ("\"$mg_stem_idx_exe\" -b 4096 -s1 -f \"$fullindexprefix\" $osextra");601 601 system ("mg_stem_idx$exe -b 4096 -s1 -f \"$fullindexprefix\" $osextra"); 602 # system ("\"$mg_stem_idx_exe\" -b 4096 -s2 -f \"$fullindexprefix\" $osextra");603 602 system ("mg_stem_idx$exe -b 4096 -s2 -f \"$fullindexprefix\" $osextra"); 604 # system ("\"$mg_stem_idx_exe\" -b 4096 -s3 -f \"$fullindexprefix\" $osextra");605 603 system ("mg_stem_idx$exe -b 
4096 -s3 -f \"$fullindexprefix\" $osextra"); 606 607 604 608 605 # remove unwanted files … … 653 650 $handle = STDOUT; 654 651 } else { 655 # if (!-e "$txt2db_exe" || !open (PIPEOUT, "| \"$txt2db_exe\" \"$fulldbname\"")) {656 652 if (!-e "$txt2db_exe" || !open (PIPEOUT, "| txt2db$exe \"$fulldbname\"")) { 657 653 die "mgbuilder::make_infodatabase - couldn't run $txt2db_exe\n"; … … 666 662 $self->{'buildproc'}->set_classifiers ($self->{'classifiers'}); 667 663 $self->{'buildproc'}->set_indexing_text (0); 664 $self->{'buildproc'}->set_store_text(1); 668 665 $self->{'buildproc'}->reset(); 669 666 … … 731 728 my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe"); 732 729 my $input_file = &util::filename_cat ("text", $self->{'collection'}); 733 # if (!-e "$mgstat_exe" || !open (PIPEIN, "\"$mgstat_exe\" -d \"$self->{'build_dir'}\" -f \"$input_file\" |")) {734 730 if (!-e "$mgstat_exe" || !open (PIPEIN, "mgstat$exe -d \"$self->{'build_dir'}\" -f \"$input_file\" |")) { 735 731 print $outhandle "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n"; … … 797 793 print $outhandle "Total bytes in $index: $num_processed_bytes\n"; 798 794 799 if ($num_processed_bytes < 50 ) {795 if ($num_processed_bytes < 50 && ($indexing_text || !$self->{'no_text'})) { 800 796 print $outhandle "***************\n"; 801 print $outhandle "WARNING: There is very little or no text to process for $index\n";802 797 if ($indexing_text) { 803 print $outhandle "This may cause an error while attempting to build the index\n"; 804 } else { 805 print $outhandle "This may cause an error while attempting to compress the text\n"; 806 } 798 print $outhandle "WARNING: There is very little or no text to process for $index\n"; 799 } elsif (!$self->{'no_text'}) { 800 print $outhandle "WARNING: There is very little or no text to compress\n"; 801 } 802 print $outhandle " Was this your intention?\n"; 807 803 print $outhandle "***************\n"; 808 804 } … … 810 806 811 807 1; 812 813 -
trunk/gsdl/perllib/mgbuildproc.pm
r1424 r2336 65 65 $self->{'num_bytes'} = 0; 66 66 $self->{'num_processed_bytes'} = 0; 67 $self->{'store_text'} = 1; 67 68 $self->{'outhandle'} = $outhandle; 68 69 … … 170 171 return $self->{'indexing_text'}; 171 172 } 173 174 sub set_store_text { 175 my $self = shift (@_); 176 my ($store_text) = @_; 177 178 $self->{'store_text'} = $store_text; 179 } 180 172 181 173 182 sub process { … … 446 455 my $new_text = ""; 447 456 if ($real_field eq "text") { 448 $new_text = $doc_obj->get_text ($section) ;457 $new_text = $doc_obj->get_text ($section) if $self->{'store_text'}; 449 458 $self->{'num_processed_bytes'} += length ($new_text); 450 459 $new_text =~ s/[\cB\cC]//g; … … 457 466 $self->{'num_processed_bytes'} += length ($meta); 458 467 $new_text .= "\cC" unless $first; 459 $new_text .= $meta ;468 $new_text .= $meta if $self->{'store_text'}; 460 469 $first = 0; 461 470 }
Note: See TracChangeset for help on using the changeset viewer.