Changeset 13589 for trunk/gsdl/perllib/lucenebuilder.pm
- Timestamp: 2007-01-12T14:11:37+13:00 (17 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/lucenebuilder.pm
r12974 r13589 70 70 if ($exe eq ".exe") 71 71 { 72 72 $self->{'full_lucene_passes_exe'} = "perl$exe \"$lucene_passes_script\""; 73 73 } 74 74 else 75 75 { 76 76 $self->{'full_lucene_passes_exe'} = "perl -S \"$lucene_passes_script\""; 77 77 } 78 78 … … 99 99 # this writes a nice version of the text docs 100 100 sub compress_text 101 101 { 102 102 my $self = shift (@_); 103 103 # we don't do anything if we don't want compressed text … … 114 114 my $osextra = ""; 115 115 if ($ENV{'GSDLOS'} =~ /^windows$/i) 116 117 118 116 { 117 $text_dir =~ s@/@\\@g; 118 } 119 119 else 120 121 122 123 124 125 126 120 { 121 if ($outhandle ne "STDERR") 122 { 123 # so lucene_passes doesn't print to stderr if we redirect output 124 $osextra .= " 2>/dev/null"; 125 } 126 } 127 127 128 128 # get any os specific stuff … … 138 138 139 139 if ($self->{'debug'}) 140 141 142 140 { 141 $handle = STDOUT; 142 } 143 143 else 144 144 { 145 145 print STDERR "Full Path: $full_lucene_passes\n"; 146 146 print STDERR "Executable: $full_lucene_passes_exe\n"; … … 148 148 print STDERR "Build Dir: $build_dir\n"; 149 149 print STDERR "Cmd: $full_lucene_passes_exe text $lucene_passes_sections \"$build_dir\" \"dummy\" $osextra\n"; 150 151 152 153 154 155 156 157 150 if (!-e "$full_lucene_passes" || 151 !open (PIPEOUT, "| $full_lucene_passes_exe text $lucene_passes_sections \"$build_dir\" \"dummy\" $osextra")) 152 { 153 print STDERR "<FatalError name='NoRunLucenePasses'/>\n</Stage>\n" if $self->{'gli'}; 154 die "lucenebuilder::build_index - couldn't run $full_lucene_passes_exe\n"; 155 } 156 $handle = lucenebuilder::PIPEOUT; 157 } 158 158 my $levels = $self->{'levels'}; 159 159 my $gdbm_level = "document"; 160 160 if ($levels->{'section'}) 161 162 163 161 { 162 $gdbm_level = "section"; 163 } 164 164 165 165 undef $levels->{'paragraph'}; # get rid of para if we had it. 
… … 174 174 $self->{'buildproc'}->reset(); 175 175 &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'}, 176 176 $self->{'buildproc'}, $self->{'maxdocs'}); 177 177 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, 178 178 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'}); 179 179 &plugin::end($self->{'pluginfo'}); 180 180 close ($handle) unless $self->{'debug'}; … … 183 183 184 184 print STDERR "</Stage>\n" if $self->{'gli'}; 185 185 } 186 186 187 187 sub build_indexes { … … 192 192 my $indexes = []; 193 193 if (defined $indexname && $indexname =~ /\w/) { 194 194 push @$indexes, $indexname; 195 195 } else { 196 196 $indexes = $self->{'collect_cfg'}->{'indexes'}; 197 197 } 198 198 … … 203 203 # build each of the indexes 204 204 foreach $index (@$indexes) { 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 205 if ($self->want_built($index)) { 206 207 my $idx = $self->{'index_mapping'}->{$index}; 208 foreach my $level (keys %{$self->{'levels'}}) { 209 next if $level =~ /paragraph/; # we don't do para indexing 210 my ($pindex) = $level =~ /^(.)/; 211 # should probably check that new name with level 212 # is unique ... but currently (with doc sec and para) 213 # each has unique first letter. 214 $self->{'index_mapping'}->{$index} = $pindex.$idx; 215 216 my $llevel = $mgppbuilder::level_map{$level}; 217 print $outhandle "\n*** building index $index at level $llevel in subdirectory " . 
218 "$self->{'index_mapping'}->{$index}\n" if ($self->{'verbosity'} >= 1); 219 print STDERR "<Stage name='Index' source='$index' level=$llevel>\n" if $self->{'gli'}; 220 221 $self->build_index($index,$llevel); 222 } 223 $self->{'index_mapping'}->{$index} = $idx; 224 225 } else { 226 print $outhandle "\n*** ignoring index $index\n" if ($self->{'verbosity'} >= 1); 227 } 228 228 } 229 229 … … 240 240 # */ 241 241 sub remove_document_from_database 242 242 { 243 243 my ($self, $oid) = @_; 244 244 # Find the perl script to call to run lucene … … 246 246 # Call lucene_passes.pl with -remove and the document ID on the command line 247 247 `$full_lucene_passes_exe -remove "$oid"`; 248 248 } 249 249 # /** remove_document_from_database **/ 250 250 … … 275 275 my $osextra = ""; 276 276 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 277 277 $build_dir =~ s@/@\\@g; 278 278 } else { 279 280 281 282 279 if ($outhandle ne "STDERR") { 280 # so lucene_passes doesn't print to stderr if we redirect output 281 $osextra .= " 2>/dev/null"; 282 } 283 283 } 284 284 … … 294 294 295 295 foreach $subcollection (@subcollections) { 296 297 298 296 if (defined ($self->{'collect_cfg'}->{'subcollection'}->{$subcollection})) { 297 push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcollection}); 298 } 299 299 } 300 300 … … 305 305 my $language_metadata = "Language"; 306 306 if (defined ($self->{'collect_cfg'}->{'language_metadata'})) { 307 307 $language_metadata = $self->{'collect_cfg'}->{'language_metadata'}; 308 308 } 309 309 @languages = split /,/, $language if (defined $language); 310 310 foreach my $language (@languages) { 311 312 313 314 315 316 317 318 319 311 my $not=0; 312 if ($language =~ s/^\!//) { 313 $not = 1; 314 } 315 if($not) { 316 push (@$langarr, "!$language"); 317 } else { 318 push (@$langarr, "$language"); 319 } 320 320 } 321 321 … … 326 326 327 327 if ($self->{'debug'}) { 328 328 $handle = STDOUT; 329 329 } else { 330 331 332 333 334 335 336 330 print STDERR "Cmd: 
$full_lucene_passes_exe $opt_create_index index $lucene_passes_sections \"$build_dir\" \"$indexdir\" $osextra\n"; 331 if (!-e "$full_lucene_passes" || 332 !open (PIPEOUT, "| $full_lucene_passes_exe $opt_create_index index $lucene_passes_sections \"$build_dir\" \"$indexdir\" $osextra")) { 333 print STDERR "<FatalError name='NoRunLucenePasses'/>\n</Stage>\n" if $self->{'gli'}; 334 die "lucenebuilder::build_index - couldn't run $lucene_passes_exe\n"; 335 } 336 $handle = lucenebuilder::PIPEOUT; 337 337 } 338 338 … … 340 340 my $gdbm_level = "document"; 341 341 if ($store_levels->{'section'}) { 342 342 $gdbm_level = "section"; 343 343 } 344 344 345 345 my $dom_level = ""; 346 346 foreach my $key (keys %$store_levels) { 347 348 349 347 if ($mgppbuilder::level_map{$key} eq $llevel) { 348 $dom_level = $key; 349 } 350 350 } 351 351 if ($dom_level eq "") { 352 353 352 print STDERR "Warning: unrecognized tag level $llevel\n"; 353 $dom_level = "document"; 354 354 } 355 355 … … 367 367 $self->{'buildproc'}->reset(); 368 368 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, 369 369 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'}); 370 370 close ($handle) unless $self->{'debug'}; 371 371 … … 383 383 # */ 384 384 sub make_infodatabase 385 385 { 386 386 my $self = shift (@_); 387 387 my $outhandle = $self->{'outhandle'}; … … 397 397 # called once to generate it. 398 398 if (!$self->{'incremental_dlc'} || !(-e $infodb_file)) 399 399 { 400 400 # basebuilder::make_infodatabase(@_); 401 401 # Note: this doesn't work as the direct reference means all the $self … … 403 403 $self->basebuilder::make_infodatabase(@_); 404 404 return; 405 405 } 406 406 407 407 # Carry on with an incremental addition
Note: See TracChangeset for help on using the changeset viewer.