Changeset 20575 for gsdl/trunk/perllib/basebuilder.pm
- Timestamp:
- 2009-09-10T10:46:36+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/basebuilder.pm
r20100 r20575 357 357 358 358 # Get info database file path 359 my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->{'collection'}, $textdir); 359 my $infodb_type = $self->{'infodbtype'}; 360 my $infodb_file_path = &dbutil::get_infodb_file_path($infodb_type, $self->{'collection'}, $textdir); 360 361 361 362 print $outhandle "\n*** creating the info database and processing associated files\n" … … 367 368 368 369 my $reconstructed_docs = undef; 370 my $database_recs = undef; 371 369 372 if ($self->{'keepold'}) { 370 # reconstruct doc_obj metadata from database for all docs 371 $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($self->{'infodbtype'}, $infodb_file_path); 372 } 373 374 # set up the document processor 373 $database_recs = {}; 374 375 &dbutil::read_infodb_file($infodb_type, $infodb_file_path, $database_recs); 376 } 377 378 379 # Important (for memory usage reasons) that we obtain the filehandle 380 # here for writing out to the database, rather than after 381 # $reconstructed_docs has been set up (assuming -keepold is on) 382 # 383 # This is because when we open a pipe to txt2db [using open()] 384 # this triggers a fork() followed by exec(). $reconstructed_docs 385 # can get very large, and so if we did the open() after this, it means 386 # the fork creates a clone of the *large* process image which (admittedly) 387 # is then quickly replaced in the execve() with the much smaller image for 388 # 'txt2db'. The trouble is, in that for a seismic second caused by 389 # the fork(), the system really does need to have all that memory available 390 # even though it isn't ultimately used. The result is an out of memory 391 # error. 
392 375 393 my ($infodb_handle); 376 394 if ($self->{'debug'}) { … … 378 396 } 379 397 else { 380 $infodb_handle = &dbutil::open_infodb_write_handle($self->{'infodbtype'}, $infodb_file_path); 398 $infodb_handle = &dbutil::open_infodb_write_handle($infodb_type, $infodb_file_path); 381 399 if (!defined($infodb_handle)) 382 400 { … … 386 404 } 387 405 388 $self->{'buildproc'}->set_infodbtype ($self->{'infodbtype'}); 406 if ($self->{'keepold'}) { 407 # reconstruct doc_obj metadata from database for all docs 408 $reconstructed_docs 409 = &classify::reconstruct_doc_objs_metadata($infodb_type, 410 $infodb_file_path, 411 $database_recs); 412 } 413 414 # set up the document processor 415 416 $self->{'buildproc'}->set_infodbtype ($infodb_type); 389 417 $self->{'buildproc'}->set_output_handle ($infodb_handle); 390 418 $self->{'buildproc'}->set_mode ('infodb'); … … 420 448 421 449 # output classification information 422 &classify::output_classify_info ($self->{'classifiers'}, $self->{'infodbtype'}, $infodb_handle, 450 &classify::output_classify_info ($self->{'classifiers'}, $infodb_type, $infodb_handle, 423 451 $self->{'remove_empty_classifications'}, 424 452 $self->{'gli'}); … … 434 462 'thistype' => [ "Invisible" ], 435 463 'contains' => [ join(";", @doc_list) ] }; 436 &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "browselist", $browselist_infodb); 437 438 &dbutil::close_infodb_write_handle($self->{'infodbtype'}, $infodb_handle) if !$self->{'debug'}; 464 &dbutil::write_infodb_entry($infodb_type, $infodb_handle, "browselist", $browselist_infodb); 465 466 &dbutil::close_infodb_write_handle($infodb_type, $infodb_handle) if !$self->{'debug'}; 439 467 440 468 print STDERR "</Stage>\n" if $self->{'gli'};
Note:
See TracChangeset
for help on using the changeset viewer.