Changeset 20575
- Timestamp: 2009-09-10T10:46:36+12:00 (15 years ago)
- Location: gsdl/trunk/perllib
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
gsdl/trunk/perllib/basebuilder.pm
r20100 r20575   (columns: old line, new line; "-" = removed, "+" = added, unmarked = unmodified)
 357  357
 358  358    # Get info database file path
 359       - my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->{'collection'}, $textdir);
      359  + my $infodb_type = $self->{'infodbtype'};
      360  + my $infodb_file_path = &dbutil::get_infodb_file_path($infodb_type, $self->{'collection'}, $textdir);
 360  361
 361  362    print $outhandle "\n*** creating the info database and processing associated files\n"
   …    …
 367  368
 368  369    my $reconstructed_docs = undef;
      370  + my $database_recs = undef;
      371  +
 369  372    if ($self->{'keepold'}) {
 370       - # reconstruct doc_obj metadata from database for all docs
 371       - $reconstructed_docs = &classify::reconstruct_doc_objs_metadata($self->{'infodbtype'}, $infodb_file_path);
 372       - }
 373       -
 374       - # set up the document processor
      373  + $database_recs = {};
      374  +
      375  + &dbutil::read_infodb_file($infodb_type, $infodb_file_path, $database_recs);
      376  + }
      377  +
      378  +
      379  + # Important (for memory usage reasons) that we obtain the filehandle
      380  + # here for writing out to the database, rather than after
      381  + # $reconstructed_docs has been set up (assuming -keepold is on)
      382  + #
      383  + # This is because when we open a pipe to txt2db [using open()]
      384  + # this triggers a fork() followed by exec(). $reconstructed_docs
      385  + # can get very large, and so if we did the open() after this, it means
      386  + # the fork creates a clone of the *large* process image which (admittedly)
      387  + # is then quickly replaced in the execve() with the much smaller image for
      388  + # 'txt2db'. The trouble is, in that for a seismic second caused by
      389  + # the fork(), the system really does need to have all that memory available
      390  + # even though it isn't ultimately used. The result is an out of memory
      391  + # error.
      392  +
 375  393    my ($infodb_handle);
 376  394    if ($self->{'debug'}) {
   …    …
 378  396    }
 379  397    else {
 380       - $infodb_handle = &dbutil::open_infodb_write_handle($self->{'infodbtype'}, $infodb_file_path);
      398  + $infodb_handle = &dbutil::open_infodb_write_handle($infodb_type, $infodb_file_path);
 381  399    if (!defined($infodb_handle))
 382  400    {
   …    …
 386  404    }
 387  405
 388       - $self->{'buildproc'}->set_infodbtype ($self->{'infodbtype'});
      406  + if ($self->{'keepold'}) {
      407  + # reconstruct doc_obj metadata from database for all docs
      408  + $reconstructed_docs
      409  + = &classify::reconstruct_doc_objs_metadata($infodb_type,
      410  + $infodb_file_path,
      411  + $database_recs);
      412  + }
      413  +
      414  + # set up the document processor
      415  +
      416  + $self->{'buildproc'}->set_infodbtype ($infodb_type);
 389  417    $self->{'buildproc'}->set_output_handle ($infodb_handle);
 390  418    $self->{'buildproc'}->set_mode ('infodb');
   …    …
 420  448
 421  449    # output classification information
 422       - &classify::output_classify_info ($self->{'classifiers'}, $self->{'infodbtype'}, $infodb_handle,
      450  + &classify::output_classify_info ($self->{'classifiers'}, $infodb_type, $infodb_handle,
 423  451    $self->{'remove_empty_classifications'},
 424  452    $self->{'gli'});
   …    …
 434  462    'thistype' => [ "Invisible" ],
 435  463    'contains' => [ join(";", @doc_list) ] };
 436       - &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, "browselist", $browselist_infodb);
 437       -
 438       - &dbutil::close_infodb_write_handle($self->{'infodbtype'}, $infodb_handle) if !$self->{'debug'};
      464  + &dbutil::write_infodb_entry($infodb_type, $infodb_handle, "browselist", $browselist_infodb);
      465  +
      466  + &dbutil::close_infodb_write_handle($infodb_type, $infodb_handle) if !$self->{'debug'};
 439  467
 440  468    print STDERR "</Stage>\n" if $self->{'gli'};
gsdl/trunk/perllib/classify.pm
r19772 r20575   (columns: old line, new line; "-" = removed, "+" = added, unmarked = unmodified)
 234  234    my $infodb_type = shift(@_);
 235  235    my $infodb_file_path = shift(@_);
 236       -
 237       - my %database_recs;
 238       - &dbutil::read_infodb_file($infodb_type, $infodb_file_path, \%database_recs);
      236  + my $database_recs = shift(@_);
 239  237
 240  238    # dig out top level doc sections
 241  239    my %top_sections = ();
 242  240    my %top_docnums = ();
 243       - foreach my $key ( keys %database_recs )
      241  + foreach my $key ( keys %$database_recs )
 244  242    {
 245       - my $md_rec = $database_recs{$key};
      243  + my $md_rec = $database_recs->{$key};
 246  244    my $md_hash = db_rec_to_hash($md_rec);
 247  245
   …    …
 266  264    add_section_content ($doc_obj, $top, $doc_db_hash);
 267  265    my $children = &get_children($doc_db_hash);
 268       - recurse_sections($doc_obj, $children, $oid, $top, \%database_recs);
      266  + recurse_sections($doc_obj, $children, $oid, $top, $database_recs);
 269  267
 270  268    push(@all_docs,$doc_obj);
Note: See TracChangeset for help on using the changeset viewer.