Changeset 15696
- Timestamp: 2008-05-26T13:05:16+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/basebuildproc.pm
r15695 r15696 34 34 use doc; 35 35 use docproc; 36 use strict; no strict 'subs'; 36 37 use util; 37 38 … … 330 331 my $self = shift (@_); 331 332 my ($doc_obj, $filename) = @_; 332 my $handle = $self->{'output_handle'}; 333 333 334 # only output this document if it is a "indexed_doc" or "info_doc" (database only) document 334 335 my $doctype = $doc_obj->get_doc_type(); 335 336 # only output this document if it is a "indexed_doc" or "info_doc" (database only) document337 336 return if ($doctype ne "indexed_doc" && $doctype ne "info_doc"); 338 337 339 338 my $archivedir = ""; 340 341 339 if (defined $filename) 342 340 { 343 341 # doc_obj derived directly from file 344 345 342 my ($dir) = $filename =~ /^(.*?)(?:\/|\\)[^\/\\]*$/; 346 343 $dir = "" unless defined $dir; … … 361 358 } 362 359 363 364 #GRB: moved 1/06/2004 from GRB01062004365 360 #add this document to the browse structure 366 361 push(@{$self->{'doclist'}},$doc_obj->get_OID()) … … 369 364 # classify this document 370 365 &classify::classify_doc ($self->{'classifiers'}, $doc_obj); 371 #GRB: end of moved block372 366 373 367 # this is another document … … 380 374 my $doc_OID = $doc_obj->get_OID(); 381 375 my $first = 1; 382 my $url = "";383 376 384 377 $self->{'doc_mdprefix_fields'} = {}; … … 391 384 $section_OID = $doc_OID . "." . 
$section; 392 385 } 386 my %section_infodb = (); 393 387 394 388 # update a few statistics 395 389 $self->{'num_bytes'} += $doc_obj->get_text_length ($section); 396 390 $self->{'num_sections'} += 1 unless ($doctype eq "classification"); 397 398 # output the section name399 print $handle "[$section_OID]\n";400 391 401 392 # output the fact that this document is a document (unless doctype … … 403 394 my $dtype = $doc_obj->get_metadata_element ($section, "doctype"); 404 395 if (!defined $dtype || $dtype !~ /\w/) { 405 print $handle "<doctype>doc\n";396 $section_infodb{"doctype"} = "doc"; 406 397 } 407 398 … … 411 402 # explicitly add <hastxt> as this is preserved as metadata when 412 403 # the database file is loaded in 413 414 404 if (defined $filename) 415 405 { 416 406 # doc_obj derived directly from file 417 407 if ($doc_obj->get_text_length($section) > 0) { 418 print $handle "<hastxt>1\n";408 $section_infodb{"hastxt"} = "1"; 419 409 } else { 420 print $handle "<hastxt>0\n";410 $section_infodb{"hastxt"} = "0"; 421 411 } 422 412 } … … 443 433 # special case for URL metadata 444 434 if ($field =~ /^URL$/i) { 445 $url .= "[$value]\n"; 446 $url .= "<section>$section_OID\n"; 447 $url .= '-' x 70 . 
"\n"; 435 $self->write_infodb_entry($value, { 'section' => $section_OID }); 448 436 } 449 437 450 438 if (!defined $self->{'dontdb'}->{$field}) { 451 print $handle "<$field>$value\n";439 $section_infodb{$field} = $value; 452 440 453 441 if ($section eq "") … … 465 453 foreach my $prefix (keys %$doc_mdprefix_fields) 466 454 { 467 print $handle "<metadataset>$prefix\n";455 $section_infodb{"metadataset"} = $prefix; 468 456 469 457 foreach my $field (keys %{$doc_mdprefix_fields->{$prefix}}) … … 471 459 my $val = $doc_mdprefix_fields->{$prefix}->{$field}; 472 460 473 print $handle "<metadatalist-$prefix>$field\n";474 print $handle "<metadatafreq-$prefix-$field>$val\n";461 $section_infodb{"metadatalist-$prefix"} = $field; 462 $section_infodb{"metadatafreq-$prefix-$field"} = $val; 475 463 } 476 477 464 } 478 465 } … … 481 468 # explicitly add <archivedir> as this is preserved as metadata when 482 469 # the database file is loaded in 483 484 470 if (defined $filename) 485 471 { 486 472 # output archivedir if at top level 487 473 if ($section eq $doc_obj->get_top_section()) { 488 print $handle "<archivedir>$archivedir\n";474 $section_infodb{"archivedir"} = $archivedir; 489 475 } 490 476 } … … 492 478 # output document display type 493 479 if ($first) { 494 print $handle "<thistype>$thistype\n"; 495 } 496 480 $section_infodb{"thistype"} = $thistype; 481 } 497 482 498 483 if ($self->{'db_level'} eq "document") { 499 484 # doc num is num_docs not num_sections 500 485 # output the matching document number 501 print $handle "<docnum>$self->{'num_docs'}\n";502 503 }else {486 $section_infodb{"docnum"} = $self->{'num_docs'}; 487 } 488 else { 504 489 # output a list of children 505 490 my $children = $doc_obj->get_children ($section); 506 491 if (scalar(@$children) > 0) { 507 print $handle "<childtype>$childtype\n"; 508 print $handle "<contains>"; 509 my $firstchild = 1; 510 foreach my $child (@$children) { 511 print $handle ";" unless $firstchild; 512 $firstchild = 0; 513 if 
($child =~ /^.*?\.(\d+)$/) { 514 print $handle "\".$1"; 515 } else { 516 print $handle "\".$child"; 492 $section_infodb{"childtype"} = $childtype; 493 my $contains = ""; 494 foreach my $child (@$children) 495 { 496 $contains .= ";" unless ($contains eq ""); 497 if ($child =~ /^.*?\.(\d+)$/) 498 { 499 $contains .= "\".$1"; 500 } 501 else { 502 $contains .= "\".$child"; 517 503 } 518 504 } 519 print $handle "\n"; 520 } 521 #output the matching doc number 522 print $handle "<docnum>$self->{'num_sections'}\n"; 523 505 $section_infodb{"contains"} = $contains; 506 } 507 # output the matching doc number 508 $section_infodb{"docnum"} = $self->{'num_sections'}; 524 509 } 525 510 526 print $handle '-' x 70, "\n"; 527 511 $self->write_infodb_entry($section_OID, \%section_infodb); 528 512 529 513 # output a database entry for the document number 530 514 if ($self->{'db_level'} eq "document") { 531 print $handle "[$self->{'num_docs'}]\n"; 532 print $handle "<section>$doc_OID\n"; 515 $self->write_infodb_entry($self->{'num_docs'}, { 'section' => $doc_OID }); 533 516 } 534 517 else { 535 print $handle "[$self->{'num_sections'}]\n"; 536 print $handle "<section>$section_OID\n"; 537 } 538 print $handle '-' x 70, "\n"; 539 540 # output entry for url 541 if ($url ne "") { 542 print $handle $url; 543 } 518 $self->write_infodb_entry($self->{'num_sections'}, { 'section' => $section_OID }); 519 } 544 520 545 521 $first = 0; … … 547 523 last if ($self->{'db_level'} eq "document"); # if no sections wanted, only add the docs 548 524 } 549 550 #GRB01062004: see code above moved from here 525 } 526 527 528 sub write_infodb_entry 529 { 530 my $self = shift(@_); 531 532 $self->write_infodb_entry_gdbm(@_); 533 } 534 535 536 sub write_infodb_entry_gdbm 537 { 538 my $self = shift(@_); 539 my $infodb_key = shift(@_); 540 my $infodb_map = shift(@_); 541 542 my $handle = $self->{'output_handle'}; 543 544 print $handle "[$infodb_key]\n"; 545 foreach my $infodb_value_key (keys(%$infodb_map)) 546 { 547 
print $handle "<$infodb_value_key>" . $infodb_map->{$infodb_value_key} . "\n"; 548 } 549 print $handle '-' x 70, "\n"; 551 550 } 552 551
Note: See TracChangeset for help on using the changeset viewer.