Changeset 14934
- Timestamp: 2007-12-20T21:53:14+13:00 (16 years ago)
- Location: gsdl/trunk/perllib
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/basebuilder.pm
r14930 r14934 368 368 $self->{'buildproc'}->zero_reset(); 369 369 370 $self->{'buildproc'}->{'mdprefix_fields'} = {}; 371 370 372 if ($self->{'keepold'}) { 371 373 # create flat classify structure, ready for new docs to be added … … 377 379 378 380 381 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'}, 382 "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'}); 383 379 384 # this has changed to only output collection meta if its 380 385 # not in the config file 381 386 $self->output_collection_meta($handle); 382 &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},383 "", {}, $self->{'buildproc'}, $self->{'maxdocs'},0, $self->{'gli'});384 387 385 388 # output classification information … … 562 565 } 563 566 564 # default is to output an empty [collection] entry 567 568 569 sub output_collection_meta_start { 570 my $self = shift(@_); 571 my ($handle) = @_; 572 573 print $handle "[collection]\n"; 574 575 } 576 577 sub output_collection_meta_sets { 578 my $self = shift(@_); 579 my ($handle) = @_; 580 581 my $mdprefix_fields = $self->{'buildproc'}->{'mdprefix_fields'}; 582 583 foreach my $prefix (keys %$mdprefix_fields) 584 { 585 print $handle "<metadataset>$prefix\n"; 586 587 foreach my $field (keys %{$mdprefix_fields->{$prefix}}) 588 { 589 my $val = $mdprefix_fields->{$prefix}->{$field}; 590 591 print $handle "<metadatalist-$prefix>$field\n"; 592 print $handle "<metadatafreq-$prefix-$field>$val\n"; 593 } 594 595 } 596 597 } 598 599 sub output_collection_meta_end { 600 my $self = shift(@_); 601 my ($handle) = @_; 602 603 print $handle ('-' x 70) . "\n";; 604 605 } 606 607 608 # default is to output the metadata sets (prefixes) used in collection 609 565 610 sub output_collection_meta { 566 611 my $self = shift(@_); 567 612 my ($handle) = @_; 568 569 print $handle "[collection]\n". ('-' x 70) . 
"\n";; 613 614 $self->output_collection_meta_start($handle); 615 $self->output_collection_meta_sets($handle); 616 $self->output_collection_meta_end($handle); 570 617 571 618 } -
gsdl/trunk/perllib/basebuildproc.pm
r12844 r14934 297 297 } 298 298 299 300 301 sub infodb_metadata_stats 302 { 303 my $self = shift (@_); 304 my ($field) = @_; 305 306 # Keep some statistics relating to metadata sets used and 307 # frequency of particular metadata fields within each set 308 309 # Union of metadata prefixes and frequency of fields 310 # (both scoped for this document alone, and across whole collection) 311 312 if ($field =~ m/^(.+)\.(.*)$/) { 313 my $prefix = $1; 314 my $core_field = $2; 315 316 $self->{'doc_mdprefix_fields'}->{$prefix}->{$core_field}++; 317 $self->{'mdprefix_fields'}->{$prefix}->{$core_field}++; 318 } 319 elsif ($field =~ m/^[[:upper:]]/) { 320 # implicit 'ex' metadata set 321 322 $self->{'doc_mdprefix_fields'}->{'ex'}->{$field}++; 323 $self->{'mdprefix_fields'}->{'ex'}->{$field}++; 324 } 325 326 } 327 328 299 329 sub infodb { 300 330 my $self = shift (@_); … … 351 381 my $first = 1; 352 382 my $url = ""; 383 384 $self->{'doc_mdprefix_fields'} = {}; 385 353 386 while (defined $section) { 354 387 # update a few statistics … … 412 445 if (!defined $self->{'dontgdbm'}->{$field}) { 413 446 print $handle "<$field>$value\n"; 447 448 if ($section eq "") 449 { 450 $self->infodb_metadata_stats($field); 451 } 414 452 } 415 453 } 416 454 } 417 455 456 if ($section eq "") 457 { 458 my $doc_mdprefix_fields = $self->{'doc_mdprefix_fields'}; 459 460 foreach my $prefix (keys %$doc_mdprefix_fields) 461 { 462 print $handle "<metadataset>$prefix\n"; 463 464 foreach my $field (keys %{$doc_mdprefix_fields->{$prefix}}) 465 { 466 my $val = $doc_mdprefix_fields->{$prefix}->{$field}; 467 468 print $handle "<metadatalist-$prefix>$field\n"; 469 print $handle "<metadatafreq-$prefix-$field>$val\n"; 470 } 471 472 } 473 } 418 474 419 475 # If doc_obj reconstructed from GDBM file then no need to -
gsdl/trunk/perllib/lucenebuildproc.pm
r14923 r14934 398 398 # /** process() **/ 399 399 400 401 # Following methods seem to be no different to those defined in basebuildproc.pm 402 # From inspection, it looks like these ones can be removed 403 404 400 405 sub get_num_docs { 401 406 my $self = shift (@_); -
gsdl/trunk/perllib/mgppbuilder.pm
r14666 r14934 634 634 635 635 # do the collection info 636 print $handle "[collection]\n"; 637 636 $self->output_collection_meta_start($handle); 637 $self->output_collection_meta_sets($handle); 638 638 639 # first do the collection meta stuff - everything without a dot 639 640 my $collmetadefined = 0; … … 701 702 } 702 703 } 703 print $handle $lang_entry; 704 # end the collection entry 705 print $handle "\n" . ('-' x 70) . "\n"; 706 707 704 print $handle "$lang_entry\n"; 705 706 $self->output_collection_meta_end($handle); 708 707 } 709 708
Note: See TracChangeset for help on using the changeset viewer.