Changeset 26536 for main/trunk/greenstone2/perllib/doc.pm
- Timestamp:
- 2012-11-28T11:59:17+13:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/doc.pm
r26221 r26536 204 204 my ($type, $metadata) = @_; 205 205 206 if (defined $type && $type =~ /^(hash|hash_on_file|hash_on_ga_xml| incremental|filename|dirname|assigned)$/) {206 if (defined $type && $type =~ /^(hash|hash_on_file|hash_on_ga_xml|hash_on_full_filename|incremental|filename|dirname|full_filename|assigned)$/) { 207 207 $self->{'OIDtype'} = $type; 208 208 } else { … … 404 404 my $filename = $self->get_source_filename(); 405 405 $OID = &File::Basename::fileparse($filename, qr/\.[^.]*/); 406 $OID = &util::tidy_up_oid($OID); 407 } elsif ($self->{'OIDtype'} eq "full_filename") { 408 my $source_filename = $self->get_source_filename(); 409 my $dirsep = &util::get_os_dirsep(); 410 411 $source_filename =~ s/^import$dirsep//; 412 $source_filename =~ s/$dirsep/-/g; 413 $source_filename =~ s/\./_/g; 414 415 $OID = $source_filename; 406 416 $OID = &util::tidy_up_oid($OID); 407 417 } elsif ($self->{'OIDtype'} eq "dirname") { … … 439 449 if ($use_hash_oid) { 440 450 my $hash_on_file = 1; 451 my $hash_on_ga_xml = 0; 452 441 453 if ($self->{'OIDtype'} eq "hash_on_ga_xml") { 442 454 $hash_on_file = 0; 455 $hash_on_ga_xml = 1; 443 456 } 457 458 if ($self->{'OIDtype'} eq "hash_on_full_filename") { 459 $hash_on_file = 0; 460 $hash_on_ga_xml = 0; 461 462 my $source_filename = $self->get_source_filename(); 463 my $dirsep = &util::get_os_dirsep(); 464 465 $source_filename =~ s/^import$dirsep//; 466 $source_filename =~ s/$dirsep/-/g; 467 $source_filename =~ s/\./_/g; 468 469 # If the filename is very short then (handled naively) 470 # this can cause congestion in the hash-values 471 # computed, leading to documents sharing the same leading 472 # Hex values in the computed hash. 
473 # 474 # The solution taken here is to repeat the name of 475 # the file a sufficient number of times (up to 476 # the character limit defined in 'rep_limit') and 477 # make that the content that is hashed on 478 479 # *** Think twice before changing the following value 480 # as it will break backward compatibility of computed 481 # document HASH values 482 483 my $rep_limit = 256; 484 my $hash_content = undef; 485 486 if (length($source_filename)<$rep_limit) { 487 my $rep_string = "$source_filename|"; 488 my $rs_len = length($rep_string); 489 490 my $clone_times = int(($rep_limit-1)/$rs_len) +1; 491 492 $hash_content = substr($rep_string x $clone_times, 0, $rep_limit); 493 } 494 else { 495 $hash_content = $source_filename; 496 } 497 498 my $filename = &util::get_tmp_filename(); 499 if (!open (OUTFILE, ">:utf8", $filename)) { 500 print STDERR "doc::set_OID could not write to $filename\n"; 501 } else { 502 print OUTFILE $hash_content; 503 close (OUTFILE); 504 } 505 $OID = $self->_calc_OID ($filename); 506 507 print STDERR "****!!! the computed hash for: '", $source_filename, "' is: ", $OID,"\n\n"; 508 509 &util::rm ($filename); 510 } 511 444 512 if ($hash_on_file) { 445 513 # "hash" OID - feed file to hashfile.exe … … 450 518 $OID = $self->_calc_OID ($filename); 451 519 } else { 452 $hash_on_ file = 0;520 $hash_on_ga_xml = 1; # switch to back-up plan, and hash on GA file instead 453 521 } 454 522 } 455 if (!$hash_on_file) { 523 524 if ($hash_on_ga_xml) { 525 # In addition to being asked to explicitly calculate the hash based on the GA file, 526 # can also end up coming into this block if doing 'hash_on_file' but the file 527 # itself is of zero bytes (as could be the case with a 'doc.nul' file) 528 456 529 my $filename = &util::get_tmp_filename(); 457 530 if (!open (OUTFILE, ">:utf8", $filename)) {
Note:
See TracChangeset
for help on using the changeset viewer.