Ignore:
Timestamp:
2008-08-29T13:10:39+12:00 (16 years ago)
Author:
davidb
Message:

Introduction of a new GDBM alternative to archives.inf as a step towards full incremental building. The information traditionally stored in archives.inf, plus additional information that will help work out which files have changed since the last build and which doc-id they hashed to, is now stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin will be upgraded to use them to support incremental building.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugouts/BasPlugout.pm

    r16252 r17087  
    3535use printusage;
    3636use parse2;
     37use GDBMUtils;
     38
    3739
    3840# suppress the annoying "subroutine redefined" warning that various
     
    402404    ##############################
    403405    $self->saveas($doc_obj,$doc_dir);
    404    
     406    $self->archiveinf_gdbm($doc_obj,$doc_dir);
     407
    405408}
    406409
     
    630633}
    631634
     635
     636sub archiveinf_gdbm
     637{
     638    my $self = shift (@_);
     639    my ($doc_obj) = @_;
     640
     641    my $verbosity = $self->{'verbosity'};
     642
     643    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
     644    if (defined $collect_dir) {
     645    my $dirsep_regexp = &util::get_os_dirsep();
     646
     647    if ($collect_dir !~ /$dirsep_regexp$/) {
     648        # ensure there is a slash at the end
     649        $collect_dir .= &util::get_dirsep();
     650    }
     651    }
     652
     653    my $oid = $doc_obj->get_OID();
     654    my $source_filename = $doc_obj->get_source_filename();
     655
     656    my $oid_files = { 'src-file' => $source_filename,
     657              'assoc-files' => [] };
     658   
     659    my @reverse_lookups = ($source_filename);
     660
     661    foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
     662    my $real_filename = $assoc_file_rec->[0];
     663    my $full_afile = $assoc_file_rec->[1];
     664
     665    # for some reasons the image associate file has / before the full path
     666    $real_filename =~ s/^\\(.*)/$1/i;
     667    if (-e $real_filename) {
     668
     669        if (defined $collect_dir) {
     670        $real_filename =~ s/^$collect_dir//;
     671        }
     672
     673        push(@reverse_lookups,$real_filename);
     674
     675        push(@{$oid_files->{'assoc-files'}},$full_afile);
     676
     677    }
     678    else {
     679        print STDERR "Warning: archiveinf_gdbm()\n  $real_filename does not appear to be on the file system\n";
     680    }
     681    }
     682
     683    # better not to commit to a particular db implementation, but
     684    # for simplicity, will use GDBM for now.
     685
     686    my $output_dir = $self->{'output_dir'};
     687    my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb";
     688
     689    my $doc_db = &util::filename_cat($output_dir,"archiveinf-doc$db_ext");
     690    my $src_db = &util::filename_cat($output_dir,"archiveinf-src$db_ext");
     691
     692    my $doc_db_text = "";
     693    $doc_db_text .= "<src-file>$oid_files->{'src-file'}\n";
     694    foreach my $af (@{$oid_files->{'assoc-files'}}) {
     695    $doc_db_text .= "<assoc-file>$af\n";
     696    }
     697    chomp($doc_db_text); # remove trailing \n
     698
     699    &GDBMUtils::gdbmDatabaseSet($doc_db,$oid,$doc_db_text);
     700
     701    foreach my $rl (@reverse_lookups) {
     702    &GDBMUtils::gdbmDatabaseAppend($src_db,$rl,"<oid>$oid\n");
     703    }
     704   
     705}
     706
     707
    632708sub set_sortmeta {
    633709    my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.