Ignore:
Timestamp:
2010-05-05T14:53:53+12:00 (14 years ago)
Author:
davidb
Message:

Manifest file processing upgraded to support OIDs. When an <OID> element is encountered, the code uses the archiveinf-doc database to look up the source and associated files that make up that document.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/manifest.pm

    r18441 r22037  
    11package manifest;
    22
    3 use XMLParser;
    43use strict;
    54no strict 'refs'; # allow filehandles to be variables and viceversa
    65
     6use XMLParser;
     7use dbutil;
     8
    79our $self;
    810
    911sub new {
    1012    my ($class) = shift (@_);
     13    my ($infodbtype,$archivedir) = @_;
    1114
    1215    $self = {} ;
     
    1518    $self->{'reindex'} = {};
    1619    $self->{'delete'} = {};
     20
     21    my $arcinfo_doc_filename
     22    = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
     23
     24    if (-e $arcinfo_doc_filename) {
     25    # Only store the infodb-doc filename if it exists
     26    # If it doesn't exist then this means the collection has not been
     27    #   built yet (or else the archives folder has been deleted).
     28    #   Either way we have no way to look up which files
     29    #   are associated with an OID.  If we encounter an OID
     30    #   tag later on, we will use the fact that this field is
     31    #   not defined to issue a warning
     32
     33    $self->{'_arcinfo-doc-filename'} = $arcinfo_doc_filename;
     34    $self->{'_infodbtype'} = $infodbtype;
     35    }
    1736
    1837    return bless $self, $class;
     
    89108    my ($expat, $element) = @_;
    90109
    91     if ($element eq "Filename")
    92     {
    93     $self->{'filename'} = "";
     110    if (($element eq "Filename") || ($element eq "OID"))
     111    {
     112    $self->{'item-val'} = "";
    94113    }
    95114    elsif ($element eq "Manifest") {
     
    99118    if (defined($self->{'file-type'}))
    100119    {
    101         print STDERR "Warning: Malformed XML manifest ($element nested inside " . $self->{'file-type'} . ")\n";
    102     }
    103 
    104     $self->{'file-type'} = $element;
     120        print STDERR "Warning: Malformed XML manifest\n";
     121        print STDERR "         Unrecognized element $element nested inside " . $self->{'file-type'} . ".\n";
     122    }
     123    else {
     124        my $filetype = lc($element);
     125        $self->{'file-type'} = $filetype;
     126        if (!defined $self->{$filetype}) {
     127        print STDERR "Warning: <$element> is not one of the registered tags for manifest format.\n";
     128        }
     129    }
     130
    105131    }
    106132}
     
    114140    if ($element eq "Filename")
    115141    {
    116     $self->{lc($self->{'file-type'})}->{$self->{'filename'}} = 1;
    117     $self->{'filename'} = undef;
     142    my $filetype = $self->{'file-type'};
     143    my $filename  = $self->{'item-val'};
     144
     145    $self->{$filetype}->{$filename} = 1;
     146    $self->{'item-val'} = undef;
     147    }
     148    elsif ($element eq "OID") {
     149    # look up src and assoc filenames used by this doc oid
     150
     151    my $filetype = $self->{'file-type'};
     152    my $oid  = $self->{'item-val'};
     153
     154    if (defined $self->{'_infodbtype'}) {
     155       
     156
     157        my $infodbtype = $self->{'_infodbtype'};
     158        my $arcinfo_doc_filename = $self->{'_arcinfo-doc-filename'};
     159       
     160        my $doc_rec_string = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $oid);
     161
     162        my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
     163       
     164        my $doc_source_file = $doc_rec->{'src-file'}->[0];
     165        my $assoc_files = $doc_rec->{'assoc-file'};
     166        my @all_files = ($doc_source_file,@$assoc_files);
     167       
     168        foreach my $filename (@all_files) {
     169       
     170        if (!&util::filename_is_absolute($filename)) {
     171            $filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$filename);
     172        }
     173
     174        $self->{$filetype}->{$filename} = 1;
     175        }
     176    }
     177    else {
     178        print STDERR "Warning: No archiveinf-doc database in archives directory.\n";
     179        print STDERR "         Unable to look up source files that constitute document $oid.\n";
     180    }
     181
     182    $self->{'item-val'} = undef;
    118183    }
    119184    else
     
    129194    my ($expat) = @_;
    130195
    131     if (defined $self->{'filename'}) {
     196    if (defined $self->{'item-val'}) {
    132197    my $text = $_;
    133198    chomp($text);
     
    136201    $text =~ s/\s+$//; 
    137202   
    138     $self->{'filename'} .= $text if ($text !~ m/^\s*$/);
     203    $self->{'item-val'} .= $text if ($text !~ m/^\s*$/);
    139204    }
    140205}
     
    152217    my ($expat) = @_;
    153218
     219    if (defined $self->{'import'}) {
     220    print STDERR "Warning: <Import> tag is deprecated.\n";
     221    print STDERR "         Processing data as if it were tagged as <Index>\n";
     222    $self->{'index'} = $self->{'import'};
     223    }
     224
    154225}
    155226
Note: See TracChangeset for help on using the changeset viewer.