Ignore:
Timestamp:
2013-08-16T19:31:19+12:00 (11 years ago)
Author:
ak19
Message:

diffcol modifications for getting the METS tutorial collection to work. The docmets.xml files also used (oai)lastmodified timestamps, so these needed to be ignored as well.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r28076 r28078  
    626626    else
    627627    {
    628         my $ignore_line_re = "<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize)\">.*</Metadata>\\s*\\n*";
     628        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
     629        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize)\">.*</(.*?:)?Metadata>\\s*\\n*";
    629630       
    630631        my $strResult;
    631632
    632         # for doc.xml files, need to ignore many date fields. Filter these out before diffing,
     633        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
    633634        # in case these don't appear in the same order between collections, since
    634635        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
    635636        # when they can show up as unnecessary diff 'errors'
    636         if($strModel =~ m/doc\.xml$/) {
    637         my ($model_contents, $test_contents);
     637
     638        my ($model_contents, $test_contents);
     639
     640        if($strModel =~ m/doc(mets)?\.xml$/) {
     641
    638642        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
    639643        sysread(FIN, $model_contents, -s FIN);
     
    645649        $model_contents =~ s/$ignore_line_re//g;
    646650        $test_contents =~ s/$ignore_line_re//g;
    647 
    648 
     651       
     652
     653        # doc.xml needs to additionally be normalised, before comparing a windows test with a linux model or vice-versa
     654        if($strModel =~ m/doc\.xml$/) {
    649655            # equalise/normalise the two doc.xml files for OS differences, if there are any
    650656            my $testIsWin = &isDocXMLFileWindows($test_contents);
     
    731737#       }
    732738       
     739        } # finished special processing of doc.xml files
     740       
     741        # now can diff the normalised versions of the doc.xml/docmets.xml files:
    733742        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
    734743
Note: See TracChangeset for help on using the changeset viewer.