Changeset 28078

Show
Ignore:
Timestamp:
16.08.2013 19:31:19 (6 years ago)
Author:
ak19
Message:

Modifications to diffcol to get the METS tutorial collection working. The docmets.xml files also contain (oai)lastmodified timestamps, so these need to be ignored as well.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r28076 r28078  
    626626    else 
    627627    { 
    628         my $ignore_line_re = "<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize)\">.*</Metadata>\\s*\\n*"; 
     628        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata> 
     629        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize)\">.*</(.*?:)?Metadata>\\s*\\n*"; 
    629630         
    630631        my $strResult; 
    631632 
    632         # for doc.xml files, need to ignore many date fields. Filter these out before diffing, 
     633        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing, 
    633634        # in case these don't appear in the same order between collections, since 
    634635        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done 
    635636        # when they can show up as unnecessary diff 'errors' 
    636         if($strModel =~ m/doc\.xml$/) { 
    637         my ($model_contents, $test_contents); 
     637 
     638        my ($model_contents, $test_contents); 
     639 
     640        if($strModel =~ m/doc(mets)?\.xml$/) { 
     641 
    638642        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n"; 
    639643        sysread(FIN, $model_contents, -s FIN); 
     
    645649        $model_contents =~ s/$ignore_line_re//g; 
    646650        $test_contents =~ s/$ignore_line_re//g; 
    647  
    648  
     651         
     652 
     653        # doc.xml needs to additionally be normalised, before comparing a windows test with a linux model or vice-versa 
     654        if($strModel =~ m/doc\.xml$/) { 
    649655            # equalise/normalise the two doc.xml files for OS differences, if there are any 
    650656            my $testIsWin = &isDocXMLFileWindows($test_contents); 
     
    731737#       } 
    732738         
     739        } # finished special processing of doc.xml files 
     740         
     741        # now can diff the normalised versions of the doc.xml/docmets.xml files: 
    733742        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; 
    734743