Ignore:
Timestamp:
2013-06-21T22:54:56+12:00 (11 years ago)
Author:
ak19
Message:

Better diffing on Windows. If one of the test and model collections was built on Windows and the other was built on Linux, there is now special handling for doc.xml and the archiveinf-doc/src database files: they are normalised to the Linux form so that diffing gives better results.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27666 r27695  
    537537}
    538538
     539
     540# this function is only called on DocXMLFiles.
     541# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
     542# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
     543sub isDocXMLFileWindows
     544{
     545    my ($file_contents) = @_;
     546   
     547    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
     548   
     549    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
     550    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
     551   
     552    # for doc.xml:
     553    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
     554    if($file_contents =~ m@<Metadata name="gsdlsourcefilename">([^>]*)</Metadata>@m) {
     555        $gsdlsourcefilename = $1;
     556        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
     557            return 1;
     558        }
     559    }
     560   
     561    return 0;   
     562}
     563
    539564sub TestEach
    540565{
     
    547572        my @aryInModel = &diffutil::files_in_dir($strModel);
    548573        my @aryInTest = &diffutil::files_in_dir($strTest);
     574       
     575        # Files to be skipped because they get generated on one OS but not the other
     576        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
     577        my $skipfiles_re = qr/\.invf\.state\.\d+$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
     578        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
     579        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
     580       
     581        # Now check all remaining files in the folder exist in both model and test collections
    549582        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
    550583        my @aryCorrectFiles = @{$aryTwoPointers[1]};
     
    557590                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
    558591                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
    559                 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
    560                 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g))
     592                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 
     593                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g)) #$strEachFile =~ m/\.invf\.state\.\d+/
    561594                {
    562595                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel));
     
    578611                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
    579612                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
    580                 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g))
     613                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g))
    581614                {
    582615                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel));
     
    611644        $test_contents =~ s/$ignore_line_re//g;
    612645       
     646            # equalise/normalise the two doc.xml files for OS differences, if there are any
     647            my $testIsWin = &isDocXMLFileWindows($test_contents);
     648            my $modelIsWin = &isDocXMLFileWindows($model_contents);
     649           
     650            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
     651           
     652                my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
     653           
     654                # make all windows slashes into unix slashes
     655                $$win_contents =~ s@\\@\/@g;
     656                # make windows \r\n newlines into constant \n newlines
     657                $$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
     658               
     659                #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n ?
     660                # $$win_contents =~ s@\r@\n@mg;
     661            }
     662       
    613663        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
    614664
Note: See TracChangeset for help on using the changeset viewer.