Changeset 27695

Show
Ignore:
Timestamp:
21.06.2013 22:54:56 (6 years ago)
Author:
ak19
Message:

Better diffing on Windows. If either the test or model collection was built on windows AND the other one was built on linux, there is now special handling for doc.xml and archiveinf-doc/src database files in order to normalise them to the linux situation for better results when diffing.

Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27666 r27695  
    537537} 
    538538 
     539 
     540# this function is only called on DocXMLFiles.  
     541# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm) 
     542# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field 
     543sub isDocXMLFileWindows 
     544{ 
     545    my ($file_contents) = @_; 
     546     
     547    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.  
     548     
     549    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes. 
     550    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files? 
     551     
     552    # for doc.xml: 
     553    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata> 
     554    if($file_contents =~ m@<Metadata name="gsdlsourcefilename">([^>]*)</Metadata>@m) { 
     555        $gsdlsourcefilename = $1; 
     556        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected. 
     557            return 1; 
     558        } 
     559    } 
     560     
     561    return 0;    
     562} 
     563 
    539564sub TestEach 
    540565{ 
     
    547572        my @aryInModel = &diffutil::files_in_dir($strModel); 
    548573        my @aryInTest = &diffutil::files_in_dir($strTest); 
     574         
     575        # Files to be skipped because they get generated on one OS but not the other 
     576        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux 
     577        my $skipfiles_re = qr/\.invf\.state\.\d+$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html 
     578        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl 
     579        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest; 
     580         
     581        # Now check all remaining files in the folder exist in both model and test collections 
    549582        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel); 
    550583        my @aryCorrectFiles = @{$aryTwoPointers[1]}; 
     
    557590                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 
    558591                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 
    559                 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 
    560                 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g)) 
     592                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file  
     593                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g)) #$strEachFile =~ m/\.invf\.state\.\d+/ 
    561594                { 
    562595                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); 
     
    578611                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 
    579612                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 
    580                 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g)) 
     613                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g)) 
    581614                { 
    582615                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); 
     
    611644        $test_contents =~ s/$ignore_line_re//g; 
    612645         
     646            # equalise/normalise the two doc.xml files for OS differences, if there are any 
     647            my $testIsWin = &isDocXMLFileWindows($test_contents); 
     648            my $modelIsWin = &isDocXMLFileWindows($model_contents); 
     649             
     650            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant 
     651             
     652                my $win_contents = $testIsWin ? \$test_contents : \$model_contents; 
     653             
     654                # make all windows slashes into unix slashes 
     655                $$win_contents =~ s@\\@\/@g; 
     656                # make windows \r newlines into constant \n newlines         
     657                $$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file 
     658                 
     659                #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n ? 
     660                # $$win_contents =~ s@\r@\n@mg; 
     661            } 
     662         
    613663        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; 
    614664 
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27604 r27695  
    1111use diffutil; 
    1212use Text::Diff; 
     13 
     14use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames 
    1315 
    1416sub readin_gdb 
     
    2931} 
    3032 
     33# for debugging. Prints txt contents of db to file 
     34sub print_string_to_file  
     35{ 
     36    my ($text, $outfile) = @_; 
     37     
     38    open(FOUT, ">$outfile") or die "ERROR failed to write to $outfile: $!\n"; 
     39    print FOUT $text; 
     40    close(FOUT); 
     41} 
    3142 
    3243sub test_gdb 
     
    3445    my ($full_modeldb, $full_testdb,$strColName) = @_; 
    3546 
    36  
    3747   # print "Now is testing database\n"; 
    38      
     48    my ($dbname, $dirname, $suffix)= &File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$"); 
     49    
    3950    # need to sort text output of both test and model col database files, to normalise them for the comparison 
    4051    # the -sort option to db2txt was added specifically to support diffcol 
     
    4556    my $test_text = readin_gdb($test_cmd); 
    4657 
     58#   my $savepath = "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\" 
     59#   print_string_to_file($test_text, $savepath.$dbname."_test.out");     
     60#   print_string_to_file($model_text, $savepath.$dbname."_model.out"); 
    4761 
    4862    # filter out the fields that can be ignored in the two database files 
     
    5064    $model_text =~ s/$ignore_line_re//g; 
    5165    $test_text =~ s/$ignore_line_re//g; 
     66   
    5267 
     68    # if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files 
     69    # in test and model collection to an even base for comparison 
     70     
     71    my $testIsWin = &isDBWindowsSensitive($dbname, $test_text); 
     72    my $modelIsWin = &isDBWindowsSensitive($dbname, $model_text); 
     73     
     74    if($testIsWin == $modelIsWin) { # both linux or both windows, do the basic test we did on linux machines: 
     75            # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files) 
    5376 
    54     # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files) 
     77            # Remember the original model col on SVN could have been built anywhere,  
     78            # and in the gdb files, absolute paths are stored to the collection location.  
     79            # Crop these paths to the collect/<colname> point. 
     80             
     81            # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff, 
     82            # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry 
     83            # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext] 
     84            # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 
    5585 
    56     # Remember the original model col on SVN could have been built anywhere,  
    57     # and in the gdb files, absolute paths are stored to the collection location.  
    58     # Crop these paths to the collect/<colname> point. 
    59      
    60     # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff, 
    61     # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry 
    62     # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext] 
    63     # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 
     86            $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;           
     87            $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;                        
     88    } 
     89     
     90    else { # one of the collections was built on windows 
     91        # handling slashes and other differences between a model coll built on one OS (e.g. linux)  
     92        # and a test collection built and diffed on another OS (windows) 
     93     
     94        my ($win_text, $lin_text); # references 
     95        if($testIsWin) { 
     96            $win_text = \$test_text; 
     97            $lin_text = \$model_text; 
     98        } else { 
     99            $win_text = \$model_text; 
     100            $lin_text = \$test_text; 
     101        } 
     102         
     103        if($dbname =~ m/archiveinf-doc/) { 
     104            my $tmp = ""; # rebuild windows file's set of lines after processing them one by one 
     105     
     106            # convert short filenames to long perl:  
     107            # http://www.mombu.com/programming/perl/t-convert-dos-83-filenames-to-win32-long-filenames-using-perl-525448.html 
     108            for my $line (split /^/, $$win_text) { # split the string into newlines 
     109                 
     110                if($line =~ m@^<assoc-file>(.*)(\s+)@s) { 
     111                    $line = $1; # may be a short file name 
     112                    # perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/ 
     113                 
     114                    $line = "<assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again                     
     115                } 
     116                $tmp .= $line; 
     117            } 
     118            $$win_text = $tmp;           
     119        } 
     120         
     121        # slashes in windows text need to be turned into linux style slashes 
     122        $$win_text =~ s@\\@/@g; 
     123         
     124        # cut down absolute paths to files to just collect/colname/.../file, same as before 
     125        $$lin_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
     126        $$win_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
     127         
     128        # for the windows text, need to further get rid of the driveletter after [ or <meta> 
     129        $$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg;  
     130        # now can go back to using $model_text and $test_text 
     131#   print_string_to_file($$win_text, $savepath.$dbname."_test.out");     
     132#   print_string_to_file($$lin_text, $$savepath.$dbname."_model.out"); 
     133         
     134    } # end of equalising differences between a windows collection's db file and linux coll's db file 
    64135 
    65     $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
    66     $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
    67  
    68  
     136     
     137    # now can go back to using $model_text and $test_text 
     138    #print_string_to_file($test_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_test.out");  
     139    #print_string_to_file($model_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_model.out"); 
     140         
     141     
    69142    my $report_type = "OldStyle"; # Can not change this type. 
    70143    my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type }; 
     
    84157} 
    85158 
     159# returns true if the contents are windows AND it matters for the diffing on the db that it's windows 
     160# For col.gdb it does not seem to matter so far, if it is generated on a windows machine and to be compared to a linux-generated col.gdb 
     161sub isDBWindowsSensitive  
     162{ 
     163    my ($dbtailname, $db_contents) = @_; # db filename without suffix 
     164     
     165    if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb 
     166        return 0; 
     167    } 
     168    return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something 
     169    # for doc.xml: 
     170    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata> 
     171} 
     172 
    861731;