Ignore:
Timestamp:
2013-06-21T22:54:56+12:00 (11 years ago)
Author:
ak19
Message:

Better diffing on Windows. If either the test or model collection was built on windows AND the other one was built on linux, there is now special handling for doc.xml and archiveinf-doc/src database files in order to normalise them to the linux situation for better results when diffing.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27604 r27695  
    1111use diffutil;
    1212use Text::Diff;
     13
     14use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
    1315
    1416sub readin_gdb
     
    2931}
    3032
     33# for debugging. Prints txt contents of db to file
     34sub print_string_to_file
     35{
     36    my ($text, $outfile) = @_;
     37   
     38    open(FOUT, ">$outfile") or die "ERROR failed to write to $outfile: $!\n";
     39    print FOUT $text;
     40    close(FOUT);
     41}
    3142
    3243sub test_gdb
     
    3445    my ($full_modeldb, $full_testdb,$strColName) = @_;
    3546
    36 
    3747   # print "Now is testing database\n";
    38    
     48    my ($dbname, $dirname, $suffix)= &File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$");
     49   
    3950    # need to sort text output of both test and model col database files, to normalise them for the comparison
    4051    # the -sort option to db2txt was added specifically to support diffcol
     
    4556    my $test_text = readin_gdb($test_cmd);
    4657
     58#   my $savepath = "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\"
     59#   print_string_to_file($test_text, $savepath.$dbname."_test.out");   
     60#   print_string_to_file($model_text, $savepath.$dbname."_model.out");
    4761
    4862    # filter out the fields that can be ignored in the two database files
     
    5064    $model_text =~ s/$ignore_line_re//g;
    5165    $test_text =~ s/$ignore_line_re//g;
     66 
    5267
     68    # if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files
     69    # in test and model collection to an even base for comparison
     70   
     71    my $testIsWin = &isDBWindowsSensitive($dbname, $test_text);
     72    my $modelIsWin = &isDBWindowsSensitive($dbname, $model_text);
     73   
     74    if($testIsWin == $modelIsWin) { # both linux or both windows, do the basic test we did on linux machines:
     75            # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files)
    5376
    54     # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files)
     77            # Remember the original model col on SVN could have been built anywhere,
     78            # and in the gdb files, absolute paths are stored to the collection location.
     79            # Crop these paths to the collect/<colname> point.
     80           
     81            # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff,
     82            # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry
     83            # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext]
     84            # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm
    5585
    56     # Remember the original model col on SVN could have been built anywhere,
    57     # and in the gdb files, absolute paths are stored to the collection location.
    58     # Crop these paths to the collect/<colname> point.
    59    
    60     # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff,
    61     # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry
    62     # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext]
    63     # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm
     86            $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;         
     87            $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;                       
     88    }
     89   
     90    else { # one of the collections was built on windows
     91        # handling slashes and other differences between a model coll built on one OS (e.g. linux)
     92        # and a test collection built and diffed on another OS (windows)
     93   
     94        my ($win_text, $lin_text); # references
     95        if($testIsWin) {
     96            $win_text = \$test_text;
     97            $lin_text = \$model_text;
     98        } else {
     99            $win_text = \$model_text;
     100            $lin_text = \$test_text;
     101        }
     102       
     103        if($dbname =~ m/archiveinf-doc/) {
     104            my $tmp = ""; # rebuild windows file's set of lines after processing them one by one
     105   
     106            # convert short filenames to long perl:
     107            # http://www.mombu.com/programming/perl/t-convert-dos-83-filenames-to-win32-long-filenames-using-perl-525448.html
     108            for my $line (split /^/, $$win_text) { # split the string into newlines
     109               
     110                if($line =~ m@^<assoc-file>(.*)(\s+)@s) {
     111                    $line = $1; # may be a short file name
     112                    # perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/
     113               
     114                    $line = "<assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again                   
     115                }
     116                $tmp .= $line;
     117            }
     118            $$win_text = $tmp;         
     119        }
     120       
     121        # slashes in windows text need to be turned into linux style slashes
     122        $$win_text =~ s@\\@/@g;
     123       
     124        # cut down absolute paths to files to just collect/colname/.../file, same as before
     125        $$lin_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
     126        $$win_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
     127       
     128        # for the windows text, need to further get rid of the driveletter after [ or <meta>
     129        $$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg;
     130        # now can go back to using $model_text and $test_text
     131#   print_string_to_file($$win_text, $savepath.$dbname."_test.out");   
     132#   print_string_to_file($$lin_text, $$savepath.$dbname."_model.out");
     133       
     134    } # end of equalising differences between a windows collection's db file and linux coll's db file
    64135
    65     $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
    66     $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
    67 
    68 
     136   
     137    # now can go back to using $model_text and $test_text
     138    #print_string_to_file($test_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_test.out");
     139    #print_string_to_file($model_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_model.out");
     140       
     141   
    69142    my $report_type = "OldStyle"; # Can not change this type.
    70143    my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type };
     
    84157}
    85158
     159# returns true if the contents are windows AND it matters for the diffing on the db that it's windows
     160# For col.gdb it does not seem to matter so far, if it is generated on a windows machine and to be compared to a linux-generated col.gdb
     161sub isDBWindowsSensitive
     162{
     163    my ($dbtailname, $db_contents) = @_; # db filename without suffix
     164   
     165    if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb
     166        return 0;
     167    }
     168    return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something
     169    # for doc.xml:
     170    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
     171}
     172
    861731;
Note: See TracChangeset for help on using the changeset viewer.