Changeset 27695 for other-projects
- Timestamp:
- 2013-06-21T22:54:56+12:00 (11 years ago)
- Location:
- other-projects/nightly-tasks/diffcol/trunk/diffcol
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl
r27666 r27695 537 537 } 538 538 539 540 # this function is only called on DocXMLFiles. 541 # so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm) 542 # Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field 543 sub isDocXMLFileWindows 544 { 545 my ($file_contents) = @_; 546 547 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. 548 549 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes. 550 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files? 551 552 # for doc.xml: 553 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata> 554 if($file_contents =~ m@<Metadata name="gsdlsourcefilename">([^>]*)</Metadata>@m) { 555 $gsdlsourcefilename = $1; 556 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected. 557 return 1; 558 } 559 } 560 561 return 0; 562 } 563 539 564 sub TestEach 540 565 { … … 547 572 my @aryInModel = &diffutil::files_in_dir($strModel); 548 573 my @aryInTest = &diffutil::files_in_dir($strTest); 574 575 # Files to be skipped because they get generated on one OS but not the other 576 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux 577 my $skipfiles_re = qr/\.invf\.state\.\d+$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html 578 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl 579 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest; 580 581 # Now check all remaining files in the folder exist in both model and test collections 549 582 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel); 550 583 my @aryCorrectFiles = @{$aryTwoPointers[1]}; … … 557 590 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 558 591 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 559 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 560 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh )$/g))592 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 593 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g)) #$strEachFile =~ m/\.invf\.state\.\d+/ 561 594 { 562 595 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); … … 578 611 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 579 612 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 580 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh )$/g))613 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w)$/g)) 581 614 { 582 615 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); … … 611 644 $test_contents =~ s/$ignore_line_re//g; 612 645 646 # equalise/normalise the two doc.xml files for OS differences, if there are any 647 my $testIsWin = &isDocXMLFileWindows($test_contents); 648 my $modelIsWin = &isDocXMLFileWindows($model_contents); 649 650 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant 651 652 my $win_contents = $testIsWin ? \$test_contents : \$model_contents; 653 654 # make all windows slashes into unix slashes 655 $$win_contents =~ s@\\@\/@g; 656 # make windows \r newlines into constant \n newlines 657 $$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file 658 659 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n ? 660 # $$win_contents =~ s@\r@\n@mg; 661 } 662 613 663 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; 614 664 -
other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm
r27604 r27695 11 11 use diffutil; 12 12 use Text::Diff; 13 14 use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames 13 15 14 16 sub readin_gdb … … 29 31 } 30 32 33 # for debugging. Prints txt contents of db to file 34 sub print_string_to_file 35 { 36 my ($text, $outfile) = @_; 37 38 open(FOUT, ">$outfile") or die "ERROR failed to write to $outfile: $!\n"; 39 print FOUT $text; 40 close(FOUT); 41 } 31 42 32 43 sub test_gdb … … 34 45 my ($full_modeldb, $full_testdb,$strColName) = @_; 35 46 36 37 47 # print "Now is testing database\n"; 38 48 my ($dbname, $dirname, $suffix)= &File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$"); 49 39 50 # need to sort text output of both test and model col database files, to normalise them for the comparison 40 51 # the -sort option to db2txt was added specifically to support diffcol … … 45 56 my $test_text = readin_gdb($test_cmd); 46 57 58 # my $savepath = "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\" 59 # print_string_to_file($test_text, $savepath.$dbname."_test.out"); 60 # print_string_to_file($model_text, $savepath.$dbname."_model.out"); 47 61 48 62 # filter out the fields that can be ignored in the two database files … … 50 64 $model_text =~ s/$ignore_line_re//g; 51 65 $test_text =~ s/$ignore_line_re//g; 66 52 67 68 # if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files 69 # in test and model collection to an even base for comparison 70 71 my $testIsWin = &isDBWindowsSensitive($dbname, $test_text); 72 my $modelIsWin = &isDBWindowsSensitive($dbname, $model_text); 73 74 if($testIsWin == $modelIsWin) { # both linux or both windows, do the basic test we did on linux machines: 75 # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files) 53 76 54 # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files) 77 # Remember the original model col on SVN could have been built anywhere, 78 # and in the gdb files, absolute paths are stored to the collection location. 79 # Crop these paths to the collect/<colname> point. 80 81 # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff, 82 # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry 83 # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext] 84 # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 55 85 56 # Remember the original model col on SVN could have been built anywhere, 57 # and in the gdb files, absolute paths are stored to the collection location. 58 # Crop these paths to the collect/<colname> point. 59 60 # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff, 61 # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry 62 # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext] 63 # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 86 $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 87 $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 88 } 89 90 else { # one of the collections was built on windows 91 # handling slashes and other differences between a model coll built on one OS (e.g. linux) 92 # and a test collection built and diffed on another OS (windows) 93 94 my ($win_text, $lin_text); # references 95 if($testIsWin) { 96 $win_text = \$test_text; 97 $lin_text = \$model_text; 98 } else { 99 $win_text = \$model_text; 100 $lin_text = \$test_text; 101 } 102 103 if($dbname =~ m/archiveinf-doc/) { 104 my $tmp = ""; # rebuild windows file's set of lines after processing them one by one 105 106 # convert short filenames to long perl: 107 # http://www.mombu.com/programming/perl/t-convert-dos-83-filenames-to-win32-long-filenames-using-perl-525448.html 108 for my $line (split /^/, $$win_text) { # split the string into newlines 109 110 if($line =~ m@^<assoc-file>(.*)(\s+)@s) { 111 $line = $1; # may be a short file name 112 # perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/ 113 114 $line = "<assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again 115 } 116 $tmp .= $line; 117 } 118 $$win_text = $tmp; 119 } 120 121 # slashes in windows text need to be turned into linux style slashes 122 $$win_text =~ s@\\@/@g; 123 124 # cut down absolute paths to files to just collect/colname/.../file, same as before 125 $$lin_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 126 $$win_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 127 128 # for the windows text, need to further get rid of the driveletter after [ or <meta> 129 $$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg; 130 # now can go back to using $model_text and $test_text 131 # print_string_to_file($$win_text, $savepath.$dbname."_test.out"); 132 # print_string_to_file($$lin_text, $$savepath.$dbname."_model.out"); 133 134 } # end of equalising differences between a windows collection's db file and linux coll's db file 64 135 65 $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 66 $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 67 68 136 137 # now can go back to using $model_text and $test_text 138 #print_string_to_file($test_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_test.out"); 139 #print_string_to_file($model_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_model.out"); 140 141 69 142 my $report_type = "OldStyle"; # Can not change this type. 70 143 my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type }; … … 84 157 } 85 158 159 # returns true if the contents are windows AND it matters for the diffing on the db that it's windows 160 # For col.gdb it does not seem to matter so far, if it is generated on a windows machine and to be compared to a linux-generated col.gdb 161 sub isDBWindowsSensitive 162 { 163 my ($dbtailname, $db_contents) = @_; # db filename without suffix 164 165 if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb 166 return 0; 167 } 168 return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something 169 # for doc.xml: 170 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata> 171 } 172 86 173 1;
Note:
See TracChangeset
for help on using the changeset viewer.