source: other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm@ 27695

Last change on this file since 27695 was 27695, checked in by ak19, 11 years ago

Better diffing on Windows. If either the test or model collection was built on windows AND the other one was built on linux, there is now special handling for doc.xml and archiveinf-doc/src database files in order to normalise them to the linux situation for better results when diffing.

File size: 6.6 KB
RevLine 
[21711]1package gdbdiff;
2
3BEGIN {
4 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
5 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
6 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
7 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
8}
9
10use util;
11use diffutil;
12use Text::Diff;
13
[27695]14use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
15
[21711]16sub readin_gdb
17{
18 my ($cmd) = @_;
19
20 open(PIN,"$cmd|")
21 || die "Unable to open pipe to $cmd: $!\n";
22
23 my $text_content = "";
24
25 while (defined (my $line = <PIN>)) {
26 $text_content .= $line;
27 }
28
29 close(PIN);
30 return $text_content;
31}
32
[27695]33# for debugging. Prints txt contents of db to file
34sub print_string_to_file
35{
36 my ($text, $outfile) = @_;
37
38 open(FOUT, ">$outfile") or die "ERROR failed to write to $outfile: $!\n";
39 print FOUT $text;
40 close(FOUT);
41}
[21711]42
43sub test_gdb
44{
[27604]45 my ($full_modeldb, $full_testdb,$strColName) = @_;
[21711]46
47 # print "Now is testing database\n";
[27695]48 my ($dbname, $dirname, $suffix)= &File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$");
49
[27604]50 # need to sort text output of both test and model col database files, to normalise them for the comparison
51 # the -sort option to db2txt was added specifically to support diffcol
52 my $model_cmd = "db2txt -sort $full_modeldb 2>&1";
53 my $test_cmd = "db2txt -sort $full_testdb 2>&1";
[21711]54
55 my $model_text = readin_gdb($model_cmd);
56 my $test_text = readin_gdb($test_cmd);
[27604]57
[27695]58# my $savepath = "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\"
59# print_string_to_file($test_text, $savepath.$dbname."_test.out");
60# print_string_to_file($model_text, $savepath.$dbname."_model.out");
[27604]61
62 # filter out the fields that can be ignored in the two database files
63 my $ignore_line_re = "\n<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>([^\n])*";
64 $model_text =~ s/$ignore_line_re//g;
65 $test_text =~ s/$ignore_line_re//g;
[27695]66
[27604]67
[27695]68 # if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files
69 # in test and model collection to an even base for comparison
70
71 my $testIsWin = &isDBWindowsSensitive($dbname, $test_text);
72 my $modelIsWin = &isDBWindowsSensitive($dbname, $model_text);
73
74 if($testIsWin == $modelIsWin) { # both linux or both windows, do the basic test we did on linux machines:
75 # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files)
[27604]76
[27695]77 # Remember the original model col on SVN could have been built anywhere,
78 # and in the gdb files, absolute paths are stored to the collection location.
79 # Crop these paths to the collect/<colname> point.
80
81 # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff,
82 # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry
83 # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext]
84 # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm
[27604]85
[27695]86 $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
87 $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
88 }
89
90 else { # one of the collections was built on windows
91 # handling slashes and other differences between a model coll built on one OS (e.g. linux)
92 # and a test collection built and diffed on another OS (windows)
93
94 my ($win_text, $lin_text); # references
95 if($testIsWin) {
96 $win_text = \$test_text;
97 $lin_text = \$model_text;
98 } else {
99 $win_text = \$model_text;
100 $lin_text = \$test_text;
101 }
102
103 if($dbname =~ m/archiveinf-doc/) {
104 my $tmp = ""; # rebuild windows file's set of lines after processing them one by one
105
106 # convert short filenames to long perl:
107 # http://www.mombu.com/programming/perl/t-convert-dos-83-filenames-to-win32-long-filenames-using-perl-525448.html
108 for my $line (split /^/, $$win_text) { # split the string into newlines
109
110 if($line =~ m@^<assoc-file>(.*)(\s+)@s) {
111 $line = $1; # may be a short file name
112 # perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/
113
114 $line = "<assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again
115 }
116 $tmp .= $line;
117 }
118 $$win_text = $tmp;
119 }
120
121 # slashes in windows text need to be turned into linux style slashes
122 $$win_text =~ s@\\@/@g;
123
124 # cut down absolute paths to files to just collect/colname/.../file, same as before
125 $$lin_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
126 $$win_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
127
128 # for the windows text, need to further get rid of the driveletter after [ or <meta>
129 $$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg;
130 # now can go back to using $model_text and $test_text
131# print_string_to_file($$win_text, $savepath.$dbname."_test.out");
132# print_string_to_file($$lin_text, $$savepath.$dbname."_model.out");
133
134 } # end of equalising differences between a windows collection's db file and linux coll's db file
[27604]135
[27695]136
137 # now can go back to using $model_text and $test_text
138 #print_string_to_file($test_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_test.out");
139 #print_string_to_file($model_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_model.out");
140
141
[21711]142 my $report_type = "OldStyle"; # Can not change this type.
143 my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type };
144
[27604]145 # leaving the ignore regex as it used to be in the following, in case it helps with single line comparisons
[27579]146 $diff_gdb = &diffutil::GenerateOutput($diff_gdb,"^<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>.*");
[21711]147
148 if($diff_gdb eq "")
149 {
150 return "";
151 }
152 else
153 {
154 return "Difference Report: Differences found in the Database file: \n$diff_gdb";
155 }
156 # Call diff?
157}
158
[27695]159# returns true if the contents are windows AND it matters for the diffing on the db that it's windows
160# For col.gdb it does not seem to matter so far, if it is generated on a windows machine and to be compared to a linux-generated col.gdb
161sub isDBWindowsSensitive
162{
163 my ($dbtailname, $db_contents) = @_; # db filename without suffix
164
165 if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb
166 return 0;
167 }
168 return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something
169 # for doc.xml:
170 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
171}
172
[21711]1731;
Note: See TracBrowser for help on using the repository browser.