[21711] | 1 | package gdbdiff;
|
---|
| 2 |
|
---|
# Compile-time setup: refuse to load unless the Greenstone environment
# variables are present, then put the Greenstone Perl library directories
# at the front of the module search path.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # perllib/cpan ends up ahead of perllib, and both ahead of whatever
    # was already in @INC.
    my $perllib_dir = "$ENV{'GSDLHOME'}/perllib";
    unshift(@INC, "$perllib_dir/cpan", $perllib_dir);
}
|
---|
| 9 |
|
---|
| 10 | use util;
|
---|
| 11 | use diffutil;
|
---|
| 12 | use Text::Diff;
|
---|
| 13 |
|
---|
[27695] | 14 | use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
|
---|
| 15 |
|
---|
# Runs a command and returns everything it writes to its standard output.
#
# Parameters:
#   $cmd - the command line to run. It is passed to open() as one string,
#          so shell redirections embedded in it (the callers in this file
#          append "2>&1") are honoured.
# Returns: the complete output of the command as a single string, or ""
#          when the command printed nothing.
# Dies:    when the pipe to the command cannot be opened.
sub readin_gdb
{
    my ($cmd) = @_;

    # A lexical handle keeps this sub from clobbering a package-global
    # filehandle, and the explicit '-|' mode means the open mode can never
    # be influenced by the contents of $cmd.
    open(my $pipe_in, '-|', $cmd)
        or die "Unable to open pipe to $cmd: $!\n";

    # Slurp the whole output in one read rather than line by line.
    my $text_content = do { local $/; <$pipe_in> };
    $text_content = "" unless defined $text_content;

    # The result of close() (which reflects the command's exit status) is
    # deliberately not treated as fatal, matching the previous behaviour.
    close($pipe_in);

    return $text_content;
}
|
---|
| 32 |
|
---|
# For debugging: writes a string (e.g. the text dump of a database) to a
# file, replacing any previous contents of that file.
#
# Parameters:
#   $text    - the string to write.
#   $outfile - path of the file to create or overwrite.
# Returns: 1 on success.
# Dies:    when the file cannot be opened, written to, or closed.
sub print_string_to_file
{
    my ($text, $outfile) = @_;

    # Three-argument open: the filename is taken literally, so leading or
    # trailing characters in $outfile cannot alter the open mode.
    open(my $out_fh, '>', $outfile)
        or die "ERROR failed to write to $outfile: $!\n";

    print {$out_fh} $text
        or die "ERROR failed to write to $outfile: $!\n";

    # Buffered write errors only surface at close time, so check it too.
    close($out_fh)
        or die "ERROR failed to write to $outfile: $!\n";

    return 1;
}
|
---|
[21711] | 42 |
|
---|
# Crops every absolute path in a database text dump down to the
# collect/<colname> point, so that dumps of collections built in different
# locations can be compared. For example
#   [/full/path/collect/colname/import/file.ext]
# becomes
#   [collect/colname/import/file.ext]
# Whatever precedes the first slash or backslash on the line (such as "[" or
# "<tag>") is kept. The collection name is matched literally.
#
# Parameters:
#   $text       - the database dump, one entry per line.
#   $strColName - the collection name expected after "collect/".
# Returns: the dump with the path prefixes removed.
sub _crop_to_collect_path
{
    my ($text, $strColName) = @_;

    # quotemeta: a collection name containing regex metacharacters must not
    # change the meaning of the pattern.
    my $colname_re = quotemeta($strColName);
    $text =~ s@^([^\\/]*).*(?:\\|/)(collect(?:\\|/)$colname_re)(.*)$@$1$2$3@mg;
    return $text;
}

# Rewrites the <assoc-file> lines of a Windows-built database dump so that
# Windows 8.3 short filenames are replaced by their long form.
#
# Parameters:
#   $text - the database dump, one entry per line.
# Returns: the dump with the <assoc-file> paths expanded where possible.
sub _expand_short_assoc_paths
{
    my ($text) = @_;

    # Only attempt the conversion when the Win32 function is actually loaded.
    my $can_expand = defined &Win32::GetLongPathName;

    my $rebuilt = "";
    for my $line (split /^/, $text) { # one element per line, newline kept
        if ($can_expand && $line =~ m@^<assoc-file>(.*)(\s+)@s) {
            my ($path, $trailing_ws) = ($1, $2); # $path may be a short file name

            # NOTE(review): GetLongPathName appears to yield nothing for a
            # path it cannot resolve on this machine - confirm against the
            # Win32 docs. Keep the original path in that case rather than
            # emptying the entry.
            my $long_path = Win32::GetLongPathName($path);
            $long_path = $path unless defined $long_path && length $long_path;

            $line = "<assoc-file>" . $long_path . $trailing_ws;
        }
        $rebuilt .= $line;
    }
    return $rebuilt;
}

# Compares the database file of a test collection against the database file
# of the model collection.
#
# Both files are dumped to sorted text with "db2txt -sort", lines for
# last-modified fields are removed, absolute paths are cropped to the
# collect/<colname> point, and - when only one of the two dumps looks
# Windows-built (see isDBWindowsSensitive) - the Windows dump is brought
# into line with the other one before the two texts are diffed.
#
# Parameters:
#   $full_modeldb - path to the model collection's database file.
#   $full_testdb  - path to the test collection's database file.
#   $strColName   - name of the collection, used to locate
#                   "collect/<colname>" inside stored paths.
# Returns: "" when diffutil::GenerateOutput leaves nothing to report,
#          otherwise a report string that starts with "Difference Report:".
# Dies:    when db2txt cannot be started (via readin_gdb).
sub test_gdb
{
    my ($full_modeldb, $full_testdb, $strColName) = @_;

    # fileparse is called by its full name, so make sure its module is loaded.
    require File::Basename;
    my ($dbname) = File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$");

    # Need to sort the text output of both the test and model col database
    # files, to normalise them for the comparison. The -sort option to db2txt
    # was added specifically to support diffcol.
    # NOTE(review): the paths are placed in the command line unquoted, so a
    # path containing spaces would presumably be split by the shell.
    my $model_cmd = "db2txt -sort $full_modeldb 2>&1";
    my $test_cmd = "db2txt -sort $full_testdb 2>&1";

    my $model_text = readin_gdb($model_cmd);
    my $test_text = readin_gdb($test_cmd);

    # When debugging, print_string_to_file() can be used at any point below
    # to save $model_text and $test_text for inspection.

    # Filter out the fields that can be ignored in the two database files.
    my $ignore_line_re = "\n<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>([^\n])*";
    $model_text =~ s/$ignore_line_re//g;
    $test_text =~ s/$ignore_line_re//g;

    # If the OS doesn't match and one of them is Windows, extra work needs to
    # be done to bring the db files of the test and model collection to an
    # even base for comparison.
    my $testIsWin = isDBWindowsSensitive($dbname, $test_text);
    my $modelIsWin = isDBWindowsSensitive($dbname, $model_text);

    if ($testIsWin == $modelIsWin) {
        # Both linux or both windows. The original model col could have been
        # built anywhere, and the gdb files store absolute paths to the
        # collection location (necessary for archiveinf-doc and -src.gdb
        # files), so crop these paths to the collect/<colname> point.
        $model_text = _crop_to_collect_path($model_text, $strColName);
        $test_text = _crop_to_collect_path($test_text, $strColName);
    }
    else {
        # One of the collections was built on Windows: handle slashes and
        # other differences between the two dumps.
        my ($win_text, $lin_text); # references to the two dumps
        if ($testIsWin) {
            $win_text = \$test_text;
            $lin_text = \$model_text;
        }
        else {
            $win_text = \$model_text;
            $lin_text = \$test_text;
        }

        # Convert Windows 8.3 short filenames to long filenames.
        if ($dbname =~ m/archiveinf-doc/) {
            $$win_text = _expand_short_assoc_paths($$win_text);
        }

        # Slashes in the Windows text need to be turned into linux style slashes.
        $$win_text =~ s@\\@/@g;

        # Cut down absolute paths to just collect/colname/.../file, as above.
        $$lin_text = _crop_to_collect_path($$lin_text, $strColName);
        $$win_text = _crop_to_collect_path($$win_text, $strColName);

        # For the Windows text, further get rid of the drive letter that is
        # left after [ or <meta>.
        $$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg;
    }

    # From here on $model_text and $test_text hold the normalised dumps.
    my $report_type = "OldStyle"; # Can not change this type.
    my $diff_gdb = diff(\$model_text, \$test_text, { STYLE => $report_type });

    # Leaving the ignore regex as it used to be in the following, in case it
    # helps with single line comparisons.
    $diff_gdb = diffutil::GenerateOutput($diff_gdb, "^<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>.*");

    return "" if $diff_gdb eq "";
    return "Difference Report: Differences found in the Database file: \n$diff_gdb";
}
|
---|
| 158 |
|
---|
# Tells whether a database dump is Windows-generated in a way that matters
# when diffing it against a dump from another OS.
#
# Only databases whose name contains "archiveinf" (archiveinf-doc and
# archiveinf-src) need the special Windows processing. For col.gdb it does
# not seem to matter so far whether it was generated on Windows or linux.
#
# Parameters:
#   $dbtailname  - the db filename without its suffix.
#   $db_contents - the text dump of the database.
# Returns: 1 when the name contains "archiveinf" and the contents contain a
#          backslash, 0 otherwise.
sub isDBWindowsSensitive
{
    my ($dbtailname, $db_contents) = @_;

    # Anything other than the archiveinf databases is never Windows-sensitive.
    unless ($dbtailname =~ m/archiveinf/) {
        return 0;
    }

    # Windows slashes detected. A better test would be:
    # [Something\something] OR <tag>something\something
    # (for doc.xml the entries look like
    #  <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>)
    my $has_backslash = ($db_contents =~ m/\\/);
    if ($has_backslash) {
        return 1;
    }
    return 0;
}
|
---|
| 172 |
|
---|
[21711] | 173 | 1;
|
---|