source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 27743

Last change on this file since 27743 was 27743, checked in by ak19, 11 years ago

Basic Word-PDF collection now has the same number of diffing errors on Windows upon diffcol as on Linux and Mac. Needed to do a lot of special processing for windows: to remove carriage returns introduced into doc.xml when doing a multiread on the html version of a pdf doc after it has been converted to html. (And similarly, needed to get rid of windows carriage returns introduced into ex.Title meta for pdf01.pdf converted to HTML. This was handled in HTMLPlugin). Further special tags need either to be ignored, if they're time stamps, or specially handled if they're filepaths. Not sure if it's the encoding setting in multiread or maybe the locale that is introducing the carriage returns, but am dealing with this at the point of diffcol since it's not a 'problem' in Greenstone, just an inconsistency across OS-es. There's still one diffcol error remaining for this collection on all 3 OS: one word document has a different word wrap length on the machine where the model col was built compared to the wrap length on the other machines. This may be a setting to wvware or else libreoffice/staroffice, if these are used.

File size: 28.6 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47
48#--Global Variables Declaration-----------
49$gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
50$gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
51
52$gv_blnErrorStop = "false";
53$gv_blnErrorShow = "false";
54$gv_intVerbosity = 0;
55$gv_strMode = "Full";
56$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
57
58%gv_IndivList = ("archives" => 0,
59 "etc" => 0,
60 "images" => 0,
61 "building" => 0,
62 "import" => 0,
63 "index" => 0,
64 "log" => 0,
65 "metadata" => 0,
66 "perllib" => 0,
67 "temp" => 0);
68#----##
69
70#--System Setup---------------------------
71sub SetMode
72{
73 my ($strModeList) = @_;
74 $strModeList =~ s/\|/ /g;
75 my @Modes = split(" ",$strModeList);
76
77 my $blnIndividual = "true";
78 my $blnInitial = "false";
79 my $blnFull = "false";
80
81
82 foreach $strEachMode (@Modes)
83 {
84 if($strEachMode eq "all")
85 {
86 $blnFull = "true";
87 $blnIndividual = "false";
88 }
89 elsif($strEachMode eq "init")
90 {
91 $blnInitial = "true";
92 $blnIndividual = "false";
93 }
94 else
95 {
96 if(defined $gv_IndivList{$strEachMode})
97 {
98 $gv_IndivList{$strEachMode} = 1;
99 }
100 else
101 {
102 die Help("Error: used undefined mode");
103 }
104 }
105 }
106
107 if($blnFull eq "true") {return "Full";}
108 elsif($blnInitial eq "true") {return "Initial";}
109 elsif($blnIndividual eq "true") {return "Individual";}
110 else {die "Error occured in function SetMode!!\n";}
111}
112#----##
113
114#--System Process-------------------------
115sub IndivTest
116{
117 my ($strModelCol,$strTestCol,$strColName) = @_;
118 my @Errors = ();
119 my $intNumberOfErrors = 0;
120 foreach $strEachFolder (keys %gv_IndivList)
121 {
122 if($gv_IndivList{$strEachFolder} == 1)
123 {
124
125 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
126 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
127 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
128 $intNumberOfErrors = scalar(@Errors);
129 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
130 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
131 VobPrint("End Comparing \"$strEachFolder\"\n",0);
132 VobPrint("Difference Found: $intNumberOfErrors\n",0);
133 VobPrint ("\n",0);
134 }
135 }
136 return @Errors;
137}
138
139sub InitTest
140{
141
142 my ($strModelCol,$strTestCol,$strColName) = @_;
143 my $intLevel = 1;
144 my @Errors;
145
146 # Testing Log files
147# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
148# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
149#
150# if(-e $strModelLog && -e $strTestLog)
151# {
152# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
153# if($strLogError ne "")
154# {
155# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
156# VobPrint ("$strLogError\n",$intLevel);
157#
158# $strLogError = "$strLogError";
159# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
160# push(@Errors,$strLogError);
161# }
162# else
163# {
164# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
165# }
166# }
167# else
168# {
169# my $strErrorColName;
170# my $strLogError;
171#
172# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
173# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
174#
175# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
176# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
177# VobPrint ("$strLogError\n",$intLevel);
178# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
179#
180# push(@Errors,$strLogError);
181# }
182# VobPrint ("\n",$intLevel);
183
184 # Testing the build.cfg
185 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
186 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
187
188 if(-e $strModelBcfg && -e $strTestBcfg)
189 {
190 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
191 if($strBcfgError ne "")
192 {
193 if( $strOutputFormat eq "xml" ) {
194 print "<build-cfg succeeded=\"no\">\n<message>";
195 } else {
196 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
197 }
198
199 VobPrint ("$strBcfgError",$intLevel);
200 if( $strOutputFormat eq "xml" ) {
201 print "</message></build-cfg>";
202 }
203
204 $strBcfgError = "$strBcfgError";
205 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
206 push(@Errors,$strBcfgError);
207 }
208 else
209 {
210 if( $strOutputFormat eq "xml" ) {
211 print "<build-cfg succeeded=\"yes\"/>";
212 } else {
213 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
214 }
215 }
216 }
217 else
218 {
219 my $strErrorColName;
220 my $strBcfgError;
221
222 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
223 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
224
225 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
226 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
227 VobPrint ("$strBcfgError\n",$intLevel);
228 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
229
230 push(@Errors,$strBcfgError);
231 }
232 VobPrint ("\n",$intLevel);
233
234 # Testing the collect.cfg
235
236 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
237 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
238
239 if(-e $strModelCcfg && -e $strTestCcfg)
240 {
241 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
242 if($strCcfgError ne "")
243 {
244 if( $strOutputFormat eq "xml" ) {
245 print "<collect-cfg succeeded=\"no\"><message>";
246 } else {
247 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
248 }
249
250 VobPrint ("$strCcfgError",$intLevel);
251
252 if( $strOutputFormat eq "xml" ) {
253 print "</message></collect-cfg>";
254 }
255
256 $strCcfgError = "$strCcfgError";
257 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
258 push(@Errors,$strCcfgError);
259 }
260 else
261 {
262 if( $strOutputFormat eq "xml" ) {
263 print "<collect-cfg succeeded=\"yes\"/>";
264 } else {
265 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
266 }
267 }
268 }
269 else
270 {
271 my $strErrorColName;
272 my $strCcfgError;
273
274 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
275 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
276
277 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
278 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
279 VobPrint ("$strCcfgError\n",$intLevel);
280 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
281
282 push(@Errors,$strCcfgError);
283 }
284
285 VobPrint ("\n",$intLevel);
286
287 # Testing databases
288
289 # index
290 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
291 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
292 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); # returns 0 if no error
293 if($strGdbError) {
294 push(@Errors,$strGdbError);
295 }
296
297 # archives
298 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
299 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
300 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
301 if($strGdbError) {
302 push(@Errors,$strGdbError);
303 }
304
305 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
306 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
307 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
308 if($strGdbError) {
309 push(@Errors,$strGdbError);
310 }
311
312 VobPrint ("\n",$intLevel);
313
314 return @Errors;
315}
316
317
318# At present handles gdbm - need to expand to allow for jdbm and other db types
319sub GdbDiff
320{
321 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName) = @_;
322
323 my $strGdbError = 0;
324
325 if(-e $strModelGdb && -e $strTestGdb)
326 {
327 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
328 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb,$strColName);
329 if($strGdbError ne "")
330 {
331 if( $strOutputFormat eq "xml" ) {
332 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
333 } else {
334 AlignPrint("Database Comparsion Result","Failed",$intLevel);
335 }
336 VobPrint ("$strGdbError\n",$intLevel);
337
338 if( $strOutputFormat eq "xml" ) {
339 print "</message></database>";
340 }
341
342 $strGdbError = "$strGdbError";
343 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
344 #push(@Errors,$strGdbError);
345 }
346 else
347 {
348 if( $strOutputFormat eq "xml" ) {
349 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
350 } else {
351 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
352 }
353 }
354 }
355 else
356 {
357 my $strErrorColName;
358
359 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
360 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
361
362 AlignPrint("Database Comparsion Result","Failed",$intLevel);
363
364 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
365 VobPrint ("$strGdbError\n",$intLevel);
366
367 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
368
369 }
370
371 return $strGdbError;
372}
373
374sub FullTest
375{
376 my ($strModelCol,$strTestCol,$strColName) = @_;
377 my @Errors = ();
378 my $intLevel = 0;
379 my $intNumberDiffs = 0;
380
381 # <Initial Test>
382 if( $strOutputFormat eq "xml" ) {
383 #print "<initial-test>";
384 } else {
385 VobPrint("Initial Testing Start\n",$intLevel);
386 }
387
388 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
389 $intNumberDiffs = scalar(@Errors);
390
391 if( $strOutputFormat eq "xml" ) {
392 #print "</initial-test>";
393 } else {
394 VobPrint("Initial Testing End\n",$intLevel);
395 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
396 VobPrint("\n",$intLevel);
397 }
398 # </Initial Test>
399
400 # <Detailed Test>
401 if( $strOutputFormat eq "xml" ) {
402 #print "<detailed-test>";
403 } else {
404 VobPrint("Detail Testing Start\n",$intLevel);
405 }
406 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
407 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
408
409 if( $strOutputFormat eq "xml" ) {
410 #print "</detailed-test>";
411 } else {
412 VobPrint("Detail Testing End\n",$intLevel);
413 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
414 }
415 # </Detailed Test>
416
417 return @Errors;
418}
419#----##
420
421
422#--Other System Utilities
423sub PrintUsage
424{
425 my ($strProgName) = @_;
426 if ( $strOutputFormat eq "xml" ) {
427 print "<error>usage incorrect</error>\n";
428 } else {
429 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
430 }
431 Help("Error: used incorrect parameters");
432}
433
434sub Help
435{
436 my ($strError) = @_;
437 my $aryptHelps =
438 [ { 'name' => "verbosity",
439 'type' => "scale",
440 'argu' => "a integer" ,
441 'descrip' => "this parameter setup the verbosity of the testing result"},
442 { 'name' => "mode",
443 'type' => "option",
444 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
445 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
446 { 'name' => "estop",
447 'type' => "flag",
448 'argu' => "NULL" ,
449 'descrip' => "Set then system will stop once it meet an error"},
450 { 'name' => "eshow",
451 'type' => "flag",
452 'argu' => "NULL" ,
453 'descrip' => "Set then system will show the error summary"}
454 ];
455
456
457 if ( $strOutputFormat ne "xml" ) {
458 print "$strError\n";
459
460 foreach my $hashOneArg (@{$aryptHelps})
461 {
462 print "\n----------------------------\n";
463 print "Parameters: -".$hashOneArg->{"name"}."\n";
464 print "Type: ".$hashOneArg->{"type"}."\n";
465 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
466 print "Description: ".$hashOneArg->{"descrip"}."\n";
467 print "----------------------------\n";
468 }
469 }
470}
471
472sub OutputStart
473{
474 my ($strColName) = @_;
475 my $intPadding = 17 - length($strColName);
476
477 if ( $strOutputFormat eq "xml" ) {
478 print "<diffcol>\n";
479 } else {
480 print "+---------------------------------------------------------+\n";
481 print "| |\n";
482 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
483 print "| |\n";
484 print "+---------------------------------------------------------+\n\n";
485 }
486}
487
488sub OutputEnd
489{
490 my ($strColName,$aryptErrors) = @_;
491 my $intPadding = 12 - length($strColName);
492 if ( $strOutputFormat eq "xml" ) {
493 print "</diffcol>\n";
494 } else {
495 print "\n";
496 print "+---------------------------------------------------------+\n";
497 print "| |\n";
498 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
499 print "| |\n";
500 print "+---------------------------------------------------------+\n\n";
501 }
502
503 my $intTotalErrors = scalar(@{$aryptErrors});
504 if ( $strOutputFormat ne "xml" ) {
505 print "Checking completed, there is $intTotalErrors error(s) found.\n";
506 }
507
508 if($gv_blnErrorShow ne "off")
509 {
510 foreach my $strEachError (@{$aryptErrors})
511 {
512 if ( $strOutputFormat eq "xml" ) {
513 print "<error>";
514 print $strEachError;
515 print "</error>\n";
516 } else {
517 print "+---------------------------------------------------------+\n";
518 print "| Error |\n";
519 print "+---------------------------------------------------------+\n\n";
520 print "$strEachError\n\n";
521 }
522 }
523 }
524 else
525 {
526 if ( $strOutputFormat ne "xml" ) {
527 print "Use -eshow to show the error detail\n\n";
528 }
529 }
530}
531
532sub AlignPrint
533{
534 my ($strMainString,$strStatus,$intLevel) = @_;
535 my $intDot = 100 - length($strMainString) - length($strStatus);
536 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
537}
538
539
540# this function is only called on DocXMLFiles.
541# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
542# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
543sub isDocXMLFileWindows
544{
545 my ($file_contents) = @_;
546
547 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
548
549 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
550 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
551
552 # for doc.xml:
553 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
554 if($file_contents =~ m@<Metadata name="gsdlsourcefilename">([^>]*)</Metadata>@m) {
555 $gsdlsourcefilename = $1;
556 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
557 return 1;
558 }
559 }
560
561 return 0;
562}
563
564sub TestEach
565{
566 my ($strModel,$strTest,$intLevel,$strColName) = @_;
567 my @Errors = ();
568
569 $intLevel++;
570 if (-d $strModel && -d $strTest)
571 {
572 my @aryInModel = &diffutil::files_in_dir($strModel);
573 my @aryInTest = &diffutil::files_in_dir($strTest);
574
575 # Files to be skipped because they get generated on one OS but not the other
576 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
577 my $skipfiles_re = qr/\.invf\.state\.\d+$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
578 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
579 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
580
581 # Now check all remaining files in the folder exist in both model and test collections
582 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
583 my @aryCorrectFiles = @{$aryTwoPointers[1]};
584 @Errors = @{$aryTwoPointers[0]};
585
586 if(scalar(@Errors) == 0)
587 {
588 foreach my $strEachFile (@aryInModel)
589 {
590 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
591 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
592 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
593 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file
594 {
595 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
596 }
597 else
598 {
599 if ( $strOutputFormat eq "xml" ) {
600 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
601 } else {
602 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
603 }
604 }
605 }
606 }
607 else
608 {
609 foreach my $strEachFile (@aryCorrectFiles)
610 {
611 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
612 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
613 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g))
614 {
615 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
616 }
617 }
618 }
619 if($intLevel == $gv_intVerbosity)
620 {
621 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
622 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
623 }
624 }
625 else
626 {
627 my $ignore_line_re = "<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize)\">.*</Metadata>\\s*\\n*";
628
629 my $strResult;
630
631 # for doc.xml files, need to ignore many date fields. Filter these out before diffing,
632 # in case these don't appear in the same order between collections, since
633 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
634 # when they can show up as unnecessary diff 'errors'
635 if($strModel =~ m/doc\.xml$/) {
636 my ($model_contents, $test_contents);
637 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
638 sysread(FIN, $model_contents, -s FIN);
639 close(FIN);
640 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
641 sysread(FIN, $test_contents, -s FIN);
642 close(FIN);
643
644 $model_contents =~ s/$ignore_line_re//g;
645 $test_contents =~ s/$ignore_line_re//g;
646
647
648 # equalise/normalise the two doc.xml files for OS differences, if there are any
649 my $testIsWin = &isDocXMLFileWindows($test_contents);
650 my $modelIsWin = &isDocXMLFileWindows($model_contents);
651
652 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
653
654 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
655
656 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
657 $$win_contents =~ s@[\r]@@g;
658
659 # make all single windows slashes into single unix slashes
660 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
661 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
662 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
663
664 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
665 # $$win_contents =~ s@\r@\n@mg;
666 }
667
668
669
670 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
671 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
672 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
673
674 # remove all absolute paths upto collect folder from <Metadata /> elements
675 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
676 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
677
678# my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
679# &gdbdiff::print_string_to_file($model_contents, $savepath."model_doc.xml");
680# &gdbdiff::print_string_to_file($test_contents, $savepath."test_doc.xml");
681
682 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
683
684 } else {
685 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
686 }
687
688 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
689 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
690 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
691
692 #$strResult = GeneralOutput($strResult);
693 if ( $strOutputFormat eq "xml" ) {
694 #
695 } else {
696 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
697 }
698 if ($strResult eq "")
699 {
700 if ( $strOutputFormat eq "xml" ) {
701 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
702 } else {
703 AlignPrint("Comparing File","Succeed",$intLevel);
704 }
705 }
706 else
707 {
708# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
709
710 my $strOutput = "Difference Report:\n$strResult\n";
711 if ( $strOutputFormat eq "xml" ) {
712 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
713 } else {
714 AlignPrint("Comparing File","Failed",$intLevel);
715 }
716
717 #$result=`file -b $strModel`; # linux specific test for binary file
718 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
719 if ( "$result" =~ "data" ) {
720 VobPrint( "These binary files differ", $intLevel );
721 } else {
722 VobPrint ( "$strOutput" , $intLevel);
723 }
724
725
726 if ( $strOutputFormat eq "xml" ) {
727 print "</message></file-comparison>";
728 }
729
730 if($gv_blnErrorStop ne "off") { exit; }
731 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
732 }
733 }
734
735 return @Errors;
736}
737
738
739sub FolderTesting
740{
741 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
742 my %hashCount = ();
743 my @Errors = ();
744 my @CorrectFiles = ();
745 my @TwoPointers = (\@Errors,\@CorrectFiles);
746
747 if ( $strOutputFormat eq "xml" ) {
748 #print "<folder-comparison location=\"$strModelFolder\">\n";
749 } else {
750 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
751 }
752
753 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
754 foreach my $strEachItem (@$aryptTest)
755 {
756 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
757 else {$hashCount{$strEachItem} = 'T';}
758 }
759
760 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
761 {
762 if ( $strOutputFormat eq "xml" ) {
763 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
764 } else {
765 AlignPrint("Folder Comparsion","Succeed",$intLevel);
766 }
767 return @TwoPointers;
768 }
769 else
770 {
771 if ( $strOutputFormat eq "xml" ) {
772 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
773 } else {
774 AlignPrint("Folder Comparsion","Failed",$intLevel);
775 }
776
777 foreach my $strEachItem (keys %hashCount)
778 {
779 if($hashCount{$strEachItem} ne 'B')
780 {
781 my $strOutput = "";
782 my $strReport = "";
783
784 if($hashCount{$strEachItem} eq 'M')
785 {
786 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
787 $strReport = "Difference Report: difference found at $strTestFolder";
788 }
789 elsif($hashCount{$strEachItem} eq 'T')
790 {
791 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
792 $strReport = "Difference Report: difference found at $strModelFolder";
793 }
794 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
795
796 VobPrint ("$strOutput\n",$intLevel);
797 $strOutput = $strOutput."\n\t".$strReport."\n";
798 push(@Errors,$strOutput);
799 }
800 else {push(@CorrectFiles,$strEachItem);}
801 }
802 if( $strOutputFormat eq "xml" ) {
803 print "</message></folder-comparison>";
804 }
805
806 return @TwoPointers;
807 }
808}
809
810sub VobPrint
811{
812 my ($strOutput, $intLevel) = @_;
813 my $strTab = "";
814 my $intTab = int($intLevel/2);
815 if($intLevel <= $gv_intVerbosity)
816 {
817 if($intLevel >= 1)
818 {
819 $strTab = "\t"x($intTab+1);
820 $strOutput =~ s/\n$//;
821 $strOutput =~ s/\n/\n$strTab/g;
822 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
823 }
824
825 if( $strOutputFormat eq "xml" ) {
826 $strOutput =~ s/&/&amp;/g;
827 $strOutput =~ s/</&lt;/g;
828 $strOutput =~ s/>/&gt;/g;
829 }
830
831 if ( length( $strOutput ) > 1000 ) {
832 $strOutput = substr( $strOutput, 0, 978) . "... (output truncated)";
833 }
834
835
836 print $strTab.$strOutput."\n";
837 }
838}
839#----##
840
841
842#--Main System----------------------------
843#-----------------------------------------
844# Name: main
845# Perameters: arguments from command line
846# Pre-condition: testing will start by calling this main function.
847# Post-condition: output the test results for one or more collections.
848#-----------------------------------------
849sub main
850{
851 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode);
852 my $strProgName = $0;
853 my $intArgc = scalar(@ARGV);
854
855 #--System Arguments Setup
856 if (!parsargv::parse(\@ARGV,
857 'estop//off', \$strErrorStop,
858 'eshow//off', \$strErrorShow,
859 'verbosity/\d+/1', \$intVerbosity,
860 'mode/[\w\-]+/all', \$strMode,
861 'output/[\w\-]+/text', \$strOutputFormat
862 )) {
863 PrintUsage($strProgName);
864 die "\n";
865 }
866
867 if ($intArgc<1) {
868 PrintUsage($strProgName);
869 die "\n";
870 }
871
872 $gv_blnErrorStop = $strErrorStop;
873 $gv_blnErrorShow = $strErrorShow;
874 $gv_intVerbosity = $intVerbosity;
875 $gv_strMode = SetMode($strMode);
876
877 #----##
878
879 #--Collection(s) Testing
880 foreach $strColName (@ARGV)
881 {
882 my @ErrorsInEachCol;
883 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
884 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
885
886 #--Output(Start)
887 OutputStart($strColName);
888 #----##
889
890 if(-e $strModelCol && -e $strTestCol )
891 {
892
893 #--Individual Testing
894 if ($gv_strMode eq "Individual")
895 {
896 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
897 }
898 #----##
899
900 #--Initial Testing
901 elsif ($gv_strMode eq "Initial")
902 {
903 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
904 }
905 #----##
906
907 #--Full Testing
908 elsif ($gv_strMode eq "Full")
909 {
910 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
911 }
912 #----##
913
914 #--Error Checking
915 else
916 {
917 if ( $strOutputFormat eq "xml" ) {
918 die "<error>Error occoured in main function</error>\n";
919 } else {
920 die "Error occoured in main function.\n";
921 }
922 }
923 #----##
924
925 }
926 else
927 {
928 if( $strOutputFormat eq "xml" ) {
929 die "<error>Cannot find collection: $strColName</error>\n";
930 } else {
931 die "Error: cannot find collection: $strColName\n";
932 }
933 }
934 #----##
935
936 #--Output(Results and Errors)
937 OutputEnd($strColName,\@ErrorsInEachCol);
938 #----##
939
940 }
941}
942#----##
943
944&main();
Note: See TracBrowser for help on using the repository browser.