source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28608

Last change on this file since 28608 was 28608, checked in by ak19, 10 years ago

Ignore the fail.log file

File size: 34.9 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52$gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
53$gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
54
55$gv_blnErrorStop = "false";
56$gv_blnErrorShow = "false";
57$gv_intVerbosity = 0;
58$gv_strMode = "Full";
59$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
60
61%gv_IndivList = ("archives" => 0,
62 "etc" => 0,
63 "images" => 0,
64 "building" => 0,
65 "import" => 0,
66 "index" => 0,
67 "log" => 0,
68 "metadata" => 0,
69 "perllib" => 0,
70 "temp" => 0);
71#----##
72
73#--System Setup---------------------------
74sub SetMode
75{
76 my ($strModeList) = @_;
77 $strModeList =~ s/\|/ /g;
78 my @Modes = split(" ",$strModeList);
79
80 my $blnIndividual = "true";
81 my $blnInitial = "false";
82 my $blnFull = "false";
83
84
85 foreach $strEachMode (@Modes)
86 {
87 if($strEachMode eq "all")
88 {
89 $blnFull = "true";
90 $blnIndividual = "false";
91 }
92 elsif($strEachMode eq "init")
93 {
94 $blnInitial = "true";
95 $blnIndividual = "false";
96 }
97 else
98 {
99 if(defined $gv_IndivList{$strEachMode})
100 {
101 $gv_IndivList{$strEachMode} = 1;
102 }
103 else
104 {
105 die Help("Error: used undefined mode");
106 }
107 }
108 }
109
110 if($blnFull eq "true") {return "Full";}
111 elsif($blnInitial eq "true") {return "Initial";}
112 elsif($blnIndividual eq "true") {return "Individual";}
113 else {die "Error occured in function SetMode!!\n";}
114}
115#----##
116
117#--System Process-------------------------
118sub IndivTest
119{
120 my ($strModelCol,$strTestCol,$strColName) = @_;
121 my @Errors = ();
122 my $intNumberOfErrors = 0;
123 foreach $strEachFolder (keys %gv_IndivList)
124 {
125 if($gv_IndivList{$strEachFolder} == 1)
126 {
127
128 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
129 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
130 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
131 $intNumberOfErrors = scalar(@Errors);
132 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
133 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
134 VobPrint("End Comparing \"$strEachFolder\"\n",0);
135 VobPrint("Difference Found: $intNumberOfErrors\n",0);
136 VobPrint ("\n",0);
137 }
138 }
139 return @Errors;
140}
141
142sub InitTest
143{
144
145 my ($strModelCol,$strTestCol,$strColName) = @_;
146 my $intLevel = 1;
147 my @Errors;
148
149 # Testing Log files
150# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
151# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
152#
153# if(-e $strModelLog && -e $strTestLog)
154# {
155# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
156# if($strLogError ne "")
157# {
158# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
159# VobPrint ("$strLogError\n",$intLevel);
160#
161# $strLogError = "$strLogError";
162# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
163# push(@Errors,$strLogError);
164# }
165# else
166# {
167# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
168# }
169# }
170# else
171# {
172# my $strErrorColName;
173# my $strLogError;
174#
175# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
176# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
177#
178# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
179# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
180# VobPrint ("$strLogError\n",$intLevel);
181# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
182#
183# push(@Errors,$strLogError);
184# }
185# VobPrint ("\n",$intLevel);
186
187 # Testing the build.cfg
188 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
189 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
190
191 if(-e $strModelBcfg && -e $strTestBcfg)
192 {
193 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
194 if($strBcfgError ne "")
195 {
196 if( $strOutputFormat eq "xml" ) {
197 print "<build-cfg succeeded=\"no\">\n<message>";
198 } else {
199 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
200 }
201
202 VobPrint ("$strBcfgError",$intLevel);
203 if( $strOutputFormat eq "xml" ) {
204 print "</message></build-cfg>";
205 }
206
207 $strBcfgError = "$strBcfgError";
208 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
209 push(@Errors,$strBcfgError);
210 }
211 else
212 {
213 if( $strOutputFormat eq "xml" ) {
214 print "<build-cfg succeeded=\"yes\"/>";
215 } else {
216 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
217 }
218 }
219 }
220 else
221 {
222 my $strErrorColName;
223 my $strBcfgError;
224
225 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
226 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
227
228 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
229 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
230 VobPrint ("$strBcfgError\n",$intLevel);
231 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
232
233 push(@Errors,$strBcfgError);
234 }
235 VobPrint ("\n",$intLevel);
236
237 # Testing the collect.cfg
238
239 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
240 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
241
242 if(-e $strModelCcfg && -e $strTestCcfg)
243 {
244 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
245 if($strCcfgError ne "")
246 {
247 if( $strOutputFormat eq "xml" ) {
248 print "<collect-cfg succeeded=\"no\"><message>";
249 } else {
250 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
251 }
252
253 VobPrint ("$strCcfgError",$intLevel);
254
255 if( $strOutputFormat eq "xml" ) {
256 print "</message></collect-cfg>";
257 }
258
259 $strCcfgError = "$strCcfgError";
260 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
261 push(@Errors,$strCcfgError);
262 }
263 else
264 {
265 if( $strOutputFormat eq "xml" ) {
266 print "<collect-cfg succeeded=\"yes\"/>";
267 } else {
268 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
269 }
270 }
271 }
272 else
273 {
274 my $strErrorColName;
275 my $strCcfgError;
276
277 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
278 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
279
280 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
281 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
282 VobPrint ("$strCcfgError\n",$intLevel);
283 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
284
285 push(@Errors,$strCcfgError);
286 }
287
288 VobPrint ("\n",$intLevel);
289
290 # Testing databases
291
292 # index
293 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
294 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
295 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
296 if($strGdbError) {
297 push(@Errors,$strGdbError);
298 }
299
300 # archives
301 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
302 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
303 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
304 if($strGdbError) {
305 push(@Errors,$strGdbError);
306 }
307
308 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
309 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
310 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
311 if($strGdbError) {
312 push(@Errors,$strGdbError);
313 }
314
315 VobPrint ("\n",$intLevel);
316
317 return @Errors;
318}
319
320
321# At present handles gdbm - need to expand to allow for jdbm and other db types
322sub GdbDiff
323{
324 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
325
326 my $strGdbError = 0;
327
328 if(-e $strModelGdb && -e $strTestGdb)
329 {
330 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
331 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
332 if($strGdbError ne "")
333 {
334 if( $strOutputFormat eq "xml" ) {
335 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
336 } else {
337 AlignPrint("Database Comparsion Result","Failed",$intLevel);
338 }
339 VobPrint ("$strGdbError\n",$intLevel);
340
341 if( $strOutputFormat eq "xml" ) {
342 print "</message></database>";
343 }
344
345 $strGdbError = "$strGdbError";
346 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
347 #push(@Errors,$strGdbError);
348 }
349 else
350 {
351 if( $strOutputFormat eq "xml" ) {
352 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
353 } else {
354 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
355 }
356 }
357 }
358 else
359 {
360 my $strErrorColName;
361
362 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
363 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
364
365 AlignPrint("Database Comparsion Result","Failed",$intLevel);
366
367 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
368 VobPrint ("$strGdbError\n",$intLevel);
369
370 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
371
372 }
373
374 return $strGdbError;
375}
376
377sub FullTest
378{
379 my ($strModelCol,$strTestCol,$strColName) = @_;
380 my @Errors = ();
381 my $intLevel = 0;
382 my $intNumberDiffs = 0;
383
384 # <Initial Test>
385 if( $strOutputFormat eq "xml" ) {
386 #print "<initial-test>";
387 } else {
388 VobPrint("Initial Testing Start\n",$intLevel);
389 }
390
391 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
392 $intNumberDiffs = scalar(@Errors);
393
394 if( $strOutputFormat eq "xml" ) {
395 #print "</initial-test>";
396 } else {
397 VobPrint("Initial Testing End\n",$intLevel);
398 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
399 VobPrint("\n",$intLevel);
400 }
401 # </Initial Test>
402
403 # <Detailed Test>
404 if( $strOutputFormat eq "xml" ) {
405 #print "<detailed-test>";
406 } else {
407 VobPrint("Detail Testing Start\n",$intLevel);
408 }
409 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
410 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
411
412 if( $strOutputFormat eq "xml" ) {
413 #print "</detailed-test>";
414 } else {
415 VobPrint("Detail Testing End\n",$intLevel);
416 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
417 }
418 # </Detailed Test>
419
420 return @Errors;
421}
422#----##
423
424
425#--Other System Utilities
426sub PrintUsage
427{
428 my ($strProgName) = @_;
429 if ( $strOutputFormat eq "xml" ) {
430 print "<error>usage incorrect</error>\n";
431 } else {
432 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
433 }
434 Help("Error: used incorrect parameters");
435}
436
437sub Help
438{
439 my ($strError) = @_;
440 my $aryptHelps =
441 [ { 'name' => "verbosity",
442 'type' => "scale",
443 'argu' => "a integer" ,
444 'descrip' => "this parameter setup the verbosity of the testing result"},
445 { 'name' => "mode",
446 'type' => "option",
447 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
448 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
449 { 'name' => "estop",
450 'type' => "flag",
451 'argu' => "NULL" ,
452 'descrip' => "Set then system will stop once it meet an error"},
453 { 'name' => "eshow",
454 'type' => "flag",
455 'argu' => "NULL" ,
456 'descrip' => "Set then system will show the error summary"}
457 ];
458
459
460 if ( $strOutputFormat ne "xml" ) {
461 print "$strError\n";
462
463 foreach my $hashOneArg (@{$aryptHelps})
464 {
465 print "\n----------------------------\n";
466 print "Parameters: -".$hashOneArg->{"name"}."\n";
467 print "Type: ".$hashOneArg->{"type"}."\n";
468 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
469 print "Description: ".$hashOneArg->{"descrip"}."\n";
470 print "----------------------------\n";
471 }
472 }
473}
474
475sub OutputStart
476{
477 my ($strColName) = @_;
478 my $intPadding = 17 - length($strColName);
479
480 if ( $strOutputFormat eq "xml" ) {
481 print "<diffcol>\n";
482 } else {
483 print "+---------------------------------------------------------+\n";
484 print "| |\n";
485 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
486 print "| |\n";
487 print "+---------------------------------------------------------+\n\n";
488 }
489}
490
491sub OutputEnd
492{
493 my ($strColName,$aryptErrors) = @_;
494 my $intPadding = 12 - length($strColName);
495 if ( $strOutputFormat eq "xml" ) {
496 print "</diffcol>\n";
497 } else {
498 print "\n";
499 print "+---------------------------------------------------------+\n";
500 print "| |\n";
501 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
502 print "| |\n";
503 print "+---------------------------------------------------------+\n\n";
504 }
505
506 my $intTotalErrors = scalar(@{$aryptErrors});
507 if ( $strOutputFormat ne "xml" ) {
508 print "Checking completed, there is $intTotalErrors error(s) found.\n";
509 }
510
511 if($gv_blnErrorShow ne "off")
512 {
513 foreach my $strEachError (@{$aryptErrors})
514 {
515 if ( $strOutputFormat eq "xml" ) {
516 print "<error>";
517 print $strEachError;
518 print "</error>\n";
519 } else {
520 print "+---------------------------------------------------------+\n";
521 print "| Error |\n";
522 print "+---------------------------------------------------------+\n\n";
523 print "$strEachError\n\n";
524 }
525 }
526 }
527 else
528 {
529 if ( $strOutputFormat ne "xml" ) {
530 print "Use -eshow to show the error detail\n\n";
531 }
532 }
533}
534
535sub AlignPrint
536{
537 my ($strMainString,$strStatus,$intLevel) = @_;
538 my $intDot = 100 - length($strMainString) - length($strStatus);
539 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
540}
541
542
543# this function is only called on DocXMLFiles.
544# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
545# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
546sub isDocOrMETSXMLFileWindows
547{
548 my ($file_contents) = @_;
549
550 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
551
552 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
553 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
554
555 # for doc.xml:
556 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
557 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
558 $gsdlsourcefilename = $2;
559 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
560 return 1;
561 }
562 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
563 return 1;
564 }
565
566 return 0;
567}
568
569sub TestEach
570{
571 my ($strModel,$strTest,$intLevel,$strColName) = @_;
572 my @Errors = ();
573
574 $intLevel++;
575 if (-d $strModel && -d $strTest)
576 {
577 my @aryInModel = &diffutil::files_in_dir($strModel);
578 my @aryInTest = &diffutil::files_in_dir($strTest);
579
580 # Files to be skipped because they get generated on one OS but not the other
581 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
582 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
583 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
584 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
585
586 # Now check all remaining files in the folder exist in both model and test collections
587 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
588 my @aryCorrectFiles = @{$aryTwoPointers[1]};
589 @Errors = @{$aryTwoPointers[0]};
590
591 if(scalar(@Errors) == 0)
592 {
593 foreach my $strEachFile (@aryInModel)
594 {
595 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
596 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
597 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
598 if(!($strEachFile eq "log" || $strEachFile eq "fail.log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
599 {
600 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
601 }
602 else
603 {
604 if ( $strOutputFormat eq "xml" ) {
605 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
606 } else {
607 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
608 }
609 }
610 }
611 }
612 else
613 {
614 foreach my $strEachFile (@aryCorrectFiles)
615 {
616 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
617 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
618 if(!($strEachFile eq "log" || $strEachFile eq "fail.log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
619 {
620 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
621 }
622 }
623 }
624 if($intLevel == $gv_intVerbosity)
625 {
626 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
627 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
628 }
629 }
630 else
631 {
632 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
633 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
634
635 my $strResult;
636
637 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
638 # in case these don't appear in the same order between collections, since
639 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
640 # when they can show up as unnecessary diff 'errors'
641
642 my ($model_contents, $test_contents);
643
644 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
645 if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
646
647 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
648 sysread(FIN, $model_contents, -s FIN);
649 close(FIN);
650 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
651 sysread(FIN, $test_contents, -s FIN);
652 close(FIN);
653
654 $model_contents =~ s/$ignore_line_re//g;
655 $test_contents =~ s/$ignore_line_re//g;
656
657
658 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
659 # before comparing a windows test with a linux model or vice-versa
660 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
661 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
662
663 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
664
665 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
666 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
667
668 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
669 $$win_contents =~ s@[\r]@@g;
670
671 # make all single windows slashes into single unix slashes
672 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
673 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
674 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
675
676 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
677 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
678
679 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
680 # $$win_contents =~ s@\r@\n@mg;
681
682 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
683 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
684 # containing solitary carriage returns instead of linefeed
685 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
686
687
688 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
689 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
690 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
691 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
692 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
693
694 # Advanced Beatles collection,
695 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
696 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
697 # Normalising to windows version for doing a diff
698 $$lin_contents =~ s@href=http:///@href=http://@g;
699 }
700 }
701
702 # processing particular to doc.xml
703 if($strModel =~ m/doc\.xml$/) {
704 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
705 # these tmpdirs are located inside the collection directory
706 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
707 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
708
709 # remove all absolute paths upto collect folder from <Metadata /> elements
710 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
711 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
712
713 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
714 # These tmpdirs are located inside the toplevel *greenstone* directory
715 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
716 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
717 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
718
719 if($test_contents =~ m@$tmpfile_regex@) {
720 # found a match, replace the tmp file name with "random", keeping the original file extension
721 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
722
723 my ($old_tmp_filename, $ext) = ($1, $2);
724 my $new_tmp_filename = "random";
725
726 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
727 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
728 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
729 if($5) {
730 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
731 } else { # OrigSource contains only the filename
732 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
733 }
734
735 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
736 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
737 if($5) {
738 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
739 } else { # OrigSource contains only the filename
740 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
741 }
742 }
743
744 } # finished special processing of doc.xml files
745
746 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
747# &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
748# &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
749# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
750# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
751# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
752# }
753
754
755
756 # now can diff the normalised versions of the doc.xml/docmets.xml files:
757 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
758
759 } else {
760 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
761 }
762
763 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
764 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
765 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
766
767 #$strResult = GeneralOutput($strResult);
768 if ( $strOutputFormat eq "xml" ) {
769 #
770 } else {
771 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
772 }
773 if ($strResult eq "")
774 {
775 if ( $strOutputFormat eq "xml" ) {
776 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
777 } else {
778 AlignPrint("Comparing File","Succeed",$intLevel);
779 }
780 }
781 else
782 {
783# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
784
785 my $strOutput = "Difference Report:\n$strResult\n";
786 if ( $strOutputFormat eq "xml" ) {
787 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
788 } else {
789 AlignPrint("Comparing File","Failed",$intLevel);
790 }
791
792 #$result=`file -b $strModel`; # linux specific test for binary file
793 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
794 if ( "$result" =~ "data" ) {
795 VobPrint( "These binary files differ", $intLevel );
796 } else {
797 VobPrint ( "$strOutput" , $intLevel);
798 }
799
800
801 if ( $strOutputFormat eq "xml" ) {
802 print "</message></file-comparison>";
803 }
804
805 if($gv_blnErrorStop ne "off") { exit; }
806 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
807 }
808 }
809
810 return @Errors;
811}
812
813
814sub FolderTesting
815{
816 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
817 my %hashCount = ();
818 my @Errors = ();
819 my @CorrectFiles = ();
820 my @TwoPointers = (\@Errors,\@CorrectFiles);
821
822 if ( $strOutputFormat eq "xml" ) {
823 #print "<folder-comparison location=\"$strModelFolder\">\n";
824 } else {
825 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
826 }
827
828 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
829 foreach my $strEachItem (@$aryptTest)
830 {
831 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
832 else {$hashCount{$strEachItem} = 'T';}
833 }
834
835 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
836 {
837 if ( $strOutputFormat eq "xml" ) {
838 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
839 } else {
840 AlignPrint("Folder Comparsion","Succeed",$intLevel);
841 }
842 return @TwoPointers;
843 }
844 else
845 {
846 if ( $strOutputFormat eq "xml" ) {
847 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
848 } else {
849 AlignPrint("Folder Comparsion","Failed",$intLevel);
850 }
851
852 foreach my $strEachItem (keys %hashCount)
853 {
854 if($hashCount{$strEachItem} ne 'B')
855 {
856 my $strOutput = "";
857 my $strReport = "";
858
859 if($hashCount{$strEachItem} eq 'M')
860 {
861 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
862 $strReport = "Difference Report: difference found at $strTestFolder";
863 }
864 elsif($hashCount{$strEachItem} eq 'T')
865 {
866 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
867 $strReport = "Difference Report: difference found at $strModelFolder";
868 }
869 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
870
871 VobPrint ("$strOutput\n",$intLevel);
872 $strOutput = $strOutput."\n\t".$strReport."\n";
873 push(@Errors,$strOutput);
874 }
875 else {push(@CorrectFiles,$strEachItem);}
876 }
877 if( $strOutputFormat eq "xml" ) {
878 print "</message></folder-comparison>";
879 }
880
881 return @TwoPointers;
882 }
883}
884
885sub VobPrint
886{
887 my ($strOutput, $intLevel) = @_;
888 my $strTab = "";
889 my $intTab = int($intLevel/2);
890 if($intLevel <= $gv_intVerbosity)
891 {
892 if($intLevel >= 1)
893 {
894 $strTab = "\t"x($intTab+1);
895 $strOutput =~ s/\n$//;
896 $strOutput =~ s/\n/\n$strTab/g;
897 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
898 }
899
900 if( $strOutputFormat eq "xml" ) {
901 $strOutput =~ s/&/&amp;/g;
902 $strOutput =~ s/</&lt;/g;
903 $strOutput =~ s/>/&gt;/g;
904 }
905
906 if ( length( $strOutput ) > 1000 ) {
907 $strOutput = substr( $strOutput, 0, 978);
908
909 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
910 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
911 if(defined $1 && $1) {
912 my $rest = $1;
913 if($rest =~ m/^a/) {
914 $strOutput =~ s@am?p?$@amp;@;
915 }
916 elsif($rest eq "g" || $rest eq "l") {
917 $strOutput .= "t;"; # close the known tag
918 }
919 elsif($rest eq "gt" || $rest eq "lt") {
920 $strOutput .= ";";
921 }
922 } else { # & on its own
923 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
924 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
925 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
926 }
927 }
928 $strOutput .= "... (output truncated)";
929 }
930
931
932 print $strTab.$strOutput."\n";
933 }
934}
935#----##
936
937
938#--Main System----------------------------
939#-----------------------------------------
940# Name: main
941# Perameters: arguments from command line
942# Pre-condition: testing will start by calling this main function.
943# Post-condition: output the test results for one or more collections.
944#-----------------------------------------
945sub main
946{
947 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
948 my $strProgName = $0;
949 my $intArgc = scalar(@ARGV);
950
951 #--System Arguments Setup
952 if (!parsargv::parse(\@ARGV,
953 'estop//off', \$strErrorStop,
954 'eshow//off', \$strErrorShow,
955 'verbosity/\d+/1', \$intVerbosity,
956 'mode/[\w\-]+/all', \$strMode,
957 'output/[\w\-]+/text', \$strOutputFormat,
958 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
959 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
960 )) {
961 PrintUsage($strProgName);
962 die "\n";
963 }
964
965 if ($intArgc<1) {
966 PrintUsage($strProgName);
967 die "\n";
968 }
969
970 $gv_test_os = $test_os; # if not specified, defaults to "compute"
971 $gv_model_os = $model_os; # tends to be linux
972
973 $gv_blnErrorStop = $strErrorStop;
974 $gv_blnErrorShow = $strErrorShow;
975 $gv_intVerbosity = $intVerbosity;
976 $gv_strMode = SetMode($strMode);
977
978 #----##
979
980 #--Collection(s) Testing
981 foreach $strColName (@ARGV)
982 {
983 my @ErrorsInEachCol;
984 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
985 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
986
987 #--Output(Start)
988 OutputStart($strColName);
989 #----##
990
991 if(-e $strModelCol && -e $strTestCol )
992 {
993
994 #--Individual Testing
995 if ($gv_strMode eq "Individual")
996 {
997 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
998 }
999 #----##
1000
1001 #--Initial Testing
1002 elsif ($gv_strMode eq "Initial")
1003 {
1004 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1005 }
1006 #----##
1007
1008 #--Full Testing
1009 elsif ($gv_strMode eq "Full")
1010 {
1011 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1012 }
1013 #----##
1014
1015 #--Error Checking
1016 else
1017 {
1018 if ( $strOutputFormat eq "xml" ) {
1019 die "<error>Error occoured in main function</error>\n";
1020 } else {
1021 die "Error occoured in main function.\n";
1022 }
1023 }
1024 #----##
1025
1026 }
1027 else
1028 {
1029 if( $strOutputFormat eq "xml" ) {
1030 die "<error>Cannot find collection: $strColName</error>\n";
1031 } else {
1032 die "Error: cannot find collection: $strColName\n";
1033 }
1034 }
1035 #----##
1036
1037 #--Output(Results and Errors)
1038 OutputEnd($strColName,\@ErrorsInEachCol);
1039 #----##
1040
1041 }
1042}
1043#----##
1044
1045&main();
Note: See TracBrowser for help on using the repository browser.