source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

Last change on this file was 38067, checked in by anupama, 8 months ago

Diffcol complained that logdiff was not to be found in @INC, so added its containing diffcol folder into @INC. This may be a problem specific to having TASK_HOME located outside envi, as I've not experienced this issue before.

File size: 37.9 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{TASK_HOME}/diffcol/");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
37 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
38}
39
40use parsargv;
41use util;
42use FileUtils;
43use logdiff;
44use cfgdiff;
45use flatdbdiff;
46use diffutil;
47use Text::Diff;
48use Cwd;
49
50#--Global Variables Declaration-----------
51my ($gv_test_os, $gv_model_os); # still just file globals
52
53if($ENV{'GSDL3SRCHOME'}) {
54 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
55 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
56} else {
57 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
58 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
59}
60
61$gv_blnDebugging = 0;
62$gv_blnErrorStop = "false";
63$gv_blnErrorShow = "false";
64$gv_intVerbosity = 0;
65$gv_strMode = "Full";
66$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
67
68%gv_IndivList = ("archives" => 0,
69 "etc" => 0,
70 "images" => 0,
71 "building" => 0,
72 "import" => 0,
73 "index" => 0,
74 "log" => 0,
75 "metadata" => 0,
76 "perllib" => 0,
77 "temp" => 0);
78#----##
79
80#--System Setup---------------------------
81sub SetMode
82{
83 my ($strModeList) = @_;
84 $strModeList =~ s/\|/ /g;
85 my @Modes = split(" ",$strModeList);
86
87 my $blnIndividual = "true";
88 my $blnInitial = "false";
89 my $blnFull = "false";
90
91
92 foreach $strEachMode (@Modes)
93 {
94 if($strEachMode eq "all")
95 {
96 $blnFull = "true";
97 $blnIndividual = "false";
98 }
99 elsif($strEachMode eq "init")
100 {
101 $blnInitial = "true";
102 $blnIndividual = "false";
103 }
104 else
105 {
106 if(defined $gv_IndivList{$strEachMode})
107 {
108 $gv_IndivList{$strEachMode} = 1;
109 }
110 else
111 {
112 die Help("Error: used undefined mode");
113 }
114 }
115 }
116
117 if($blnFull eq "true") {return "Full";}
118 elsif($blnInitial eq "true") {return "Initial";}
119 elsif($blnIndividual eq "true") {return "Individual";}
120 else {die "Error occured in function SetMode!!\n";}
121}
122#----##
123
124#--System Process-------------------------
125sub IndivTest
126{
127 my ($strModelCol,$strTestCol,$strColName) = @_;
128 my @Errors = ();
129 my $intNumberOfErrors = 0;
130 foreach $strEachFolder (keys %gv_IndivList)
131 {
132 if($gv_IndivList{$strEachFolder} == 1)
133 {
134
135 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
136 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
137 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
138 $intNumberOfErrors = scalar(@Errors);
139 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
140 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
141 VobPrint("End Comparing \"$strEachFolder\"\n",0);
142 VobPrint("Difference Found: $intNumberOfErrors\n",0);
143 VobPrint ("\n",0);
144 }
145 }
146 return @Errors;
147}
148
149sub InitTest
150{
151
152 my ($strModelCol,$strTestCol,$strColName) = @_;
153 my $intLevel = 1;
154 my @Errors;
155
156 # Testing Log files
157# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
158# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
159#
160# if(-e $strModelLog && -e $strTestLog)
161# {
162# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
163# if($strLogError ne "")
164# {
165# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
166# VobPrint ("$strLogError\n",$intLevel);
167#
168# $strLogError = "$strLogError";
169# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
170# push(@Errors,$strLogError);
171# }
172# else
173# {
174# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
175# }
176# }
177# else
178# {
179# my $strErrorColName;
180# my $strLogError;
181#
182# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
183# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
184#
185# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
186# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
187# VobPrint ("$strLogError\n",$intLevel);
188# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
189#
190# push(@Errors,$strLogError);
191# }
192# VobPrint ("\n",$intLevel);
193
194
195 if($ENV{'GSVERSION'} ne "3") {
196 # Testing the build.cfg
197 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
198 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
199
200 if(-e $strModelBcfg && -e $strTestBcfg)
201 {
202 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
203 if($strBcfgError ne "")
204 {
205 if( $strOutputFormat eq "xml" ) {
206 print "<build-cfg succeeded=\"no\">\n<message>";
207 } else {
208 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
209 }
210
211 VobPrint ("$strBcfgError",$intLevel);
212 if( $strOutputFormat eq "xml" ) {
213 print "</message></build-cfg>";
214 }
215
216 $strBcfgError = "$strBcfgError";
217 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
218 push(@Errors,$strBcfgError);
219 }
220 else
221 {
222 if( $strOutputFormat eq "xml" ) {
223 print "<build-cfg succeeded=\"yes\"/>";
224 } else {
225 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
226 }
227 }
228 }
229 else
230 {
231 my $strErrorColName;
232 my $strBcfgError;
233
234 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
235 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
236
237 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
238 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
239 VobPrint ("$strBcfgError\n",$intLevel);
240 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
241
242 push(@Errors,$strBcfgError);
243 }
244 VobPrint ("\n",$intLevel);
245
246 # Testing the collect.cfg
247
248 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
249 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
250
251 if(-e $strModelCcfg && -e $strTestCcfg)
252 {
253 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
254 if($strCcfgError ne "")
255 {
256 if( $strOutputFormat eq "xml" ) {
257 print "<collect-cfg succeeded=\"no\"><message>";
258 } else {
259 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
260 }
261
262 VobPrint ("$strCcfgError",$intLevel);
263
264 if( $strOutputFormat eq "xml" ) {
265 print "</message></collect-cfg>";
266 }
267
268 $strCcfgError = "$strCcfgError";
269 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
270 push(@Errors,$strCcfgError);
271 }
272 else
273 {
274 if( $strOutputFormat eq "xml" ) {
275 print "<collect-cfg succeeded=\"yes\"/>";
276 } else {
277 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
278 }
279 }
280 }
281 else
282 {
283 my $strErrorColName;
284 my $strCcfgError;
285
286 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
287 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
288
289 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
290 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
291 VobPrint ("$strCcfgError\n",$intLevel);
292 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
293
294 push(@Errors,$strCcfgError);
295 }
296
297 VobPrint ("\n",$intLevel);
298 } # End of code that only really pertains to GS2, as GS3's buildConfig.xml gets diffcolled with XML files later
299
300
301 # Testing databases
302
303 # index
304 my $flat_db_ext = ".gdb";
305
306 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
307 if(!(-e $strModelGdb)){ # try jdb
308 $flat_db_ext = ".jdb";
309 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
310 }
311
312#print STDERR "*********** USING DB EXTENSION: $flat_db_ext\n";
313#print STDERR "@@@ PERL_PERTURB_KEYS: ".$ENV{'PERL_PERTURB_KEYS'}."\n";
314#print STDERR "@@@ PERL_HASH_SEED: ".$ENV{'PERL_HASH_SEED'}."\n";
315
316 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName$flat_db_ext");
317 my $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
318 if($strGdbError) {
319 push(@Errors,$strGdbError);
320 }
321
322 # archives
323 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc$flat_db_ext");
324 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc$flat_db_ext");
325 $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
326 if($strGdbError) {
327 push(@Errors,$strGdbError);
328 }
329
330 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src$flat_db_ext");
331 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src$flat_db_ext");
332 $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
333 if($strGdbError) {
334 push(@Errors,$strGdbError);
335 }
336
337 VobPrint ("\n",$intLevel);
338
339 return @Errors;
340}
341
342
343# At present handles gdbm and jdbm - may need to expand to allow for other db types
344sub FlatdbDiff
345{
346 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
347
348 my $strGdbError = 0;
349
350 if(-e $strModelGdb && -e $strTestGdb)
351 {
352 #my $strGdbError = flatdbdiff::test_flat_db($strModelGdb, $strTestGdb);
353
354 $strGdbError = flatdbdiff::test_flat_db($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
355
356 if($strGdbError ne "")
357 {
358 if( $strOutputFormat eq "xml" ) {
359 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
360 } else {
361 AlignPrint("Database Comparison Result","Failed",$intLevel);
362 }
363 VobPrint ("$strGdbError\n",$intLevel);
364
365 if( $strOutputFormat eq "xml" ) {
366 print "</message></database>";
367 }
368
369 $strGdbError = "$strGdbError";
370 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
371 #push(@Errors,$strGdbError);
372 }
373 else
374 {
375 if( $strOutputFormat eq "xml" ) {
376 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
377 } else {
378 AlignPrint("Database Comparison Result","Succeed",$intLevel);
379 }
380 }
381 }
382 else
383 {
384 my $strErrorColName;
385
386 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
387 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
388
389 AlignPrint("Database Comparison Result","Failed",$intLevel);
390
391 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
392 VobPrint ("$strGdbError\n",$intLevel);
393
394 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
395
396 }
397
398 return $strGdbError;
399}
400
401sub FullTest
402{
403 my ($strModelCol,$strTestCol,$strColName) = @_;
404 my @Errors = ();
405 my $intLevel = 0;
406 my $intNumberDiffs = 0;
407
408 # <Initial Test>
409 if( $strOutputFormat eq "xml" ) {
410 #print "<initial-test>";
411 } else {
412 VobPrint("Initial Testing Start\n",$intLevel);
413 }
414
415 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
416 $intNumberDiffs = scalar(@Errors);
417
418 if( $strOutputFormat eq "xml" ) {
419 #print "</initial-test>";
420 } else {
421 VobPrint("Initial Testing End\n",$intLevel);
422 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
423 VobPrint("\n",$intLevel);
424 }
425 # </Initial Test>
426
427 # <Detailed Test>
428 if( $strOutputFormat eq "xml" ) {
429 #print "<detailed-test>";
430 } else {
431 VobPrint("Detail Testing Start\n",$intLevel);
432 }
433 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
434 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
435
436 if( $strOutputFormat eq "xml" ) {
437 #print "</detailed-test>";
438 } else {
439 VobPrint("Detail Testing End\n",$intLevel);
440 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
441 }
442 # </Detailed Test>
443
444 return @Errors;
445}
446#----##
447
448
449#--Other System Utilities
450sub PrintUsage
451{
452 my ($strProgName) = @_;
453 if ( $strOutputFormat eq "xml" ) {
454 print "<error>usage incorrect</error>\n";
455 } else {
456 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
457 }
458 Help("Error: used incorrect parameters");
459}
460
461sub Help
462{
463 my ($strError) = @_;
464 my $aryptHelps =
465 [ { 'name' => "verbosity",
466 'type' => "scale",
467 'argu' => "a integer" ,
468 'descrip' => "this parameter setup the verbosity of the testing result"},
469 { 'name' => "mode",
470 'type' => "option",
471 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
472 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
473 { 'name' => "estop",
474 'type' => "flag",
475 'argu' => "NULL" ,
476 'descrip' => "Set then system will stop once it meets an error"},
477 { 'name' => "eshow",
478 'type' => "flag",
479 'argu' => "NULL" ,
480 'descrip' => "Set then system will show the error summary"},
481 { 'name' => "debug",
482 'type' => "flag",
483 'argu' => "NULL" ,
484 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
485 ];
486
487
488 if ( $strOutputFormat ne "xml" ) {
489 print "$strError\n";
490
491 foreach my $hashOneArg (@{$aryptHelps})
492 {
493 print "\n----------------------------\n";
494 print "Parameters: -".$hashOneArg->{"name"}."\n";
495 print "Type: ".$hashOneArg->{"type"}."\n";
496 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
497 print "Description: ".$hashOneArg->{"descrip"}."\n";
498 print "----------------------------\n";
499 }
500 }
501}
502
503sub OutputStart
504{
505 my ($strColName) = @_;
506 my $intPadding = 17 - length($strColName);
507
508 if ( $strOutputFormat eq "xml" ) {
509 print "<diffcol>\n";
510 } else {
511 print "+---------------------------------------------------------+\n";
512 print "| |\n";
513 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
514 print "| |\n";
515 print "+---------------------------------------------------------+\n\n";
516 }
517}
518
519sub OutputEnd
520{
521 my ($strColName,$aryptErrors) = @_;
522 my $intPadding = 12 - length($strColName);
523 if ( $strOutputFormat eq "xml" ) {
524 print "</diffcol>\n";
525 } else {
526 print "\n";
527 print "+---------------------------------------------------------+\n";
528 print "| |\n";
529 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
530 print "| |\n";
531 print "+---------------------------------------------------------+\n\n";
532 }
533
534 my $intTotalErrors = scalar(@{$aryptErrors});
535 if ( $strOutputFormat ne "xml" ) {
536 print "Checking completed, there is $intTotalErrors error(s) found.\n";
537 }
538
539 if($gv_blnErrorShow ne "off")
540 {
541 foreach my $strEachError (@{$aryptErrors})
542 {
543 if ( $strOutputFormat eq "xml" ) {
544 print "<error>";
545 print $strEachError;
546 print "</error>\n";
547 } else {
548 print "+---------------------------------------------------------+\n";
549 print "| Error |\n";
550 print "+---------------------------------------------------------+\n\n";
551 print "$strEachError\n\n";
552 }
553 }
554 }
555 else
556 {
557 if ( $strOutputFormat ne "xml" ) {
558 print "Use -eshow to show the error detail\n\n";
559 }
560 }
561}
562
563sub AlignPrint
564{
565 my ($strMainString,$strStatus,$intLevel) = @_;
566 my $intDot = 100 - length($strMainString) - length($strStatus);
567 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
568}
569
570
571# this function is only called on DocXMLFiles.
572# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in flatdbdiff.pm)
573# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
574sub isDocOrMETSXMLFileWindows
575{
576 my ($file_contents) = @_;
577
578 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
579
580 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
581 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
582
583 # for doc.xml:
584 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
585 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
586 $gsdlsourcefilename = $2;
587 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
588 return 1;
589 }
590 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
591 return 1;
592 }
593
594 return 0;
595}
596
597sub TestEach
598{
599 my ($strModel,$strTest,$intLevel,$strColName) = @_;
600 my @Errors = ();
601
602 $intLevel++;
603 if (-d $strModel && -d $strTest)
604 {
605 my @aryInModel = &diffutil::files_in_dir($strModel);
606 my @aryInTest = &diffutil::files_in_dir($strTest);
607
608 # Files to be skipped because they get generated on one OS but not the other
609 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
610 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|archiveinf-timestamp\.out|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
611 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
612 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
613
614 # Now check all remaining files in the folder exist in both model and test collections
615 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
616 my @aryCorrectFiles = @{$aryTwoPointers[1]};
617 @Errors = @{$aryTwoPointers[0]};
618
619 if(scalar(@Errors) == 0)
620 {
621 foreach my $strEachFile (@aryInModel)
622 {
623 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
624 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
625 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
626 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
627 {
628 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
629 }
630 else
631 {
632 if ( $strOutputFormat eq "xml" ) {
633 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
634 } else {
635 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
636 }
637 }
638 }
639 }
640 else
641 {
642 foreach my $strEachFile (@aryCorrectFiles)
643 {
644 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
645 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
646 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
647 {
648 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
649 }
650 }
651 }
652 if($intLevel == $gv_intVerbosity)
653 {
654 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparison","Succeed",$intLevel);}
655 else { AlignPrint("Contents Comparison","Failed",$intLevel);}
656 }
657 }
658 else
659 {
660 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
661 my $ignore_line_re = "<(.*?:)?(?:M|m)etadata name=\"(buildDate|earliestDatestamp|lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ex.File.FileAccessDate|ex.File.FileInodeChangeDate|ex.File.FileCreateDate|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?(?:M|m)etadata>\\s*\\n*";
662
663 my $strResult;
664
665 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
666 # in case these don't appear in the same order between collections, since
667 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
668 # when they can show up as unnecessary diff 'errors'
669
670 my ($model_contents, $test_contents);
671
672 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
673 if($strModel =~ m/buildConfig\.xml$/ || $strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
674
675 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
676 sysread(FIN, $model_contents, -s FIN);
677 close(FIN);
678 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
679 sysread(FIN, $test_contents, -s FIN);
680 close(FIN);
681
682 $model_contents =~ s/$ignore_line_re//g;
683 $test_contents =~ s/$ignore_line_re//g;
684
685
686 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
687 # before comparing a windows test with a linux model or vice-versa
688 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
689 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
690
691 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
692
693 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
694 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
695
696 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
697 $$win_contents =~ s@[\r]@@g;
698
699 # make all single windows slashes into single unix slashes
700 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
701 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
702 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
703
704 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
705 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
706
707 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
708 # $$win_contents =~ s@\r@\n@mg;
709
710 if($strModel =~ m/doc\.xml$/) { # processing that's particular to doc.xml
711 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
712 # containing solitary carriage returns instead of linefeed
713 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
714
715
716 # replace all multiple newlines with single one (for Enhanced-PDF, xpdf-tools introduces more newlines on Windows than linux)
717 $$win_contents =~ s@(\n)+@\n@g;
718 $$lin_contents =~ s@(\n)+@\n@g;
719
720 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
721 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
722 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
723 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
724 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
725
726 # Advanced Beatles collection,
727 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
728 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
729 # Normalising to windows version for doing a diff
730 $$lin_contents =~ s@href=http:///@href=http://@g;
731 }
732 }
733
734 # remove all absolute paths upto collect folder from <Metadata /> elements
735 if($strModel =~ m/docmets\.xml$/) {
736 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
737 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
738 }
739
740 # processing that's particular to doc.xml
741 if($strModel =~ m/doc\.xml$/) {
742 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
743 # these tmpdirs are located inside the collection directory
744 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
745 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
746
747 # remove all absolute paths upto collect folder from <Metadata /> elements
748 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
749 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
750
751 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
752 # These tmpdirs are located inside the toplevel *greenstone* directory
753 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
754 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
755 #my $tmpfile_regex = "<Metadata name=\"URL\">https?://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
756 my $tmpfile_regex = "<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
757
758 if($test_contents =~ m@$tmpfile_regex@) {
759 # found a match, replace the tmp file name with "random", keeping the original file extension
760 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
761
762 my ($old_tmp_filename, $ext) = ($3, $4);
763 my $new_tmp_filename = "random";
764 $old_tmp_filename =~ s@.*?([^/]*)$@$1@; # remove any intermediate timestamp folders under tmp dir
765
766 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
767 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
768 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?.*?($old_tmp_filename)($ext</Metadata>)";
769 if($5) { # there is a tmp folder
770 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$7@mg;
771 } else { # OrigSource contains only the filename
772 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$7@mg;
773 }
774
775 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
776 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
777 if($5) {
778 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
779 } else { # OrigSource contains only the filename
780 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
781 }
782 }
783
784 } # finished special processing of doc.xml files
785
786 #$gv_blnDebugging=1;
787 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
788 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
789 &flatdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
790 &flatdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
791# if($strModel =~ m/doc\.xml$/ && $strModel =~ m/archives/
792# && $strModel =~ m/(HASH019c.dir|HASH1a9c.dir)/) # list the HASH dirs for which you want the (archives) doc.xml file generated, to inspect specific doc.xml files
793# {
794# &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
795# &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
796# }
797 }
798 #$gv_blnDebugging=0;
799
800 # now can diff the normalised versions of the doc.xml/docmets.xml files:
801 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
802
803 } else {
804 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
805 }
806
807 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
808 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
809 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
810
811 #$strResult = GeneralOutput($strResult);
812 if ( $strOutputFormat eq "xml" ) {
813 #
814 } else {
815 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
816 }
817 if ($strResult eq "")
818 {
819 if ( $strOutputFormat eq "xml" ) {
820 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
821 } else {
822 AlignPrint("Comparing File","Succeed",$intLevel);
823 }
824 }
825 else
826 {
827# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
828
829 my $strOutput = "Difference Report:\n$strResult\n";
830 if ( $strOutputFormat eq "xml" ) {
831 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
832 } else {
833 AlignPrint("Comparing File","Failed",$intLevel);
834 }
835
836 #$result=`file -b $strModel`; # linux specific test for binary file
837 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
838 if ( "$result" =~ "data" ) {
839 VobPrint( "These binary files differ", $intLevel );
840 } else {
841 VobPrint ( "$strOutput" , $intLevel);
842 }
843
844
845 if ( $strOutputFormat eq "xml" ) {
846 print "</message></file-comparison>";
847 }
848
849 if($gv_blnErrorStop ne "off") { exit; }
850 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
851 }
852 }
853
854 return @Errors;
855}
856
857
858sub FolderTesting
859{
860 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
861 my %hashCount = ();
862 my @Errors = ();
863 my @CorrectFiles = ();
864 my @TwoPointers = (\@Errors,\@CorrectFiles);
865
866 if ( $strOutputFormat eq "xml" ) {
867 #print "<folder-comparison location=\"$strModelFolder\">\n";
868 } else {
869 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
870 }
871
872 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
873 foreach my $strEachItem (@$aryptTest)
874 {
875 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
876 else {$hashCount{$strEachItem} = 'T';}
877 }
878
879 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
880 {
881 if ( $strOutputFormat eq "xml" ) {
882 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
883 } else {
884 AlignPrint("Folder Comparison","Succeed",$intLevel);
885 }
886 return @TwoPointers;
887 }
888 else
889 {
890 if ( $strOutputFormat eq "xml" ) {
891 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
892 } else {
893 AlignPrint("Folder Comparison","Failed",$intLevel);
894 }
895
896 foreach my $strEachItem (keys %hashCount)
897 {
898 if($hashCount{$strEachItem} ne 'B')
899 {
900 my $strOutput = "";
901 my $strReport = "";
902
903 if($hashCount{$strEachItem} eq 'M')
904 {
905 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
906 $strReport = "Difference Report: difference found at $strTestFolder";
907 }
908 elsif($hashCount{$strEachItem} eq 'T')
909 {
910 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
911 $strReport = "Difference Report: difference found at $strModelFolder";
912 }
913 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
914
915 VobPrint ("$strOutput\n",$intLevel);
916 $strOutput = $strOutput."\n\t".$strReport."\n";
917 push(@Errors,$strOutput);
918 }
919 else {push(@CorrectFiles,$strEachItem);}
920 }
921 if( $strOutputFormat eq "xml" ) {
922 print "</message></folder-comparison>";
923 }
924
925 return @TwoPointers;
926 }
927}
928
929sub VobPrint
930{
931 my ($strOutput, $intLevel) = @_;
932 my $strTab = "";
933 my $intTab = int($intLevel/2);
934 if($intLevel <= $gv_intVerbosity)
935 {
936 if($intLevel >= 1)
937 {
938 $strTab = "\t"x($intTab+1);
939 $strOutput =~ s/\n$//;
940 $strOutput =~ s/\n/\n$strTab/g;
941 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
942 }
943
944 if( $strOutputFormat eq "xml" ) {
945 $strOutput =~ s/&/&amp;/g;
946 $strOutput =~ s/</&amp;lt;/g;
947 $strOutput =~ s/>/&amp;gt;/g;
948 }
949
950 if ( length( $strOutput ) > 1000 ) {
951 $strOutput = substr( $strOutput, 0, 978);
952
953 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
954 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
955 if(defined $1 && $1) {
956 my $rest = $1;
957 if($rest =~ m/^a/) {
958 $strOutput =~ s@am?p?$@amp;@;
959 }
960 elsif($rest eq "g" || $rest eq "l") {
961 $strOutput .= "t;"; # close the known tag
962 }
963 elsif($rest eq "gt" || $rest eq "lt") {
964 $strOutput .= ";";
965 }
966 } else { # & on its own
967 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
968 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
969 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
970 }
971 }
972 $strOutput .= "... (output truncated)";
973 }
974
975
976 print $strTab.$strOutput."\n";
977 }
978}
979#----##
980
981
982#--Main System----------------------------
983#-----------------------------------------
984# Name: main
985# Perameters: arguments from command line
986# Pre-condition: testing will start by calling this main function.
987# Post-condition: output the test results for one or more collections.
988#-----------------------------------------
989sub main
990{
991 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
992 my $strProgName = $0;
993 my $intArgc = scalar(@ARGV);
994
995 #--System Arguments Setup
996 if (!parsargv::parse(\@ARGV,
997 'estop//off', \$strErrorStop,
998 'eshow//off', \$strErrorShow,
999 'debug', \$debugging,
1000 'verbosity/\d+/1', \$intVerbosity,
1001 'mode/[\w\-]+/all', \$strMode,
1002 'output/[\w\-]+/text', \$strOutputFormat,
1003 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
1004 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
1005 )) {
1006 PrintUsage($strProgName);
1007 die "\n";
1008 }
1009
1010 if ($intArgc<1) {
1011 PrintUsage($strProgName);
1012 die "\n";
1013 }
1014
1015 $gv_test_os = $test_os; # if not specified, defaults to "compute"
1016 $gv_model_os = $model_os; # tends to be linux
1017
1018#print STDERR "@@@@@ test_os: $test_os\n";
1019#print STDERR "@@@@@ model_os: $model_os\n";
1020
1021 $gv_blnDebugging = $debugging;
1022 $gv_blnErrorStop = $strErrorStop;
1023 $gv_blnErrorShow = $strErrorShow;
1024 $gv_intVerbosity = $intVerbosity;
1025 $gv_strMode = SetMode($strMode);
1026
1027 #----##
1028
1029# To find out what version of perl we're using
1030 if( $strOutputFormat eq "xml" ) {
1031 my $perloutput = `perl -v`;
1032 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1033 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1034 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1035 }
1036
1037 #--Collection(s) Testing
1038 foreach $strColName (@ARGV)
1039 {
1040 my @ErrorsInEachCol;
1041 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1042 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1043
1044 #--Output(Start)
1045 OutputStart($strColName);
1046 #----##
1047
1048 if(-e $strModelCol && -e $strTestCol )
1049 {
1050
1051 #--Individual Testing
1052 if ($gv_strMode eq "Individual")
1053 {
1054 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1055 }
1056 #----##
1057
1058 #--Initial Testing
1059 elsif ($gv_strMode eq "Initial")
1060 {
1061 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1062 }
1063 #----##
1064
1065 #--Full Testing
1066 elsif ($gv_strMode eq "Full")
1067 {
1068 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1069 }
1070 #----##
1071
1072 #--Error Checking
1073 else
1074 {
1075 if ( $strOutputFormat eq "xml" ) {
1076 die "<error>Error occoured in main function</error>\n";
1077 } else {
1078 die "Error occoured in main function.\n";
1079 }
1080 }
1081 #----##
1082
1083 }
1084 else
1085 {
1086 if( $strOutputFormat eq "xml" ) {
1087 die "<error>Cannot find collection: $strColName</error>\n";
1088 } else {
1089 die "Error: cannot find collection: $strColName\n";
1090 }
1091 }
1092 #----##
1093
1094 #--Output(Results and Errors)
1095 OutputEnd($strColName,\@ErrorsInEachCol);
1096 #----##
1097
1098 }
1099}
1100#----##
1101
1102&main();
Note: See TracBrowser for help on using the repository browser.