source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 36868

Last change on this file since 36868 was 36868, checked in by anupama, 18 months ago

Forgot to commit a tidier version

File size: 37.0 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnDebugging = 0;
61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
126 my ($strModelCol,$strTestCol,$strColName) = @_;
127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
137 $intNumberOfErrors = scalar(@Errors);
138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193
194 if($ENV{'GSVERSION'} ne "3") {
195 # Testing the build.cfg
196 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
197 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
198
199 if(-e $strModelBcfg && -e $strTestBcfg)
200 {
201 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
202 if($strBcfgError ne "")
203 {
204 if( $strOutputFormat eq "xml" ) {
205 print "<build-cfg succeeded=\"no\">\n<message>";
206 } else {
207 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
208 }
209
210 VobPrint ("$strBcfgError",$intLevel);
211 if( $strOutputFormat eq "xml" ) {
212 print "</message></build-cfg>";
213 }
214
215 $strBcfgError = "$strBcfgError";
216 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
217 push(@Errors,$strBcfgError);
218 }
219 else
220 {
221 if( $strOutputFormat eq "xml" ) {
222 print "<build-cfg succeeded=\"yes\"/>";
223 } else {
224 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
225 }
226 }
227 }
228 else
229 {
230 my $strErrorColName;
231 my $strBcfgError;
232
233 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
234 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
235
236 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
237 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
238 VobPrint ("$strBcfgError\n",$intLevel);
239 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
240
241 push(@Errors,$strBcfgError);
242 }
243 VobPrint ("\n",$intLevel);
244
245 # Testing the collect.cfg
246
247 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
248 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
249
250 if(-e $strModelCcfg && -e $strTestCcfg)
251 {
252 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
253 if($strCcfgError ne "")
254 {
255 if( $strOutputFormat eq "xml" ) {
256 print "<collect-cfg succeeded=\"no\"><message>";
257 } else {
258 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
259 }
260
261 VobPrint ("$strCcfgError",$intLevel);
262
263 if( $strOutputFormat eq "xml" ) {
264 print "</message></collect-cfg>";
265 }
266
267 $strCcfgError = "$strCcfgError";
268 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
269 push(@Errors,$strCcfgError);
270 }
271 else
272 {
273 if( $strOutputFormat eq "xml" ) {
274 print "<collect-cfg succeeded=\"yes\"/>";
275 } else {
276 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
277 }
278 }
279 }
280 else
281 {
282 my $strErrorColName;
283 my $strCcfgError;
284
285 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
286 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
287
288 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
289 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
290 VobPrint ("$strCcfgError\n",$intLevel);
291 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
292
293 push(@Errors,$strCcfgError);
294 }
295
296 VobPrint ("\n",$intLevel);
297 } # End of code that only really pertains to GS2, as GS3's buildConfig.xml gets diffcolled with XML files later
298
299
300 # Testing databases
301
302 # index
303 my $flat_db_ext = ".gdb";
304
305 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
306 if(!(-e $strModelGdb)){ # try jdb
307 $flat_db_ext = ".jdb";
308 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
309 }
310
311#print STDERR "*********** USING DB EXTENSION: $flat_db_ext\n";
312#print STDERR "@@@ PERL_PERTURB_KEYS: ".$ENV{'PERL_PERTURB_KEYS'}."\n";
313#print STDERR "@@@ PERL_HASH_SEED: ".$ENV{'PERL_HASH_SEED'}."\n";
314
315 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName$flat_db_ext");
316 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
317 if($strGdbError) {
318 push(@Errors,$strGdbError);
319 }
320
321 # archives
322 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc$flat_db_ext");
323 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc$flat_db_ext");
324 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
325 if($strGdbError) {
326 push(@Errors,$strGdbError);
327 }
328
329 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src$flat_db_ext");
330 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src$flat_db_ext");
331 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
332 if($strGdbError) {
333 push(@Errors,$strGdbError);
334 }
335
336 VobPrint ("\n",$intLevel);
337
338 return @Errors;
339}
340
341
342# At present handles gdbm - need to expand to allow for jdbm and other db types
343sub GdbDiff
344{
345 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
346
347 my $strGdbError = 0;
348
349 if(-e $strModelGdb && -e $strTestGdb)
350 {
351 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
352
353 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
354
355 if($strGdbError ne "")
356 {
357 if( $strOutputFormat eq "xml" ) {
358 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
359 } else {
360 AlignPrint("Database Comparsion Result","Failed",$intLevel);
361 }
362 VobPrint ("$strGdbError\n",$intLevel);
363
364 if( $strOutputFormat eq "xml" ) {
365 print "</message></database>";
366 }
367
368 $strGdbError = "$strGdbError";
369 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
370 #push(@Errors,$strGdbError);
371 }
372 else
373 {
374 if( $strOutputFormat eq "xml" ) {
375 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
376 } else {
377 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
378 }
379 }
380 }
381 else
382 {
383 my $strErrorColName;
384
385 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
386 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
387
388 AlignPrint("Database Comparsion Result","Failed",$intLevel);
389
390 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
391 VobPrint ("$strGdbError\n",$intLevel);
392
393 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
394
395 }
396
397 return $strGdbError;
398}
399
400sub FullTest
401{
402 my ($strModelCol,$strTestCol,$strColName) = @_;
403 my @Errors = ();
404 my $intLevel = 0;
405 my $intNumberDiffs = 0;
406
407 # <Initial Test>
408 if( $strOutputFormat eq "xml" ) {
409 #print "<initial-test>";
410 } else {
411 VobPrint("Initial Testing Start\n",$intLevel);
412 }
413
414 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
415 $intNumberDiffs = scalar(@Errors);
416
417 if( $strOutputFormat eq "xml" ) {
418 #print "</initial-test>";
419 } else {
420 VobPrint("Initial Testing End\n",$intLevel);
421 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
422 VobPrint("\n",$intLevel);
423 }
424 # </Initial Test>
425
426 # <Detailed Test>
427 if( $strOutputFormat eq "xml" ) {
428 #print "<detailed-test>";
429 } else {
430 VobPrint("Detail Testing Start\n",$intLevel);
431 }
432 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
433 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
434
435 if( $strOutputFormat eq "xml" ) {
436 #print "</detailed-test>";
437 } else {
438 VobPrint("Detail Testing End\n",$intLevel);
439 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
440 }
441 # </Detailed Test>
442
443 return @Errors;
444}
445#----##
446
447
448#--Other System Utilities
449sub PrintUsage
450{
451 my ($strProgName) = @_;
452 if ( $strOutputFormat eq "xml" ) {
453 print "<error>usage incorrect</error>\n";
454 } else {
455 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
456 }
457 Help("Error: used incorrect parameters");
458}
459
460sub Help
461{
462 my ($strError) = @_;
463 my $aryptHelps =
464 [ { 'name' => "verbosity",
465 'type' => "scale",
466 'argu' => "a integer" ,
467 'descrip' => "this parameter setup the verbosity of the testing result"},
468 { 'name' => "mode",
469 'type' => "option",
470 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
471 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
472 { 'name' => "estop",
473 'type' => "flag",
474 'argu' => "NULL" ,
475 'descrip' => "Set then system will stop once it meets an error"},
476 { 'name' => "eshow",
477 'type' => "flag",
478 'argu' => "NULL" ,
479 'descrip' => "Set then system will show the error summary"},
480 { 'name' => "debug",
481 'type' => "flag",
482 'argu' => "NULL" ,
483 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
484 ];
485
486
487 if ( $strOutputFormat ne "xml" ) {
488 print "$strError\n";
489
490 foreach my $hashOneArg (@{$aryptHelps})
491 {
492 print "\n----------------------------\n";
493 print "Parameters: -".$hashOneArg->{"name"}."\n";
494 print "Type: ".$hashOneArg->{"type"}."\n";
495 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
496 print "Description: ".$hashOneArg->{"descrip"}."\n";
497 print "----------------------------\n";
498 }
499 }
500}
501
502sub OutputStart
503{
504 my ($strColName) = @_;
505 my $intPadding = 17 - length($strColName);
506
507 if ( $strOutputFormat eq "xml" ) {
508 print "<diffcol>\n";
509 } else {
510 print "+---------------------------------------------------------+\n";
511 print "| |\n";
512 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
513 print "| |\n";
514 print "+---------------------------------------------------------+\n\n";
515 }
516}
517
518sub OutputEnd
519{
520 my ($strColName,$aryptErrors) = @_;
521 my $intPadding = 12 - length($strColName);
522 if ( $strOutputFormat eq "xml" ) {
523 print "</diffcol>\n";
524 } else {
525 print "\n";
526 print "+---------------------------------------------------------+\n";
527 print "| |\n";
528 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
529 print "| |\n";
530 print "+---------------------------------------------------------+\n\n";
531 }
532
533 my $intTotalErrors = scalar(@{$aryptErrors});
534 if ( $strOutputFormat ne "xml" ) {
535 print "Checking completed, there is $intTotalErrors error(s) found.\n";
536 }
537
538 if($gv_blnErrorShow ne "off")
539 {
540 foreach my $strEachError (@{$aryptErrors})
541 {
542 if ( $strOutputFormat eq "xml" ) {
543 print "<error>";
544 print $strEachError;
545 print "</error>\n";
546 } else {
547 print "+---------------------------------------------------------+\n";
548 print "| Error |\n";
549 print "+---------------------------------------------------------+\n\n";
550 print "$strEachError\n\n";
551 }
552 }
553 }
554 else
555 {
556 if ( $strOutputFormat ne "xml" ) {
557 print "Use -eshow to show the error detail\n\n";
558 }
559 }
560}
561
562sub AlignPrint
563{
564 my ($strMainString,$strStatus,$intLevel) = @_;
565 my $intDot = 100 - length($strMainString) - length($strStatus);
566 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
567}
568
569
570# this function is only called on DocXMLFiles.
571# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
572# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
573sub isDocOrMETSXMLFileWindows
574{
575 my ($file_contents) = @_;
576
577 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
578
579 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
580 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
581
582 # for doc.xml:
583 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
584 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
585 $gsdlsourcefilename = $2;
586 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
587 return 1;
588 }
589 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
590 return 1;
591 }
592
593 return 0;
594}
595
596sub TestEach
597{
598 my ($strModel,$strTest,$intLevel,$strColName) = @_;
599 my @Errors = ();
600
601 $intLevel++;
602 if (-d $strModel && -d $strTest)
603 {
604 my @aryInModel = &diffutil::files_in_dir($strModel);
605 my @aryInTest = &diffutil::files_in_dir($strTest);
606
607 # Files to be skipped because they get generated on one OS but not the other
608 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
609 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
610 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
611 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
612
613 # Now check all remaining files in the folder exist in both model and test collections
614 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
615 my @aryCorrectFiles = @{$aryTwoPointers[1]};
616 @Errors = @{$aryTwoPointers[0]};
617
618 if(scalar(@Errors) == 0)
619 {
620 foreach my $strEachFile (@aryInModel)
621 {
622 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
623 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
624 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
625 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
626 {
627 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
628 }
629 else
630 {
631 if ( $strOutputFormat eq "xml" ) {
632 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
633 } else {
634 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
635 }
636 }
637 }
638 }
639 else
640 {
641 foreach my $strEachFile (@aryCorrectFiles)
642 {
643 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
644 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
645 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
646 {
647 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
648 }
649 }
650 }
651 if($intLevel == $gv_intVerbosity)
652 {
653 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
654 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
655 }
656 }
657 else
658 {
659 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
660 my $ignore_line_re = "<(.*?:)?(?:M|m)etadata name=\"(buildDate|earliestDatestamp|lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ex.File.FileAccessDate|ex.File.FileInodeChangeDate|ex.File.FileCreateDate|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?(?:M|m)etadata>\\s*\\n*";
661
662 my $strResult;
663
664 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
665 # in case these don't appear in the same order between collections, since
666 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
667 # when they can show up as unnecessary diff 'errors'
668
669 my ($model_contents, $test_contents);
670
671 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
672 if($strModel =~ m/buildConfig\.xml$/ || $strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
673
674 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
675 sysread(FIN, $model_contents, -s FIN);
676 close(FIN);
677 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
678 sysread(FIN, $test_contents, -s FIN);
679 close(FIN);
680
681 $model_contents =~ s/$ignore_line_re//g;
682 $test_contents =~ s/$ignore_line_re//g;
683
684
685 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
686 # before comparing a windows test with a linux model or vice-versa
687 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
688 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
689
690 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
691
692 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
693 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
694
695 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
696 $$win_contents =~ s@[\r]@@g;
697
698 # make all single windows slashes into single unix slashes
699 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
700 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
701 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
702
703 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
704 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
705
706 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
707 # $$win_contents =~ s@\r@\n@mg;
708
709 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
710 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
711 # containing solitary carriage returns instead of linefeed
712 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
713
714
715 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
716 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
717 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
718 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
719 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
720
721 # Advanced Beatles collection,
722 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
723 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
724 # Normalising to windows version for doing a diff
725 $$lin_contents =~ s@href=http:///@href=http://@g;
726 }
727 }
728
729 # remove all absolute paths upto collect folder from <Metadata /> elements
730 if($strModel =~ m/docmets\.xml$/) {
731 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
732 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
733 }
734
735 # processing particular to doc.xml
736 if($strModel =~ m/doc\.xml$/) {
737 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
738 # these tmpdirs are located inside the collection directory
739 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
740 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
741
742 # remove all absolute paths upto collect folder from <Metadata /> elements
743 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
744 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
745
746 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
747 # These tmpdirs are located inside the toplevel *greenstone* directory
748 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
749 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
750 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
751
752 if($test_contents =~ m@$tmpfile_regex@) {
753 # found a match, replace the tmp file name with "random", keeping the original file extension
754 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
755
756 my ($old_tmp_filename, $ext) = ($1, $2);
757 my $new_tmp_filename = "random";
758
759 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
760 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
761 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
762 if($5) {
763 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
764 } else { # OrigSource contains only the filename
765 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
766 }
767
768 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
769 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
770 if($5) {
771 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
772 } else { # OrigSource contains only the filename
773 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
774 }
775 }
776
777 } # finished special processing of doc.xml files
778
779
780 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
781 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
782 &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
783 &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
784# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
785# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
786# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
787# }
788 }
789
790
791 # now can diff the normalised versions of the doc.xml/docmets.xml files:
792 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
793
794 } else {
795 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
796 }
797
798 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
799 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
800 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
801
802 #$strResult = GeneralOutput($strResult);
803 if ( $strOutputFormat eq "xml" ) {
804 #
805 } else {
806 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
807 }
808 if ($strResult eq "")
809 {
810 if ( $strOutputFormat eq "xml" ) {
811 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
812 } else {
813 AlignPrint("Comparing File","Succeed",$intLevel);
814 }
815 }
816 else
817 {
818# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
819
820 my $strOutput = "Difference Report:\n$strResult\n";
821 if ( $strOutputFormat eq "xml" ) {
822 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
823 } else {
824 AlignPrint("Comparing File","Failed",$intLevel);
825 }
826
827 #$result=`file -b $strModel`; # linux specific test for binary file
828 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
829 if ( "$result" =~ "data" ) {
830 VobPrint( "These binary files differ", $intLevel );
831 } else {
832 VobPrint ( "$strOutput" , $intLevel);
833 }
834
835
836 if ( $strOutputFormat eq "xml" ) {
837 print "</message></file-comparison>";
838 }
839
840 if($gv_blnErrorStop ne "off") { exit; }
841 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
842 }
843 }
844
845 return @Errors;
846}
847
848
849sub FolderTesting
850{
851 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
852 my %hashCount = ();
853 my @Errors = ();
854 my @CorrectFiles = ();
855 my @TwoPointers = (\@Errors,\@CorrectFiles);
856
857 if ( $strOutputFormat eq "xml" ) {
858 #print "<folder-comparison location=\"$strModelFolder\">\n";
859 } else {
860 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
861 }
862
863 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
864 foreach my $strEachItem (@$aryptTest)
865 {
866 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
867 else {$hashCount{$strEachItem} = 'T';}
868 }
869
870 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
871 {
872 if ( $strOutputFormat eq "xml" ) {
873 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
874 } else {
875 AlignPrint("Folder Comparsion","Succeed",$intLevel);
876 }
877 return @TwoPointers;
878 }
879 else
880 {
881 if ( $strOutputFormat eq "xml" ) {
882 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
883 } else {
884 AlignPrint("Folder Comparsion","Failed",$intLevel);
885 }
886
887 foreach my $strEachItem (keys %hashCount)
888 {
889 if($hashCount{$strEachItem} ne 'B')
890 {
891 my $strOutput = "";
892 my $strReport = "";
893
894 if($hashCount{$strEachItem} eq 'M')
895 {
896 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
897 $strReport = "Difference Report: difference found at $strTestFolder";
898 }
899 elsif($hashCount{$strEachItem} eq 'T')
900 {
901 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
902 $strReport = "Difference Report: difference found at $strModelFolder";
903 }
904 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
905
906 VobPrint ("$strOutput\n",$intLevel);
907 $strOutput = $strOutput."\n\t".$strReport."\n";
908 push(@Errors,$strOutput);
909 }
910 else {push(@CorrectFiles,$strEachItem);}
911 }
912 if( $strOutputFormat eq "xml" ) {
913 print "</message></folder-comparison>";
914 }
915
916 return @TwoPointers;
917 }
918}
919
920sub VobPrint
921{
922 my ($strOutput, $intLevel) = @_;
923 my $strTab = "";
924 my $intTab = int($intLevel/2);
925 if($intLevel <= $gv_intVerbosity)
926 {
927 if($intLevel >= 1)
928 {
929 $strTab = "\t"x($intTab+1);
930 $strOutput =~ s/\n$//;
931 $strOutput =~ s/\n/\n$strTab/g;
932 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
933 }
934
935 if( $strOutputFormat eq "xml" ) {
936 $strOutput =~ s/&/&amp;/g;
937 $strOutput =~ s/</&amp;lt;/g;
938 $strOutput =~ s/>/&amp;gt;/g;
939 }
940
941 if ( length( $strOutput ) > 1000 ) {
942 $strOutput = substr( $strOutput, 0, 978);
943
944 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
945 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
946 if(defined $1 && $1) {
947 my $rest = $1;
948 if($rest =~ m/^a/) {
949 $strOutput =~ s@am?p?$@amp;@;
950 }
951 elsif($rest eq "g" || $rest eq "l") {
952 $strOutput .= "t;"; # close the known tag
953 }
954 elsif($rest eq "gt" || $rest eq "lt") {
955 $strOutput .= ";";
956 }
957 } else { # & on its own
958 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
959 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
960 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
961 }
962 }
963 $strOutput .= "... (output truncated)";
964 }
965
966
967 print $strTab.$strOutput."\n";
968 }
969}
970#----##
971
972
973#--Main System----------------------------
974#-----------------------------------------
975# Name: main
976# Perameters: arguments from command line
977# Pre-condition: testing will start by calling this main function.
978# Post-condition: output the test results for one or more collections.
979#-----------------------------------------
980sub main
981{
982 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
983 my $strProgName = $0;
984 my $intArgc = scalar(@ARGV);
985
986 #--System Arguments Setup
987 if (!parsargv::parse(\@ARGV,
988 'estop//off', \$strErrorStop,
989 'eshow//off', \$strErrorShow,
990 'debug', \$debugging,
991 'verbosity/\d+/1', \$intVerbosity,
992 'mode/[\w\-]+/all', \$strMode,
993 'output/[\w\-]+/text', \$strOutputFormat,
994 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
995 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
996 )) {
997 PrintUsage($strProgName);
998 die "\n";
999 }
1000
1001 if ($intArgc<1) {
1002 PrintUsage($strProgName);
1003 die "\n";
1004 }
1005
1006 $gv_test_os = $test_os; # if not specified, defaults to "compute"
1007 $gv_model_os = $model_os; # tends to be linux
1008
1009#print STDERR "@@@@@ test_os: $test_os\n";
1010#print STDERR "@@@@@ model_os: $model_os\n";
1011
1012 $gv_blnDebugging = $debugging;
1013 $gv_blnErrorStop = $strErrorStop;
1014 $gv_blnErrorShow = $strErrorShow;
1015 $gv_intVerbosity = $intVerbosity;
1016 $gv_strMode = SetMode($strMode);
1017
1018 #----##
1019
1020# To find out what version of perl we're using
1021 if( $strOutputFormat eq "xml" ) {
1022 my $perloutput = `perl -v`;
1023 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1024 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1025 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1026 }
1027
1028 #--Collection(s) Testing
1029 foreach $strColName (@ARGV)
1030 {
1031 my @ErrorsInEachCol;
1032 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1033 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1034
1035 #--Output(Start)
1036 OutputStart($strColName);
1037 #----##
1038
1039 if(-e $strModelCol && -e $strTestCol )
1040 {
1041
1042 #--Individual Testing
1043 if ($gv_strMode eq "Individual")
1044 {
1045 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1046 }
1047 #----##
1048
1049 #--Initial Testing
1050 elsif ($gv_strMode eq "Initial")
1051 {
1052 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1053 }
1054 #----##
1055
1056 #--Full Testing
1057 elsif ($gv_strMode eq "Full")
1058 {
1059 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1060 }
1061 #----##
1062
1063 #--Error Checking
1064 else
1065 {
1066 if ( $strOutputFormat eq "xml" ) {
1067 die "<error>Error occoured in main function</error>\n";
1068 } else {
1069 die "Error occoured in main function.\n";
1070 }
1071 }
1072 #----##
1073
1074 }
1075 else
1076 {
1077 if( $strOutputFormat eq "xml" ) {
1078 die "<error>Cannot find collection: $strColName</error>\n";
1079 } else {
1080 die "Error: cannot find collection: $strColName\n";
1081 }
1082 }
1083 #----##
1084
1085 #--Output(Results and Errors)
1086 OutputEnd($strColName,\@ErrorsInEachCol);
1087 #----##
1088
1089 }
1090}
1091#----##
1092
1093&main();
Note: See TracBrowser for help on using the repository browser.