source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 37446

Last change on this file since 37446 was 37446, checked in by anupama, 15 months ago

These changes seem to have fixed the broken parts of diffcol for GS2 on Linux when locally running diff again on checked out GS2 and all pre-built GS2 test collections.

File size: 37.6 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use flatdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnDebugging = 0;
61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
126 my ($strModelCol,$strTestCol,$strColName) = @_;
127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
137 $intNumberOfErrors = scalar(@Errors);
138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193
194 if($ENV{'GSVERSION'} ne "3") {
195 # Testing the build.cfg
196 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
197 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
198
199 if(-e $strModelBcfg && -e $strTestBcfg)
200 {
201 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
202 if($strBcfgError ne "")
203 {
204 if( $strOutputFormat eq "xml" ) {
205 print "<build-cfg succeeded=\"no\">\n<message>";
206 } else {
207 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
208 }
209
210 VobPrint ("$strBcfgError",$intLevel);
211 if( $strOutputFormat eq "xml" ) {
212 print "</message></build-cfg>";
213 }
214
215 $strBcfgError = "$strBcfgError";
216 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
217 push(@Errors,$strBcfgError);
218 }
219 else
220 {
221 if( $strOutputFormat eq "xml" ) {
222 print "<build-cfg succeeded=\"yes\"/>";
223 } else {
224 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
225 }
226 }
227 }
228 else
229 {
230 my $strErrorColName;
231 my $strBcfgError;
232
233 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
234 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
235
236 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
237 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
238 VobPrint ("$strBcfgError\n",$intLevel);
239 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
240
241 push(@Errors,$strBcfgError);
242 }
243 VobPrint ("\n",$intLevel);
244
245 # Testing the collect.cfg
246
247 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
248 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
249
250 if(-e $strModelCcfg && -e $strTestCcfg)
251 {
252 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
253 if($strCcfgError ne "")
254 {
255 if( $strOutputFormat eq "xml" ) {
256 print "<collect-cfg succeeded=\"no\"><message>";
257 } else {
258 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
259 }
260
261 VobPrint ("$strCcfgError",$intLevel);
262
263 if( $strOutputFormat eq "xml" ) {
264 print "</message></collect-cfg>";
265 }
266
267 $strCcfgError = "$strCcfgError";
268 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
269 push(@Errors,$strCcfgError);
270 }
271 else
272 {
273 if( $strOutputFormat eq "xml" ) {
274 print "<collect-cfg succeeded=\"yes\"/>";
275 } else {
276 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
277 }
278 }
279 }
280 else
281 {
282 my $strErrorColName;
283 my $strCcfgError;
284
285 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
286 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
287
288 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
289 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
290 VobPrint ("$strCcfgError\n",$intLevel);
291 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
292
293 push(@Errors,$strCcfgError);
294 }
295
296 VobPrint ("\n",$intLevel);
297 } # End of code that only really pertains to GS2, as GS3's buildConfig.xml gets diffcolled with XML files later
298
299
300 # Testing databases
301
302 # index
303 my $flat_db_ext = ".gdb";
304
305 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
306 if(!(-e $strModelGdb)){ # try jdb
307 $flat_db_ext = ".jdb";
308 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
309 }
310
311#print STDERR "*********** USING DB EXTENSION: $flat_db_ext\n";
312#print STDERR "@@@ PERL_PERTURB_KEYS: ".$ENV{'PERL_PERTURB_KEYS'}."\n";
313#print STDERR "@@@ PERL_HASH_SEED: ".$ENV{'PERL_HASH_SEED'}."\n";
314
315 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName$flat_db_ext");
316 my $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
317 if($strGdbError) {
318 push(@Errors,$strGdbError);
319 }
320
321 # archives
322 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc$flat_db_ext");
323 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc$flat_db_ext");
324 $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
325 if($strGdbError) {
326 push(@Errors,$strGdbError);
327 }
328
329 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src$flat_db_ext");
330 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src$flat_db_ext");
331 $strGdbError = &FlatdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
332 if($strGdbError) {
333 push(@Errors,$strGdbError);
334 }
335
336 VobPrint ("\n",$intLevel);
337
338 return @Errors;
339}
340
341
342# At present handles gdbm and jdbm - may need to expand to allow for other db types
343sub FlatdbDiff
344{
345 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
346
347 my $strGdbError = 0;
348
349 if(-e $strModelGdb && -e $strTestGdb)
350 {
351 #my $strGdbError = flatdbdiff::test_flat_db($strModelGdb, $strTestGdb);
352
353 $strGdbError = flatdbdiff::test_flat_db($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
354
355 if($strGdbError ne "")
356 {
357 if( $strOutputFormat eq "xml" ) {
358 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
359 } else {
360 AlignPrint("Database Comparsion Result","Failed",$intLevel);
361 }
362 VobPrint ("$strGdbError\n",$intLevel);
363
364 if( $strOutputFormat eq "xml" ) {
365 print "</message></database>";
366 }
367
368 $strGdbError = "$strGdbError";
369 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
370 #push(@Errors,$strGdbError);
371 }
372 else
373 {
374 if( $strOutputFormat eq "xml" ) {
375 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
376 } else {
377 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
378 }
379 }
380 }
381 else
382 {
383 my $strErrorColName;
384
385 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
386 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
387
388 AlignPrint("Database Comparison Result","Failed",$intLevel);
389
390 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
391 VobPrint ("$strGdbError\n",$intLevel);
392
393 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
394
395 }
396
397 return $strGdbError;
398}
399
400sub FullTest
401{
402 my ($strModelCol,$strTestCol,$strColName) = @_;
403 my @Errors = ();
404 my $intLevel = 0;
405 my $intNumberDiffs = 0;
406
407 # <Initial Test>
408 if( $strOutputFormat eq "xml" ) {
409 #print "<initial-test>";
410 } else {
411 VobPrint("Initial Testing Start\n",$intLevel);
412 }
413
414 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
415 $intNumberDiffs = scalar(@Errors);
416
417 if( $strOutputFormat eq "xml" ) {
418 #print "</initial-test>";
419 } else {
420 VobPrint("Initial Testing End\n",$intLevel);
421 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
422 VobPrint("\n",$intLevel);
423 }
424 # </Initial Test>
425
426 # <Detailed Test>
427 if( $strOutputFormat eq "xml" ) {
428 #print "<detailed-test>";
429 } else {
430 VobPrint("Detail Testing Start\n",$intLevel);
431 }
432 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
433 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
434
435 if( $strOutputFormat eq "xml" ) {
436 #print "</detailed-test>";
437 } else {
438 VobPrint("Detail Testing End\n",$intLevel);
439 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
440 }
441 # </Detailed Test>
442
443 return @Errors;
444}
445#----##
446
447
448#--Other System Utilities
449sub PrintUsage
450{
451 my ($strProgName) = @_;
452 if ( $strOutputFormat eq "xml" ) {
453 print "<error>usage incorrect</error>\n";
454 } else {
455 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
456 }
457 Help("Error: used incorrect parameters");
458}
459
460sub Help
461{
462 my ($strError) = @_;
463 my $aryptHelps =
464 [ { 'name' => "verbosity",
465 'type' => "scale",
466 'argu' => "a integer" ,
467 'descrip' => "this parameter setup the verbosity of the testing result"},
468 { 'name' => "mode",
469 'type' => "option",
470 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
471 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
472 { 'name' => "estop",
473 'type' => "flag",
474 'argu' => "NULL" ,
475 'descrip' => "Set then system will stop once it meets an error"},
476 { 'name' => "eshow",
477 'type' => "flag",
478 'argu' => "NULL" ,
479 'descrip' => "Set then system will show the error summary"},
480 { 'name' => "debug",
481 'type' => "flag",
482 'argu' => "NULL" ,
483 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
484 ];
485
486
487 if ( $strOutputFormat ne "xml" ) {
488 print "$strError\n";
489
490 foreach my $hashOneArg (@{$aryptHelps})
491 {
492 print "\n----------------------------\n";
493 print "Parameters: -".$hashOneArg->{"name"}."\n";
494 print "Type: ".$hashOneArg->{"type"}."\n";
495 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
496 print "Description: ".$hashOneArg->{"descrip"}."\n";
497 print "----------------------------\n";
498 }
499 }
500}
501
502sub OutputStart
503{
504 my ($strColName) = @_;
505 my $intPadding = 17 - length($strColName);
506
507 if ( $strOutputFormat eq "xml" ) {
508 print "<diffcol>\n";
509 } else {
510 print "+---------------------------------------------------------+\n";
511 print "| |\n";
512 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
513 print "| |\n";
514 print "+---------------------------------------------------------+\n\n";
515 }
516}
517
518sub OutputEnd
519{
520 my ($strColName,$aryptErrors) = @_;
521 my $intPadding = 12 - length($strColName);
522 if ( $strOutputFormat eq "xml" ) {
523 print "</diffcol>\n";
524 } else {
525 print "\n";
526 print "+---------------------------------------------------------+\n";
527 print "| |\n";
528 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
529 print "| |\n";
530 print "+---------------------------------------------------------+\n\n";
531 }
532
533 my $intTotalErrors = scalar(@{$aryptErrors});
534 if ( $strOutputFormat ne "xml" ) {
535 print "Checking completed, there is $intTotalErrors error(s) found.\n";
536 }
537
538 if($gv_blnErrorShow ne "off")
539 {
540 foreach my $strEachError (@{$aryptErrors})
541 {
542 if ( $strOutputFormat eq "xml" ) {
543 print "<error>";
544 print $strEachError;
545 print "</error>\n";
546 } else {
547 print "+---------------------------------------------------------+\n";
548 print "| Error |\n";
549 print "+---------------------------------------------------------+\n\n";
550 print "$strEachError\n\n";
551 }
552 }
553 }
554 else
555 {
556 if ( $strOutputFormat ne "xml" ) {
557 print "Use -eshow to show the error detail\n\n";
558 }
559 }
560}
561
562sub AlignPrint
563{
564 my ($strMainString,$strStatus,$intLevel) = @_;
565 my $intDot = 100 - length($strMainString) - length($strStatus);
566 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
567}
568
569
570# this function is only called on DocXMLFiles.
571# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in flatdbdiff.pm)
572# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
573sub isDocOrMETSXMLFileWindows
574{
575 my ($file_contents) = @_;
576
577 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
578
579 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
580 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
581
582 # for doc.xml:
583 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
584 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
585 $gsdlsourcefilename = $2;
586 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
587 return 1;
588 }
589 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
590 return 1;
591 }
592
593 return 0;
594}
595
596sub TestEach
597{
598 my ($strModel,$strTest,$intLevel,$strColName) = @_;
599 my @Errors = ();
600
601 $intLevel++;
602 if (-d $strModel && -d $strTest)
603 {
604 my @aryInModel = &diffutil::files_in_dir($strModel);
605 my @aryInTest = &diffutil::files_in_dir($strTest);
606
607 # Files to be skipped because they get generated on one OS but not the other
608 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
609 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|archiveinf-timestamp.out|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
610 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
611 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
612
613 # Now check all remaining files in the folder exist in both model and test collections
614 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
615 my @aryCorrectFiles = @{$aryTwoPointers[1]};
616 @Errors = @{$aryTwoPointers[0]};
617
618 if(scalar(@Errors) == 0)
619 {
620 foreach my $strEachFile (@aryInModel)
621 {
622 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
623 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
624 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
625 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
626 {
627 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
628 }
629 else
630 {
631 if ( $strOutputFormat eq "xml" ) {
632 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
633 } else {
634 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
635 }
636 }
637 }
638 }
639 else
640 {
641 foreach my $strEachFile (@aryCorrectFiles)
642 {
643 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
644 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
645 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
646 {
647 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
648 }
649 }
650 }
651 if($intLevel == $gv_intVerbosity)
652 {
653 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
654 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
655 }
656 }
657 else
658 {
659 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
660 my $ignore_line_re = "<(.*?:)?(?:M|m)etadata name=\"(buildDate|earliestDatestamp|lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ex.File.FileAccessDate|ex.File.FileInodeChangeDate|ex.File.FileCreateDate|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?(?:M|m)etadata>\\s*\\n*";
661
662 my $strResult;
663
664 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
665 # in case these don't appear in the same order between collections, since
666 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
667 # when they can show up as unnecessary diff 'errors'
668
669 my ($model_contents, $test_contents);
670
671 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
672 if($strModel =~ m/buildConfig\.xml$/ || $strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
673
674 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
675 sysread(FIN, $model_contents, -s FIN);
676 close(FIN);
677 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
678 sysread(FIN, $test_contents, -s FIN);
679 close(FIN);
680
681 $model_contents =~ s/$ignore_line_re//g;
682 $test_contents =~ s/$ignore_line_re//g;
683
684
685 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
686 # before comparing a windows test with a linux model or vice-versa
687 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
688 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
689
690 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
691
692 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
693 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
694
695 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
696 $$win_contents =~ s@[\r]@@g;
697
698 # make all single windows slashes into single unix slashes
699 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
700 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
701 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
702
703 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
704 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
705
706 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
707 # $$win_contents =~ s@\r@\n@mg;
708
709 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
710 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
711 # containing solitary carriage returns instead of linefeed
712 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
713
714
715 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
716 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
717 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
718 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
719 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
720
721 # Advanced Beatles collection,
722 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
723 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
724 # Normalising to windows version for doing a diff
725 $$lin_contents =~ s@href=http:///@href=http://@g;
726 }
727 }
728
729 # remove all absolute paths upto collect folder from <Metadata /> elements
730 if($strModel =~ m/docmets\.xml$/) {
731 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
732 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
733 }
734
735 # processing particular to doc.xml
736 if($strModel =~ m/doc\.xml$/) {
737 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
738 # these tmpdirs are located inside the collection directory
739 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
740 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
741
742 # remove all absolute paths upto collect folder from <Metadata /> elements
743 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
744 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
745
746 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
747 # These tmpdirs are located inside the toplevel *greenstone* directory
748 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
749 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
750 #my $tmpfile_regex = "<Metadata name=\"URL\">https?://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
751 my $tmpfile_regex = "<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
752
753 if($test_contents =~ m@$tmpfile_regex@) {
754 # found a match, replace the tmp file name with "random", keeping the original file extension
755 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
756
757 my ($old_tmp_filename, $ext) = ($3, $4);
758 my $new_tmp_filename = "random";
759 $old_tmp_filename =~ s@.*?([^/]*)$@$1@; # remove any intermediate timestamp folders under tmp dir
760
761 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
762 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
763 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?.*?($old_tmp_filename)($ext</Metadata>)";
764 if($5) { # there is a tmp folder
765 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$7@mg;
766 } else { # OrigSource contains only the filename
767 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$7@mg;
768 }
769
770 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
771 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
772 if($5) {
773 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
774 } else { # OrigSource contains only the filename
775 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
776 }
777 }
778
779 } # finished special processing of doc.xml files
780
781 #$gv_blnDebugging=1;
782 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
783 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
784 &flatdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
785 &flatdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
786# if($strModel =~ m/(HASH019c.dir|HASH2bdf.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
787# &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
788# &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
789# }
790 }
791 #$gv_blnDebugging=0;
792
793 # now can diff the normalised versions of the doc.xml/docmets.xml files:
794 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
795
796 } else {
797 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
798 }
799
800 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
801 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
802 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
803
804 #$strResult = GeneralOutput($strResult);
805 if ( $strOutputFormat eq "xml" ) {
806 #
807 } else {
808 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
809 }
810 if ($strResult eq "")
811 {
812 if ( $strOutputFormat eq "xml" ) {
813 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
814 } else {
815 AlignPrint("Comparing File","Succeed",$intLevel);
816 }
817 }
818 else
819 {
820# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
821
822 my $strOutput = "Difference Report:\n$strResult\n";
823 if ( $strOutputFormat eq "xml" ) {
824 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
825 } else {
826 AlignPrint("Comparing File","Failed",$intLevel);
827 }
828
829 #$result=`file -b $strModel`; # linux specific test for binary file
830 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
831 if ( "$result" =~ "data" ) {
832 VobPrint( "These binary files differ", $intLevel );
833 } else {
834 VobPrint ( "$strOutput" , $intLevel);
835 }
836
837
838 if ( $strOutputFormat eq "xml" ) {
839 print "</message></file-comparison>";
840 }
841
842 if($gv_blnErrorStop ne "off") { exit; }
843 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
844 }
845 }
846
847 return @Errors;
848}
849
850
851sub FolderTesting
852{
853 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
854 my %hashCount = ();
855 my @Errors = ();
856 my @CorrectFiles = ();
857 my @TwoPointers = (\@Errors,\@CorrectFiles);
858
859 if ( $strOutputFormat eq "xml" ) {
860 #print "<folder-comparison location=\"$strModelFolder\">\n";
861 } else {
862 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
863 }
864
865 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
866 foreach my $strEachItem (@$aryptTest)
867 {
868 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
869 else {$hashCount{$strEachItem} = 'T';}
870 }
871
872 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
873 {
874 if ( $strOutputFormat eq "xml" ) {
875 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
876 } else {
877 AlignPrint("Folder Comparsion","Succeed",$intLevel);
878 }
879 return @TwoPointers;
880 }
881 else
882 {
883 if ( $strOutputFormat eq "xml" ) {
884 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
885 } else {
886 AlignPrint("Folder Comparsion","Failed",$intLevel);
887 }
888
889 foreach my $strEachItem (keys %hashCount)
890 {
891 if($hashCount{$strEachItem} ne 'B')
892 {
893 my $strOutput = "";
894 my $strReport = "";
895
896 if($hashCount{$strEachItem} eq 'M')
897 {
898 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
899 $strReport = "Difference Report: difference found at $strTestFolder";
900 }
901 elsif($hashCount{$strEachItem} eq 'T')
902 {
903 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
904 $strReport = "Difference Report: difference found at $strModelFolder";
905 }
906 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
907
908 VobPrint ("$strOutput\n",$intLevel);
909 $strOutput = $strOutput."\n\t".$strReport."\n";
910 push(@Errors,$strOutput);
911 }
912 else {push(@CorrectFiles,$strEachItem);}
913 }
914 if( $strOutputFormat eq "xml" ) {
915 print "</message></folder-comparison>";
916 }
917
918 return @TwoPointers;
919 }
920}
921
922sub VobPrint
923{
924 my ($strOutput, $intLevel) = @_;
925 my $strTab = "";
926 my $intTab = int($intLevel/2);
927 if($intLevel <= $gv_intVerbosity)
928 {
929 if($intLevel >= 1)
930 {
931 $strTab = "\t"x($intTab+1);
932 $strOutput =~ s/\n$//;
933 $strOutput =~ s/\n/\n$strTab/g;
934 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
935 }
936
937 if( $strOutputFormat eq "xml" ) {
938 $strOutput =~ s/&/&amp;/g;
939 $strOutput =~ s/</&amp;lt;/g;
940 $strOutput =~ s/>/&amp;gt;/g;
941 }
942
943 if ( length( $strOutput ) > 1000 ) {
944 $strOutput = substr( $strOutput, 0, 978);
945
946 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
947 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
948 if(defined $1 && $1) {
949 my $rest = $1;
950 if($rest =~ m/^a/) {
951 $strOutput =~ s@am?p?$@amp;@;
952 }
953 elsif($rest eq "g" || $rest eq "l") {
954 $strOutput .= "t;"; # close the known tag
955 }
956 elsif($rest eq "gt" || $rest eq "lt") {
957 $strOutput .= ";";
958 }
959 } else { # & on its own
960 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
961 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
962 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
963 }
964 }
965 $strOutput .= "... (output truncated)";
966 }
967
968
969 print $strTab.$strOutput."\n";
970 }
971}
972#----##
973
974
975#--Main System----------------------------
976#-----------------------------------------
977# Name: main
978# Perameters: arguments from command line
979# Pre-condition: testing will start by calling this main function.
980# Post-condition: output the test results for one or more collections.
981#-----------------------------------------
982sub main
983{
984 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
985 my $strProgName = $0;
986 my $intArgc = scalar(@ARGV);
987
988 #--System Arguments Setup
989 if (!parsargv::parse(\@ARGV,
990 'estop//off', \$strErrorStop,
991 'eshow//off', \$strErrorShow,
992 'debug', \$debugging,
993 'verbosity/\d+/1', \$intVerbosity,
994 'mode/[\w\-]+/all', \$strMode,
995 'output/[\w\-]+/text', \$strOutputFormat,
996 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
997 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
998 )) {
999 PrintUsage($strProgName);
1000 die "\n";
1001 }
1002
1003 if ($intArgc<1) {
1004 PrintUsage($strProgName);
1005 die "\n";
1006 }
1007
1008 $gv_test_os = $test_os; # if not specified, defaults to "compute"
1009 $gv_model_os = $model_os; # tends to be linux
1010
1011#print STDERR "@@@@@ test_os: $test_os\n";
1012#print STDERR "@@@@@ model_os: $model_os\n";
1013
1014 $gv_blnDebugging = $debugging;
1015 $gv_blnErrorStop = $strErrorStop;
1016 $gv_blnErrorShow = $strErrorShow;
1017 $gv_intVerbosity = $intVerbosity;
1018 $gv_strMode = SetMode($strMode);
1019
1020 #----##
1021
1022# To find out what version of perl we're using
1023 if( $strOutputFormat eq "xml" ) {
1024 my $perloutput = `perl -v`;
1025 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1026 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1027 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1028 }
1029
1030 #--Collection(s) Testing
1031 foreach $strColName (@ARGV)
1032 {
1033 my @ErrorsInEachCol;
1034 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1035 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1036
1037 #--Output(Start)
1038 OutputStart($strColName);
1039 #----##
1040
1041 if(-e $strModelCol && -e $strTestCol )
1042 {
1043
1044 #--Individual Testing
1045 if ($gv_strMode eq "Individual")
1046 {
1047 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1048 }
1049 #----##
1050
1051 #--Initial Testing
1052 elsif ($gv_strMode eq "Initial")
1053 {
1054 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1055 }
1056 #----##
1057
1058 #--Full Testing
1059 elsif ($gv_strMode eq "Full")
1060 {
1061 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1062 }
1063 #----##
1064
1065 #--Error Checking
1066 else
1067 {
1068 if ( $strOutputFormat eq "xml" ) {
1069 die "<error>Error occoured in main function</error>\n";
1070 } else {
1071 die "Error occoured in main function.\n";
1072 }
1073 }
1074 #----##
1075
1076 }
1077 else
1078 {
1079 if( $strOutputFormat eq "xml" ) {
1080 die "<error>Cannot find collection: $strColName</error>\n";
1081 } else {
1082 die "Error: cannot find collection: $strColName\n";
1083 }
1084 }
1085 #----##
1086
1087 #--Output(Results and Errors)
1088 OutputEnd($strColName,\@ErrorsInEachCol);
1089 #----##
1090
1091 }
1092}
1093#----##
1094
1095&main();
Note: See TracBrowser for help on using the repository browser.