source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl2@ 30652

Last change on this file since 30652 was 30652, checked in by ak19, 8 years ago

Committing outstanding files for diffcol supporting jdb for GS3 diffing. Not yet in use, but I want it on SVN and to not go missing.

  • Property svn:executable set to *
File size: 35.4 KB
RevLine 
[30652]1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnErrorStop = "false";
61$gv_blnErrorShow = "false";
62$gv_intVerbosity = 0;
63$gv_strMode = "Full";
64$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
65
66%gv_IndivList = ("archives" => 0,
67 "etc" => 0,
68 "images" => 0,
69 "building" => 0,
70 "import" => 0,
71 "index" => 0,
72 "log" => 0,
73 "metadata" => 0,
74 "perllib" => 0,
75 "temp" => 0);
76#----##
77
78#--System Setup---------------------------
79sub SetMode
80{
81 my ($strModeList) = @_;
82 $strModeList =~ s/\|/ /g;
83 my @Modes = split(" ",$strModeList);
84
85 my $blnIndividual = "true";
86 my $blnInitial = "false";
87 my $blnFull = "false";
88
89
90 foreach $strEachMode (@Modes)
91 {
92 if($strEachMode eq "all")
93 {
94 $blnFull = "true";
95 $blnIndividual = "false";
96 }
97 elsif($strEachMode eq "init")
98 {
99 $blnInitial = "true";
100 $blnIndividual = "false";
101 }
102 else
103 {
104 if(defined $gv_IndivList{$strEachMode})
105 {
106 $gv_IndivList{$strEachMode} = 1;
107 }
108 else
109 {
110 die Help("Error: used undefined mode");
111 }
112 }
113 }
114
115 if($blnFull eq "true") {return "Full";}
116 elsif($blnInitial eq "true") {return "Initial";}
117 elsif($blnIndividual eq "true") {return "Individual";}
118 else {die "Error occured in function SetMode!!\n";}
119}
120#----##
121
122#--System Process-------------------------
123sub IndivTest
124{
125 my ($strModelCol,$strTestCol,$strColName) = @_;
126 my @Errors = ();
127 my $intNumberOfErrors = 0;
128 foreach $strEachFolder (keys %gv_IndivList)
129 {
130 if($gv_IndivList{$strEachFolder} == 1)
131 {
132
133 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
134 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
135 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
136 $intNumberOfErrors = scalar(@Errors);
137 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
138 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
139 VobPrint("End Comparing \"$strEachFolder\"\n",0);
140 VobPrint("Difference Found: $intNumberOfErrors\n",0);
141 VobPrint ("\n",0);
142 }
143 }
144 return @Errors;
145}
146
147sub InitTest
148{
149
150 my ($strModelCol,$strTestCol,$strColName) = @_;
151 my $intLevel = 1;
152 my @Errors;
153
154 # Testing Log files
155# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
156# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
157#
158# if(-e $strModelLog && -e $strTestLog)
159# {
160# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
161# if($strLogError ne "")
162# {
163# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
164# VobPrint ("$strLogError\n",$intLevel);
165#
166# $strLogError = "$strLogError";
167# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
168# push(@Errors,$strLogError);
169# }
170# else
171# {
172# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
173# }
174# }
175# else
176# {
177# my $strErrorColName;
178# my $strLogError;
179#
180# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
181# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
182#
183# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
184# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
185# VobPrint ("$strLogError\n",$intLevel);
186# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
187#
188# push(@Errors,$strLogError);
189# }
190# VobPrint ("\n",$intLevel);
191
192 # Testing the build.cfg
193 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
194 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
195
196 if(-e $strModelBcfg && -e $strTestBcfg)
197 {
198 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
199 if($strBcfgError ne "")
200 {
201 if( $strOutputFormat eq "xml" ) {
202 print "<build-cfg succeeded=\"no\">\n<message>";
203 } else {
204 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
205 }
206
207 VobPrint ("$strBcfgError",$intLevel);
208 if( $strOutputFormat eq "xml" ) {
209 print "</message></build-cfg>";
210 }
211
212 $strBcfgError = "$strBcfgError";
213 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
214 push(@Errors,$strBcfgError);
215 }
216 else
217 {
218 if( $strOutputFormat eq "xml" ) {
219 print "<build-cfg succeeded=\"yes\"/>";
220 } else {
221 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
222 }
223 }
224 }
225 else
226 {
227 my $strErrorColName;
228 my $strBcfgError;
229
230 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
231 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
232
233 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
234 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
235 VobPrint ("$strBcfgError\n",$intLevel);
236 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
237
238 push(@Errors,$strBcfgError);
239 }
240 VobPrint ("\n",$intLevel);
241
242 # Testing the collect.cfg
243
244 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
245 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
246
247 if(-e $strModelCcfg && -e $strTestCcfg)
248 {
249 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
250 if($strCcfgError ne "")
251 {
252 if( $strOutputFormat eq "xml" ) {
253 print "<collect-cfg succeeded=\"no\"><message>";
254 } else {
255 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
256 }
257
258 VobPrint ("$strCcfgError",$intLevel);
259
260 if( $strOutputFormat eq "xml" ) {
261 print "</message></collect-cfg>";
262 }
263
264 $strCcfgError = "$strCcfgError";
265 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
266 push(@Errors,$strCcfgError);
267 }
268 else
269 {
270 if( $strOutputFormat eq "xml" ) {
271 print "<collect-cfg succeeded=\"yes\"/>";
272 } else {
273 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
274 }
275 }
276 }
277 else
278 {
279 my $strErrorColName;
280 my $strCcfgError;
281
282 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
283 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
284
285 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
286 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
287 VobPrint ("$strCcfgError\n",$intLevel);
288 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
289
290 push(@Errors,$strCcfgError);
291 }
292
293 VobPrint ("\n",$intLevel);
294
295 # Testing databases
296
297 # index
298 my $dbext = ".gdb"; # assume we're working with gdbm
299 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName");
300 if(-f "$strModelGdb$dbext") {
301 $strModelGdb .= $dbext;
302 } else { # gdbm file does not exist, try for jdbm file
303 $gdbext = ".jdb";
304 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName".$dbext);
305 }
306 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName".$dbext);
307 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
308 if($strGdbError) {
309 push(@Errors,$strGdbError);
310 }
311
312 # archives
313 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc".$dbext);
314 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc".$dbext);
315 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
316 if($strGdbError) {
317 push(@Errors,$strGdbError);
318 }
319
320 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src".$dbext);
321 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src".$dbext);
322 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
323 if($strGdbError) {
324 push(@Errors,$strGdbError);
325 }
326
327 VobPrint ("\n",$intLevel);
328
329 return @Errors;
330}
331
332
333# At present handles gdbm - need to expand to allow for jdbm and other db types
334sub GdbDiff
335{
336 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
337
338 my $strGdbError = 0;
339
340 if(-e $strModelGdb && -e $strTestGdb)
341 {
342 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
343 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
344 if($strGdbError ne "")
345 {
346 if( $strOutputFormat eq "xml" ) {
347 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
348 } else {
349 AlignPrint("Database Comparsion Result","Failed",$intLevel);
350 }
351 VobPrint ("$strGdbError\n",$intLevel);
352
353 if( $strOutputFormat eq "xml" ) {
354 print "</message></database>";
355 }
356
357 $strGdbError = "$strGdbError";
358 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
359 #push(@Errors,$strGdbError);
360 }
361 else
362 {
363 if( $strOutputFormat eq "xml" ) {
364 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
365 } else {
366 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
367 }
368 }
369 }
370 else
371 {
372 my $strErrorColName;
373
374 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
375 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
376
377 AlignPrint("Database Comparsion Result","Failed",$intLevel);
378
379 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
380 VobPrint ("$strGdbError\n",$intLevel);
381
382 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
383
384 }
385
386 return $strGdbError;
387}
388
389sub FullTest
390{
391 my ($strModelCol,$strTestCol,$strColName) = @_;
392 my @Errors = ();
393 my $intLevel = 0;
394 my $intNumberDiffs = 0;
395
396 # <Initial Test>
397 if( $strOutputFormat eq "xml" ) {
398 #print "<initial-test>";
399 } else {
400 VobPrint("Initial Testing Start\n",$intLevel);
401 }
402
403 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
404 $intNumberDiffs = scalar(@Errors);
405
406 if( $strOutputFormat eq "xml" ) {
407 #print "</initial-test>";
408 } else {
409 VobPrint("Initial Testing End\n",$intLevel);
410 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
411 VobPrint("\n",$intLevel);
412 }
413 # </Initial Test>
414
415 # <Detailed Test>
416 if( $strOutputFormat eq "xml" ) {
417 #print "<detailed-test>";
418 } else {
419 VobPrint("Detail Testing Start\n",$intLevel);
420 }
421 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
422 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
423
424 if( $strOutputFormat eq "xml" ) {
425 #print "</detailed-test>";
426 } else {
427 VobPrint("Detail Testing End\n",$intLevel);
428 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
429 }
430 # </Detailed Test>
431
432 return @Errors;
433}
434#----##
435
436
437#--Other System Utilities
438sub PrintUsage
439{
440 my ($strProgName) = @_;
441 if ( $strOutputFormat eq "xml" ) {
442 print "<error>usage incorrect</error>\n";
443 } else {
444 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
445 }
446 Help("Error: used incorrect parameters");
447}
448
449sub Help
450{
451 my ($strError) = @_;
452 my $aryptHelps =
453 [ { 'name' => "verbosity",
454 'type' => "scale",
455 'argu' => "a integer" ,
456 'descrip' => "this parameter setup the verbosity of the testing result"},
457 { 'name' => "mode",
458 'type' => "option",
459 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
460 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
461 { 'name' => "estop",
462 'type' => "flag",
463 'argu' => "NULL" ,
464 'descrip' => "Set then system will stop once it meet an error"},
465 { 'name' => "eshow",
466 'type' => "flag",
467 'argu' => "NULL" ,
468 'descrip' => "Set then system will show the error summary"}
469 ];
470
471
472 if ( $strOutputFormat ne "xml" ) {
473 print "$strError\n";
474
475 foreach my $hashOneArg (@{$aryptHelps})
476 {
477 print "\n----------------------------\n";
478 print "Parameters: -".$hashOneArg->{"name"}."\n";
479 print "Type: ".$hashOneArg->{"type"}."\n";
480 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
481 print "Description: ".$hashOneArg->{"descrip"}."\n";
482 print "----------------------------\n";
483 }
484 }
485}
486
487sub OutputStart
488{
489 my ($strColName) = @_;
490 my $intPadding = 17 - length($strColName);
491
492 if ( $strOutputFormat eq "xml" ) {
493 print "<diffcol>\n";
494 } else {
495 print "+---------------------------------------------------------+\n";
496 print "| |\n";
497 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
498 print "| |\n";
499 print "+---------------------------------------------------------+\n\n";
500 }
501}
502
503sub OutputEnd
504{
505 my ($strColName,$aryptErrors) = @_;
506 my $intPadding = 12 - length($strColName);
507 if ( $strOutputFormat eq "xml" ) {
508 print "</diffcol>\n";
509 } else {
510 print "\n";
511 print "+---------------------------------------------------------+\n";
512 print "| |\n";
513 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
514 print "| |\n";
515 print "+---------------------------------------------------------+\n\n";
516 }
517
518 my $intTotalErrors = scalar(@{$aryptErrors});
519 if ( $strOutputFormat ne "xml" ) {
520 print "Checking completed, there is $intTotalErrors error(s) found.\n";
521 }
522
523 if($gv_blnErrorShow ne "off")
524 {
525 foreach my $strEachError (@{$aryptErrors})
526 {
527 if ( $strOutputFormat eq "xml" ) {
528 print "<error>";
529 print $strEachError;
530 print "</error>\n";
531 } else {
532 print "+---------------------------------------------------------+\n";
533 print "| Error |\n";
534 print "+---------------------------------------------------------+\n\n";
535 print "$strEachError\n\n";
536 }
537 }
538 }
539 else
540 {
541 if ( $strOutputFormat ne "xml" ) {
542 print "Use -eshow to show the error detail\n\n";
543 }
544 }
545}
546
547sub AlignPrint
548{
549 my ($strMainString,$strStatus,$intLevel) = @_;
550 my $intDot = 100 - length($strMainString) - length($strStatus);
551 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
552}
553
554
555# this function is only called on DocXMLFiles.
556# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
557# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
558sub isDocOrMETSXMLFileWindows
559{
560 my ($file_contents) = @_;
561
562 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
563
564 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
565 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
566
567 # for doc.xml:
568 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
569 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
570 $gsdlsourcefilename = $2;
571 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
572 return 1;
573 }
574 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
575 return 1;
576 }
577
578 return 0;
579}
580
581sub TestEach
582{
583 my ($strModel,$strTest,$intLevel,$strColName) = @_;
584 my @Errors = ();
585
586 $intLevel++;
587 if (-d $strModel && -d $strTest)
588 {
589 my @aryInModel = &diffutil::files_in_dir($strModel);
590 my @aryInTest = &diffutil::files_in_dir($strTest);
591
592 # Files to be skipped because they get generated on one OS but not the other
593 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
594 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
595 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
596 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
597
598 # Now check all remaining files in the folder exist in both model and test collections
599 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
600 my @aryCorrectFiles = @{$aryTwoPointers[1]};
601 @Errors = @{$aryTwoPointers[0]};
602
603 if(scalar(@Errors) == 0)
604 {
605 foreach my $strEachFile (@aryInModel)
606 {
607 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
608 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
609 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
610 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
611 {
612 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
613 }
614 else
615 {
616 if ( $strOutputFormat eq "xml" ) {
617 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
618 } else {
619 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
620 }
621 }
622 }
623 }
624 else
625 {
626 foreach my $strEachFile (@aryCorrectFiles)
627 {
628 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
629 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
630 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
631 {
632 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
633 }
634 }
635 }
636 if($intLevel == $gv_intVerbosity)
637 {
638 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
639 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
640 }
641 }
642 else
643 {
644 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
645 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
646
647 my $strResult;
648
649 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
650 # in case these don't appear in the same order between collections, since
651 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
652 # when they can show up as unnecessary diff 'errors'
653
654 my ($model_contents, $test_contents);
655
656 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
657 if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
658
659 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
660 sysread(FIN, $model_contents, -s FIN);
661 close(FIN);
662 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
663 sysread(FIN, $test_contents, -s FIN);
664 close(FIN);
665
666 $model_contents =~ s/$ignore_line_re//g;
667 $test_contents =~ s/$ignore_line_re//g;
668
669
670 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
671 # before comparing a windows test with a linux model or vice-versa
672 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
673 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
674
675 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
676
677 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
678 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
679
680 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
681 $$win_contents =~ s@[\r]@@g;
682
683 # make all single windows slashes into single unix slashes
684 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
685 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
686 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
687
688 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
689 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
690
691 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
692 # $$win_contents =~ s@\r@\n@mg;
693
694 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
695 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
696 # containing solitary carriage returns instead of linefeed
697 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
698
699
700 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
701 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
702 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
703 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
704 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
705
706 # Advanced Beatles collection,
707 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
708 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
709 # Normalising to windows version for doing a diff
710 $$lin_contents =~ s@href=http:///@href=http://@g;
711 }
712 }
713
714 # processing particular to doc.xml
715 if($strModel =~ m/doc\.xml$/) {
716 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
717 # these tmpdirs are located inside the collection directory
718 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
719 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
720
721 # remove all absolute paths upto collect folder from <Metadata /> elements
722 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
723 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
724
725 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
726 # These tmpdirs are located inside the toplevel *greenstone* directory
727 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
728 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
729 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
730
731 if($test_contents =~ m@$tmpfile_regex@) {
732 # found a match, replace the tmp file name with "random", keeping the original file extension
733 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
734
735 my ($old_tmp_filename, $ext) = ($1, $2);
736 my $new_tmp_filename = "random";
737
738 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
739 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
740 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
741 if($5) {
742 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
743 } else { # OrigSource contains only the filename
744 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
745 }
746
747 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
748 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
749 if($5) {
750 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
751 } else { # OrigSource contains only the filename
752 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
753 }
754 }
755
756 } # finished special processing of doc.xml files
757
758 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
759# &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
760# &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
761# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
762# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
763# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
764# }
765
766
767
768 # now can diff the normalised versions of the doc.xml/docmets.xml files:
769 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
770
771 } else {
772 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
773 }
774
775 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
776 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
777 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
778
779 #$strResult = GeneralOutput($strResult);
780 if ( $strOutputFormat eq "xml" ) {
781 #
782 } else {
783 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
784 }
785 if ($strResult eq "")
786 {
787 if ( $strOutputFormat eq "xml" ) {
788 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
789 } else {
790 AlignPrint("Comparing File","Succeed",$intLevel);
791 }
792 }
793 else
794 {
795# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
796
797 my $strOutput = "Difference Report:\n$strResult\n";
798 if ( $strOutputFormat eq "xml" ) {
799 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
800 } else {
801 AlignPrint("Comparing File","Failed",$intLevel);
802 }
803
804 #$result=`file -b $strModel`; # linux specific test for binary file
805 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
806 if ( "$result" =~ "data" ) {
807 VobPrint( "These binary files differ", $intLevel );
808 } else {
809 VobPrint ( "$strOutput" , $intLevel);
810 }
811
812
813 if ( $strOutputFormat eq "xml" ) {
814 print "</message></file-comparison>";
815 }
816
817 if($gv_blnErrorStop ne "off") { exit; }
818 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
819 }
820 }
821
822 return @Errors;
823}
824
825
826sub FolderTesting
827{
828 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
829 my %hashCount = ();
830 my @Errors = ();
831 my @CorrectFiles = ();
832 my @TwoPointers = (\@Errors,\@CorrectFiles);
833
834 if ( $strOutputFormat eq "xml" ) {
835 #print "<folder-comparison location=\"$strModelFolder\">\n";
836 } else {
837 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
838 }
839
840 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
841 foreach my $strEachItem (@$aryptTest)
842 {
843 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
844 else {$hashCount{$strEachItem} = 'T';}
845 }
846
847 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
848 {
849 if ( $strOutputFormat eq "xml" ) {
850 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
851 } else {
852 AlignPrint("Folder Comparsion","Succeed",$intLevel);
853 }
854 return @TwoPointers;
855 }
856 else
857 {
858 if ( $strOutputFormat eq "xml" ) {
859 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
860 } else {
861 AlignPrint("Folder Comparsion","Failed",$intLevel);
862 }
863
864 foreach my $strEachItem (keys %hashCount)
865 {
866 if($hashCount{$strEachItem} ne 'B')
867 {
868 my $strOutput = "";
869 my $strReport = "";
870
871 if($hashCount{$strEachItem} eq 'M')
872 {
873 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
874 $strReport = "Difference Report: difference found at $strTestFolder";
875 }
876 elsif($hashCount{$strEachItem} eq 'T')
877 {
878 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
879 $strReport = "Difference Report: difference found at $strModelFolder";
880 }
881 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
882
883 VobPrint ("$strOutput\n",$intLevel);
884 $strOutput = $strOutput."\n\t".$strReport."\n";
885 push(@Errors,$strOutput);
886 }
887 else {push(@CorrectFiles,$strEachItem);}
888 }
889 if( $strOutputFormat eq "xml" ) {
890 print "</message></folder-comparison>";
891 }
892
893 return @TwoPointers;
894 }
895}
896
897sub VobPrint
898{
899 my ($strOutput, $intLevel) = @_;
900 my $strTab = "";
901 my $intTab = int($intLevel/2);
902 if($intLevel <= $gv_intVerbosity)
903 {
904 if($intLevel >= 1)
905 {
906 $strTab = "\t"x($intTab+1);
907 $strOutput =~ s/\n$//;
908 $strOutput =~ s/\n/\n$strTab/g;
909 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
910 }
911
912 if( $strOutputFormat eq "xml" ) {
913 $strOutput =~ s/&/&amp;/g;
914 $strOutput =~ s/</&lt;/g;
915 $strOutput =~ s/>/&gt;/g;
916 }
917
918 if ( length( $strOutput ) > 1000 ) {
919 $strOutput = substr( $strOutput, 0, 978);
920
921 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
922 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
923 if(defined $1 && $1) {
924 my $rest = $1;
925 if($rest =~ m/^a/) {
926 $strOutput =~ s@am?p?$@amp;@;
927 }
928 elsif($rest eq "g" || $rest eq "l") {
929 $strOutput .= "t;"; # close the known tag
930 }
931 elsif($rest eq "gt" || $rest eq "lt") {
932 $strOutput .= ";";
933 }
934 } else { # & on its own
935 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
936 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
937 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
938 }
939 }
940 $strOutput .= "... (output truncated)";
941 }
942
943
944 print $strTab.$strOutput."\n";
945 }
946}
947#----##
948
949
950#--Main System----------------------------
951#-----------------------------------------
952# Name: main
953# Perameters: arguments from command line
954# Pre-condition: testing will start by calling this main function.
955# Post-condition: output the test results for one or more collections.
956#-----------------------------------------
957sub main
958{
959 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
960 my $strProgName = $0;
961 my $intArgc = scalar(@ARGV);
962
963 #--System Arguments Setup
964 if (!parsargv::parse(\@ARGV,
965 'estop//off', \$strErrorStop,
966 'eshow//off', \$strErrorShow,
967 'verbosity/\d+/1', \$intVerbosity,
968 'mode/[\w\-]+/all', \$strMode,
969 'output/[\w\-]+/text', \$strOutputFormat,
970 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
971 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
972 )) {
973 PrintUsage($strProgName);
974 die "\n";
975 }
976
977 if ($intArgc<1) {
978 PrintUsage($strProgName);
979 die "\n";
980 }
981
982 $gv_test_os = $test_os; # if not specified, defaults to "compute"
983 $gv_model_os = $model_os; # tends to be linux
984
985 $gv_blnErrorStop = $strErrorStop;
986 $gv_blnErrorShow = $strErrorShow;
987 $gv_intVerbosity = $intVerbosity;
988 $gv_strMode = SetMode($strMode);
989
990 #----##
991
992 #--Collection(s) Testing
993 foreach $strColName (@ARGV)
994 {
995 my @ErrorsInEachCol;
996 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
997 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
998
999 #--Output(Start)
1000 OutputStart($strColName);
1001 #----##
1002
1003 if(-e $strModelCol && -e $strTestCol )
1004 {
1005
1006 #--Individual Testing
1007 if ($gv_strMode eq "Individual")
1008 {
1009 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1010 }
1011 #----##
1012
1013 #--Initial Testing
1014 elsif ($gv_strMode eq "Initial")
1015 {
1016 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1017 }
1018 #----##
1019
1020 #--Full Testing
1021 elsif ($gv_strMode eq "Full")
1022 {
1023 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1024 }
1025 #----##
1026
1027 #--Error Checking
1028 else
1029 {
1030 if ( $strOutputFormat eq "xml" ) {
1031 die "<error>Error occoured in main function</error>\n";
1032 } else {
1033 die "Error occoured in main function.\n";
1034 }
1035 }
1036 #----##
1037
1038 }
1039 else
1040 {
1041 if( $strOutputFormat eq "xml" ) {
1042 die "<error>Cannot find collection: $strColName</error>\n";
1043 } else {
1044 die "Error: cannot find collection: $strColName\n";
1045 }
1046 }
1047 #----##
1048
1049 #--Output(Results and Errors)
1050 OutputEnd($strColName,\@ErrorsInEachCol);
1051 #----##
1052
1053 }
1054}
1055#----##
1056
1057&main();
Note: See TracBrowser for help on using the repository browser.