source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28661

Last change on this file since 28661 was 28661, checked in by ak19, 7 years ago

Committing the next installment of code to handle diffcol for GS3. Now it successfully compiles up GS3, while diffcol still works for GS2.

File size: 35.1 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
# test.pl -- for testing whether a built collection is consistent with the model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
my ($gv_test_os, $gv_model_os); # still just file globals

# Leading path components of the collect folders: below web/sites/localsite
# of GSDL3SRCHOME when that variable is set, directly below GSDLHOME otherwise.
my @aryColParent = ($ENV{'GSDLHOME'});
@aryColParent = ($ENV{'GSDL3SRCHOME'},"web","sites","localsite") if($ENV{'GSDL3SRCHOME'});

$gv_strModelColRoot = &FileUtils::filenameConcatenate(@aryColParent,"/model-collect");
$gv_strTestColRoot = &FileUtils::filenameConcatenate(@aryColParent,"/collect");

# Default settings.
($gv_blnErrorStop, $gv_blnErrorShow) = ("false", "false");
$gv_intVerbosity = 0;
$gv_strMode = "Full";
$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default

# Collection sub-folders that can be tested individually; every flag starts at 0.
%gv_IndivList = map { ($_ => 0) } ("archives",
                                   "etc",
                                   "images",
                                   "building",
                                   "import",
                                   "index",
                                   "log",
                                   "metadata",
                                   "perllib",
                                   "temp");
76#----##
77
78#--System Setup---------------------------
# Works out the testing mode from a list of mode names.
#
# Parameter:
#   $strModeList - mode names separated by "|" and/or whitespace.
#
# "all" selects the full test and "init" the initial test. Any other name
# must be a key of %gv_IndivList; its flag is set to 1 so that the folder
# is selected for individual testing.
#
# Returns "Full" if "all" was given, otherwise "Initial" if "init" was
# given, otherwise "Individual".
# Prints the help text and dies when a name is not a known mode.
sub SetMode
{
    my ($strModeList) = @_;
    $strModeList =~ s/\|/ /g;

    my $blnFull = 0;
    my $blnInitial = 0;

    foreach my $strEachMode (split(" ",$strModeList))
    {
        if($strEachMode eq "all")
        {
            $blnFull = 1;
        }
        elsif($strEachMode eq "init")
        {
            $blnInitial = 1;
        }
        elsif(exists $gv_IndivList{$strEachMode})
        {
            $gv_IndivList{$strEachMode} = 1;
        }
        else
        {
            # Die with an explicit message instead of relying on the
            # return value of Help().
            Help("Error: used undefined mode");
            die "Error: used undefined mode \"$strEachMode\"\n";
        }
    }

    # "all" takes precedence over "init", which takes precedence over
    # individually selected folders.
    return "Full" if($blnFull);
    return "Initial" if($blnInitial);
    return "Individual";
}
120#----##
121
122#--System Process-------------------------
# Compares every individually selected folder of a collection.
#
# Parameters:
#   $strModelCol - path of the model collection
#   $strTestCol  - path of the test collection
#   $strColName  - collection name, passed through to TestEach
#
# A folder is selected when its flag in %gv_IndivList is 1. Folders are
# processed in sorted name order so that the report is the same from run
# to run.
#
# Returns the list of error strings collected from TestEach.
sub IndivTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my @Errors = ();

    foreach my $strEachFolder (sort keys %gv_IndivList)
    {
        next unless($gv_IndivList{$strEachFolder} == 1);

        VobPrint("Start Comparing \"$strEachFolder\"\n",0);
        my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
        my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);

        # Count only the errors added by this folder.
        my $intErrorsBefore = scalar(@Errors);
        push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
        my $intNumberOfErrors = scalar(@Errors) - $intErrorsBefore;

        VobPrint("End Comparing \"$strEachFolder\"\n",0);
        VobPrint("Difference Found: $intNumberOfErrors\n",0);
        VobPrint ("\n",0);
    }
    return @Errors;
}
146
# Compares one configuration file of the model collection with the same
# file of the test collection and reports the outcome.
#
# Parameters:
#   $strModelCfg    - path of the file in the model collection
#   $strTestCfg     - path of the file in the test collection
#   $strCfgName     - file name used in messages and passed to cfgdiff::test_cfg
#   $strXmlTag      - element name used for the XML report
#   $strXmlFailOpen - exact text that opens a failed XML report
#   $intLevel       - verbosity level handed to AlignPrint/VobPrint
#
# Returns an empty list when the files agree, otherwise a one-element list
# holding the error text.
sub _CfgDiff
{
    my ($strModelCfg,$strTestCfg,$strCfgName,$strXmlTag,$strXmlFailOpen,$intLevel) = @_;
    my $strTitle = "Config File($strCfgName) Comparison Result";
    my $strErrorHeader = "Difference Found at Config File($strCfgName) Comparison\n";

    if(-e $strModelCfg && -e $strTestCfg)
    {
        my $strCfgError = cfgdiff::test_cfg($strModelCfg,$strTestCfg,$strCfgName);
        if($strCfgError eq "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "<$strXmlTag succeeded=\"yes\"/>";
            } else {
                AlignPrint($strTitle,"Succeed",$intLevel);
            }
            return ();
        }

        if( $strOutputFormat eq "xml" ) {
            print $strXmlFailOpen;
        } else {
            AlignPrint($strTitle,"Failed",$intLevel);
        }

        VobPrint ("$strCfgError",$intLevel);

        if( $strOutputFormat eq "xml" ) {
            print "</message></$strXmlTag>";
        }

        return ($strErrorHeader.$strCfgError."\n");
    }

    # At least one of the two files is missing: name the collection(s)
    # lacking it. Starts from "" so no undefined value is concatenated.
    my $strErrorColName = "";
    $strErrorColName .= "(Model Collection)" unless(-e $strModelCfg);
    $strErrorColName .= "(Test Collection)" unless(-e $strTestCfg);

    AlignPrint($strTitle,"Failed",$intLevel);
    my $strCfgError = "Difference Report: No Config files found in $strErrorColName";
    VobPrint ("$strCfgError\n",$intLevel);

    return ($strErrorHeader.$strCfgError."\n");
}

# Runs the initial test of a collection: index/build.cfg, etc/collect.cfg
# and the three database files (index/text/<collection>.gdb,
# archives/archiveinf-doc.gdb and archives/archiveinf-src.gdb).
#
# Parameters:
#   $strModelCol - path of the model collection
#   $strTestCol  - path of the test collection
#   $strColName  - collection name
#
# Returns the list of error strings, empty when nothing differs.
sub InitTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 1;
    my @Errors;

    # Log folder testing (logdiff::test_log on the "log" folders of both
    # collections) used to be done at this point and is disabled.

    # Testing the build.cfg
    push(@Errors,_CfgDiff(&FileUtils::filenameConcatenate($strModelCol,"index","build.cfg"),
                          &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg"),
                          "build.cfg","build-cfg",
                          "<build-cfg succeeded=\"no\">\n<message>",
                          $intLevel));
    VobPrint ("\n",$intLevel);

    # Testing the collect.cfg
    push(@Errors,_CfgDiff(&FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg"),
                          &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg"),
                          "collect.cfg","collect-cfg",
                          "<collect-cfg succeeded=\"no\"><message>",
                          $intLevel));
    VobPrint ("\n",$intLevel);

    # Testing databases: the index database first, then the two archives
    # databases. Each entry is the path below the collection folder.
    my @aryGdbPaths = (["index","text","$strColName.gdb"],
                       ["archives","archiveinf-doc.gdb"],
                       ["archives","archiveinf-src.gdb"]);

    foreach my $aryptPath (@aryGdbPaths)
    {
        my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,@{$aryptPath});
        my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,@{$aryptPath});
        # GdbDiff returns a false value if there is no error
        my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
        if($strGdbError) {
            push(@Errors,$strGdbError);
        }
    }

    VobPrint ("\n",$intLevel);

    return @Errors;
}
324
325
# Compares a database file of the model collection with the same file of
# the test collection and reports the outcome.
# At present handles gdbm - need to expand to allow for jdbm and other db types
#
# Parameters:
#   $strModelGdb, $strTestGdb - paths of the two database files
#   $strOutputFormat          - "xml" for XML reporting, anything else for text
#   $intLevel                 - verbosity level handed to AlignPrint/VobPrint
#   $strColName, $strTestCol, $strModelCol - passed through to gdbdiff::test_gdb
#
# Returns a false value ("" or 0) when no difference was found, otherwise
# the error text.
sub GdbDiff
{
    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;

    my $strGdbError = 0;

    if(-e $strModelGdb && -e $strTestGdb)
    {
        $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
        if($strGdbError ne "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
            } else {
                AlignPrint("Database Comparsion Result","Failed",$intLevel);
            }
            VobPrint ("$strGdbError\n",$intLevel);

            if( $strOutputFormat eq "xml" ) {
                print "</message></database>";
            }

            $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
        }
        else
        {
            if( $strOutputFormat eq "xml" ) {
                print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
            } else {
                AlignPrint("Database Comparsion Result","Succeed",$intLevel);
            }
        }
    }
    else
    {
        # Name the collection(s) lacking the database file. Starts from ""
        # so no undefined value is concatenated.
        my $strErrorColName = "";
        $strErrorColName .= "(Model Collection)" unless(-e $strModelGdb);
        $strErrorColName .= "(Test Collection)" unless(-e $strTestGdb);

        AlignPrint("Database Comparsion Result","Failed",$intLevel);

        $strGdbError = "Difference Report: No Database files found in $strErrorColName";
        VobPrint ("$strGdbError\n",$intLevel);

        $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
    }

    return $strGdbError;
}
381
# Runs the initial test (InitTest) followed by the detailed test (TestEach)
# on a collection and returns the error strings of both, initial ones first.
# The start/end banners and per-phase difference counts are only printed
# when the output format is not "xml".
sub FullTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 0;
    my $blnTextReport = ($strOutputFormat ne "xml");

    # <Initial Test>
    VobPrint("Initial Testing Start\n",$intLevel) if($blnTextReport);

    my @Errors = InitTest($strModelCol,$strTestCol,$strColName);
    my $intInitialDiffs = scalar(@Errors);

    if($blnTextReport)
    {
        VobPrint("Initial Testing End\n",$intLevel);
        VobPrint("Difference Found in Initial Testing: $intInitialDiffs\n",$intLevel);
        VobPrint("\n",$intLevel);
    }
    # </Initial Test>

    # <Detailed Test>
    VobPrint("Detail Testing Start\n",$intLevel) if($blnTextReport);

    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
    my $intDetailDiffs = scalar(@Errors) - $intInitialDiffs;

    if($blnTextReport)
    {
        VobPrint("Detail Testing End\n",$intLevel);
        VobPrint("Difference Found in Detail Testing: $intDetailDiffs\n",$intLevel);
    }
    # </Detailed Test>

    return @Errors;
}
427#----##
428
429
430#--Other System Utilities
# Reports incorrect usage: an <error> element on STDOUT for XML output,
# the usage line on STDERR otherwise. Help() is called afterwards.
sub PrintUsage
{
    my ($strProgName) = @_;
    if ( $strOutputFormat ne "xml" ) {
        print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
    } else {
        print "<error>usage incorrect</error>\n";
    }
    Help("Error: used incorrect parameters");
}
441
# Prints an error message followed by the description of every command
# line parameter. Nothing is printed when the output format is "xml".
#
# Parameter:
#   $strError - message printed first
#
# Returns the message with a trailing newline, so that a caller writing
# `die Help(...)` dies with a meaningful text.
sub Help
{
    my ($strError) = @_;
    # The mode names listed for -mode are "all", "init" and the keys of
    # %gv_IndivList, which are the only names SetMode accepts.
    my $aryptHelps =
        [ { 'name' => "verbosity",
            'type' => "scale",
            'argu' => "a integer" ,
            'descrip' => "this parameter setup the verbosity of the testing result"},
          { 'name' => "mode",
            'type' => "option",
            'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|log|metadata|perllib|temp]\" default to \"all\"" ,
            'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
          { 'name' => "estop",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will stop once it meet an error"},
          { 'name' => "eshow",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will show the error summary"}
        ];

    if ( $strOutputFormat ne "xml" ) {
        print "$strError\n";

        foreach my $hashOneArg (@{$aryptHelps})
        {
            print "\n----------------------------\n";
            print "Parameters: -".$hashOneArg->{"name"}."\n";
            print "Type: ".$hashOneArg->{"type"}."\n";
            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
            print "Description: ".$hashOneArg->{"descrip"}."\n";
            print "----------------------------\n";
        }
    }

    return "$strError\n";
}
479
# Prints the opening of a collection report: "<diffcol>" for XML output,
# a text banner naming the collection otherwise.
#
# Parameter:
#   $strColName - name of the collection under test
sub OutputStart
{
    my ($strColName) = @_;

    # The banner pads names shorter than 17 characters. Longer names get no
    # padding; the count is clamped because a negative repeat count
    # triggers a warning.
    my $intPadding = 17 - length($strColName);
    $intPadding = 0 if($intPadding < 0);

    if ( $strOutputFormat eq "xml" ) {
        print "<diffcol>\n";
    } else {
        print "+---------------------------------------------------------+\n";
        print "| |\n";
        print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
        print "| |\n";
        print "+---------------------------------------------------------+\n\n";
    }
}
495
# Prints the end of a collection report followed by the collected errors.
#
# Parameters:
#   $strColName  - name of the collection that was tested
#   $aryptErrors - reference to the array of error strings
#
# For XML output "</diffcol>" is printed and each error is written as an
# <error> element with &, < and > escaped (the error strings hold raw diff
# text). For text output a banner, the error count and one block per error
# are printed. Errors are listed unless $gv_blnErrorShow is "off".
#
# NOTE(review): the <error> elements are written after "</diffcol>", i.e.
# outside that element - confirm that the consumer of the XML expects this.
sub OutputEnd
{
    my ($strColName,$aryptErrors) = @_;

    # Names longer than 12 characters get no padding; the count is clamped
    # because a negative repeat count triggers a warning.
    my $intPadding = 12 - length($strColName);
    $intPadding = 0 if($intPadding < 0);

    if ( $strOutputFormat eq "xml" ) {
        print "</diffcol>\n";
    } else {
        print "\n";
        print "+---------------------------------------------------------+\n";
        print "| |\n";
        print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
        print "| |\n";
        print "+---------------------------------------------------------+\n\n";
    }

    my $intTotalErrors = scalar(@{$aryptErrors});
    if ( $strOutputFormat ne "xml" ) {
        print "Checking completed, there is $intTotalErrors error(s) found.\n";
    }

    if($gv_blnErrorShow ne "off")
    {
        foreach my $strEachError (@{$aryptErrors})
        {
            if ( $strOutputFormat eq "xml" ) {
                # Escape a copy so the caller's error list stays untouched.
                my $strEscaped = $strEachError;
                $strEscaped =~ s/&/&amp;/g;
                $strEscaped =~ s/</&lt;/g;
                $strEscaped =~ s/>/&gt;/g;

                print "<error>";
                print $strEscaped;
                print "</error>\n";
            } else {
                print "+---------------------------------------------------------+\n";
                print "| Error |\n";
                print "+---------------------------------------------------------+\n\n";
                print "$strEachError\n\n";
            }
        }
    }
    else
    {
        if ( $strOutputFormat ne "xml" ) {
            print "Use -eshow to show the error detail\n\n";
        }
    }
}
539
# Hands VobPrint a line made of the main string, a run of dots and the
# status, followed by a newline. The number of dots is 100 minus the
# lengths of the main string and the status.
sub AlignPrint
{
    my ($strMainString,$strStatus,$intLevel) = @_;
    my $intDot = 100 - (length($strMainString) + length($strStatus));
    my $strDots = "." x $intDot;
    VobPrint ("$strMainString$strDots$strStatus\n",$intLevel);
}
546
547
# This function is only called on the contents of doc.xml/docmets.xml files.
# So far only these files need special Windows processing (the OS
# sensitivity of the db files is handled in gdbdiff.pm).
#
# Parameter:
#   $file_contents - the text of the file
#
# Returns 1 if the gsdlsourcefilename metadata value contains a backslash,
# e.g. for doc.xml:
#   <Metadata name="gsdlsourcefilename">import\html_files\cleves.html</Metadata>
# or, when there is no such metadata element, if a <Doc ... file="...\doc.xml" ...>
# element is found (doc.xml in index/text/HASHxxx.dir). Returns 0 otherwise.
# The Metadata element may carry a namespace prefix.
sub isDocOrMETSXMLFileWindows
{
    my ($file_contents) = @_;

    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
        # Lexical copy of the captured value, so nothing leaks into a
        # package global.
        my $gsdlsourcefilename = $2;
        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
            return 1;
        }
    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
        return 1;
    }

    return 0;
}
573
# Returns 1 for directory entries that TestEach neither descends into nor
# diffs: "log", "earliestDatestamp", *.cfg, collect.bak and the database,
# index and image file types matched below. Returns 0 otherwise.
sub _IsBlockedFile
{
    my ($strFileName) = @_;

    return 1 if($strFileName eq "log" || $strFileName eq "earliestDatestamp");
    return 1 if($strFileName =~ m/\.cfg$/ || $strFileName =~ m/collect\.bak$/);
    # wmf = windows meta file
    return 1 if($strFileName =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/);
    return 0;
}

# Reads a file with a single unbuffered sysread of the file's size and
# returns what was read. Dies when the file cannot be opened.
sub _ReadFileRaw
{
    my ($strFile) = @_;
    my $strContents = "";

    # 3-arg open on a lexical handle: the file name is never interpreted
    # as an open mode.
    open(my $fin, "<", $strFile) or die "Unable to open $strFile...ERROR: $!\n";
    sysread($fin, $strContents, -s $fin);
    close($fin);

    return $strContents;
}

# Recursively compares a model file or folder with its test counterpart.
#
# Parameters:
#   $strModel   - path in the model collection
#   $strTest    - path in the test collection
#   $intLevel   - nesting level of the caller (incremented here)
#   $strColName - collection name, used when normalising doc.xml paths
#
# When both paths are directories their listings are compared with
# FolderTesting and the entries are tested recursively, except for those
# rejected by _IsBlockedFile. Otherwise the two files are diffed;
# doc.xml/docmets.xml files are normalised in memory first.
#
# Returns the list of error strings. Exits the program on the first file
# difference unless $gv_blnErrorStop is "off".
sub TestEach
{
    my ($strModel,$strTest,$intLevel,$strColName) = @_;
    my @Errors = ();

    $intLevel++;
    if (-d $strModel && -d $strTest)
    {
        my @aryInModel = &diffutil::files_in_dir($strModel);
        my @aryInTest = &diffutil::files_in_dir($strTest);

        # Files to be skipped because they get generated on one OS but not the other
        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel;
        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;

        # Now check all remaining files in the folder exist in both model and test collections
        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
        my @aryCorrectFiles = @{$aryTwoPointers[1]};
        @Errors = @{$aryTwoPointers[0]};

        # With identical listings every model entry is tested; otherwise
        # only the entries present on both sides. Decided once, before the
        # recursion adds to @Errors.
        my $blnFoldersMatch = (scalar(@Errors) == 0);
        my @aryToTest = $blnFoldersMatch ? @aryInModel : @aryCorrectFiles;

        foreach my $strEachFile (@aryToTest)
        {
            if(_IsBlockedFile($strEachFile))
            {
                # Blocked entries are only reported when the listings matched.
                if($blnFoldersMatch)
                {
                    if ( $strOutputFormat eq "xml" ) {
                        # NOTE(review): element name is spelt "file-comparision" here but "file-comparison" below - confirm which one consumers expect.
                        print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
                    } else {
                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
                    }
                }
                next;
            }

            my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
            my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
            push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
        }

        if($intLevel == $gv_intVerbosity)
        {
            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
            else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
        }
    }
    else
    {
        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";

        my $strResult;

        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
        # in case these don't appear in the same order between collections, since
        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
        # when they can show up as unnecessary diff 'errors'

        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {

            my $model_contents = _ReadFileRaw($strModel);
            my $test_contents = _ReadFileRaw($strTest);

            $model_contents =~ s/$ignore_line_re//g;
            $test_contents =~ s/$ignore_line_re//g;

            # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
            # before comparing a windows test with a linux model or vice-versa
            my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
            my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);

            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant

                my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
                my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;

                # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
                $$win_contents =~ s@[\r]@@g;

                # make all single windows slashes into single unix slashes
                # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
                    # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
                    # containing solitary carriage returns instead of linefeed
                    $$lin_contents =~ s@[\r]@@g;

                    # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
                    # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
                    # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                    # Advanced Beatles collection,
                    # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
                    # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
                    # Normalising to windows version for doing a diff
                    $$lin_contents =~ s@href=http:///@href=http://@g;
                }
            }

            # processing particular to doc.xml
            if($strModel =~ m/doc\.xml$/) {
                # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
                # these tmpdirs are located inside the collection directory
                $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
                $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;

                # remove all absolute paths upto collect folder from <Metadata /> elements
                $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
                $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;

                # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
                # These tmpdirs are located inside the toplevel *greenstone* directory
                #
                # NOTE(review): this used to be built from $ENV{'GSDLHOME'} with a ".*" fallback guarded by
                # `unless $$ENV{'GSDLHOME'}`. That guard reads the scalar $ENV as a hash reference, not %ENV,
                # so the fallback applied on every call. The wildcard is kept so results do not change;
                # confirm whether matching the real GSDLHOME (via quotemeta) is wanted.
                my $gsdlhome_re = ".*";
                my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long

                if($test_contents =~ m@$tmpfile_regex@) {
                    # found a match, replace the tmp file name with "random", keeping the original file extension
                    # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">

                    my ($old_tmp_filename, $ext) = ($1, $2);
                    my $new_tmp_filename = "random";

                    ## Matching on $old_tmp_filename does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
                    # NOTE(review): $5 is tested before the substitution runs, so it still belongs to the
                    # previous successful match - confirm the intent of this condition.
                    if($5) {
                        $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }

                    # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
                    if($5) {
                        $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }
                }

            } # finished special processing of doc.xml files

            my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
#           &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
#           &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
#           if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
#               &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
#               &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
#           }

            # now can diff the normalised versions of the doc.xml/docmets.xml files:
            $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };

        } else {
            $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
        }

        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");

        if ( $strOutputFormat ne "xml" ) {
            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
        }

        if ($strResult eq "")
        {
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
            } else {
                AlignPrint("Comparing File","Succeed",$intLevel);
            }
        }
        else
        {
            my $strOutput = "Difference Report:\n$strResult\n";
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
            } else {
                AlignPrint("Comparing File","Failed",$intLevel);
            }

            # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
            # The diff of a binary file is replaced by a short notice in
            # the progress output; the full text still goes into the error.
            if ( -B $strModel ) {
                VobPrint( "These binary files differ", $intLevel );
            } else {
                VobPrint ( "$strOutput" , $intLevel);
            }

            if ( $strOutputFormat eq "xml" ) {
                print "</message></file-comparison>";
            }

            # NOTE(review): every value other than "off" stops the program here, including the
            # "false" default set with the other globals - confirm the option handling sets "off".
            if($gv_blnErrorStop ne "off") { exit; }
            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
        }
    }

    return @Errors;
}
817
818
# Compares the listings of a model folder and a test folder.
#
# Parameters:
#   $aryptModel     - reference to the array of entry names in the model folder
#   $aryptTest      - reference to the array of entry names in the test folder
#   $strModelFolder - path of the model folder (used in messages)
#   $strTestFolder  - path of the test folder (used in messages)
#   $intLevel       - verbosity level handed to AlignPrint/VobPrint
#
# Returns a two-element list: a reference to the array of error strings and
# a reference to the array of entries found on both sides. When the
# listings are identical both arrays are empty. Differences and common
# entries are listed in sorted name order so that reports are the same
# from run to run.
sub FolderTesting
{
    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
    my %hashCount = ();
    my @Errors = ();
    my @CorrectFiles = ();
    my @TwoPointers = (\@Errors,\@CorrectFiles);

    if ( $strOutputFormat ne "xml" ) {
        VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
    }

    # Mark every entry: 'M' = model only, 'T' = test only, 'B' = both.
    foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
    foreach my $strEachItem (@$aryptTest)
    {
        if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
        else {$hashCount{$strEachItem} = 'T';}
    }

    # Same number of entries on both sides and in their union: the listings agree.
    if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
    {
        if ( $strOutputFormat eq "xml" ) {
            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
        } else {
            AlignPrint("Folder Comparsion","Succeed",$intLevel);
        }
        return @TwoPointers;
    }

    if ( $strOutputFormat eq "xml" ) {
        print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
    } else {
        AlignPrint("Folder Comparsion","Failed",$intLevel);
    }

    foreach my $strEachItem (sort keys %hashCount)
    {
        if($hashCount{$strEachItem} eq 'B')
        {
            push(@CorrectFiles,$strEachItem);
            next;
        }

        my $strOutput = "";
        my $strReport = "";

        if($hashCount{$strEachItem} eq 'M')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
            $strReport = "Difference Report: difference found at $strTestFolder";
        }
        elsif($hashCount{$strEachItem} eq 'T')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
            $strReport = "Difference Report: difference found at $strModelFolder";
        }
        else {die "Error occours in diffcol_mk2::TestingFolder\n"}

        VobPrint ("$strOutput\n",$intLevel);
        $strOutput = $strOutput."\n\t".$strReport."\n";
        push(@Errors,$strOutput);
    }

    if( $strOutputFormat eq "xml" ) {
        print "</message></folder-comparison>";
    }

    return @TwoPointers;
}
889
#-----------------------------------------
# Name:           VobPrint
# Parameters:     1.(String)  text to print
#                 2.(Integer) verbosity level of this message
# Pre-condition:  $gv_intVerbosity and $strOutputFormat are set.
# Post-condition: Prints the text to STDOUT (followed by a newline) when the
#                 message level does not exceed $gv_intVerbosity. Messages at
#                 level 1 or above are indented with tabs; in xml mode the
#                 characters & < > are escaped; anything longer than 1000
#                 characters is cut back and marked as truncated.
#-----------------------------------------
sub VobPrint
{
    my ($strOutput, $intLevel) = @_;

    # Too detailed for the requested verbosity: print nothing.
    return if ($intLevel > $gv_intVerbosity);

    my $strTab = "";
    if ($intLevel >= 1)
    {
        # One tab, plus one more for every two levels; continuation lines of
        # a multi-line message get the same indent as its first line.
        $strTab = "\t" x (int($intLevel/2) + 1);
        $strOutput =~ s/\n$//;
        $strOutput =~ s/\n/\n$strTab/g;
    }

    my $blnXml = ($strOutputFormat eq "xml");
    if ($blnXml) {
        # '&' must be escaped first so the entities added next stay intact.
        $strOutput =~ s/&/&amp;/g;
        $strOutput =~ s/</&lt;/g;
        $strOutput =~ s/>/&gt;/g;
    }

    if ( length( $strOutput ) > 1000 ) {
        # 978 characters + the 22 character marker below = 1000.
        $strOutput = substr( $strOutput, 0, 978);

        # The cut may land inside one of the entities written above. Only xml
        # output contains entities; in text mode a trailing '&' is a literal
        # character and must be left exactly as it is.
        if ($blnXml && $strOutput =~ m/&([a-z]{0,3})\z/) {
            my $strPartial = $1;
            if ($strPartial =~ m/^a/) {
                $strOutput =~ s/&am?p?\z/&amp;/;    # &a, &am, &amp => &amp;
            }
            elsif ($strPartial =~ m/^l/) {
                $strOutput =~ s/&lt?\z/&lt;/;       # &l, &lt => &lt;
            }
            elsif ($strPartial =~ m/^g/) {
                $strOutput =~ s/&gt?\z/&gt;/;       # &g, &gt => &gt;
            }
            else {
                # A lone '&' could have started any of the three entities, so
                # drop it rather than invent a character; either way no '&'
                # is left dangling to break the xslt applied to the report.
                $strOutput =~ s/&\z//;
            }
        }
        $strOutput .= "... (output truncated)";
    }

    print $strTab.$strOutput."\n";
}
940#----##
941
942
#--Main System----------------------------
#-----------------------------------------
# Name:           main
# Parameters:     arguments from command line (read from @ARGV): options
#                 followed by one or more collection names
# Pre-condition:  testing will start by calling this main function.
# Post-condition: output the test results for one or more collections.
#                 Prints the usage message and dies when the options cannot
#                 be parsed or when no collection name is given; dies when a
#                 named collection is missing from the model or test root.
#-----------------------------------------
sub main
{
    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
    my $strProgName = $0;

    #--System Arguments Setup
    my $blnParsed = parsargv::parse(\@ARGV,
        'estop//off', \$strErrorStop,
        'eshow//off', \$strErrorShow,
        'verbosity/\d+/1', \$intVerbosity,
        'mode/[\w\-]+/all', \$strMode,
        'output/[\w\-]+/text', \$strOutputFormat,
        'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
        'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
        );

    # Whatever is left in @ARGV at this point is treated as the list of
    # collection names (see the loop below), so the "nothing to do" check has
    # to be made after option parsing. Counting the arguments beforehand let
    # an options-only command line through, which then tested nothing and
    # exited silently.
    if (!$blnParsed || scalar(@ARGV) < 1) {
        PrintUsage($strProgName);
        die "\n";
    }

    $gv_test_os = $test_os;   # if not specified, defaults to "compute"
    $gv_model_os = $model_os; # tends to be linux

    $gv_blnErrorStop = $strErrorStop;
    $gv_blnErrorShow = $strErrorShow;
    $gv_intVerbosity = $intVerbosity;
    $gv_strMode = SetMode($strMode);

    #----##

    #--Collection(s) Testing
    # NOTE(review): the loop variable is deliberately left non-lexical, as in
    # the original; other subs in this file may read $strColName directly.
    foreach $strColName (@ARGV)
    {
        my @ErrorsInEachCol;
        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);

        #--Output(Start)
        OutputStart($strColName);
        #----##

        # Both the model and the freshly built collection must exist.
        if (!(-e $strModelCol && -e $strTestCol))
        {
            if ( $strOutputFormat eq "xml" ) {
                die "<error>Cannot find collection: $strColName</error>\n";
            } else {
                die "Error: cannot find collection: $strColName\n";
            }
        }

        #--Individual Testing
        if ($gv_strMode eq "Individual")
        {
            @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
        }
        #--Initial Testing
        elsif ($gv_strMode eq "Initial")
        {
            @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
        }
        #--Full Testing
        elsif ($gv_strMode eq "Full")
        {
            @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
        }
        #--Error Checking: SetMode returned a mode this function does not know
        else
        {
            if ( $strOutputFormat eq "xml" ) {
                die "<error>Error occoured in main function</error>\n";
            } else {
                die "Error occoured in main function.\n";
            }
        }
        #----##

        #--Output(Results and Errors)
        OutputEnd($strColName,\@ErrorsInEachCol);
        #----##
    }
}
1048#----##
1049
# Script entry point: run the comparison for the collections named on the command line.
main();
Note: See TracBrowser for help on using the repository browser.