source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28661

Last change on this file since 28661 was 28661, checked in by ak19, 10 years ago

Committing the next installment of code to handle diffcol for GS3. Now it successfully compiles up GS3, while diffcol still works for GS2.

File size: 35.1 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing whether a built collection is consistent with the model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone perl libraries at the front of @INC.
BEGIN {
    foreach my $strRequired ('GSDLHOME', 'GSDLOS') {
        die "$strRequired not set\n" unless defined $ENV{$strRequired};
    }
    # perllib/cpan ends up ahead of perllib in @INC
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan", "$ENV{'GSDLHOME'}/perllib");
}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
my ($gv_test_os, $gv_model_os); # still just file globals

# Root folders of the model and test collections. When GSDL3SRCHOME is set they
# are looked up under <GSDL3SRCHOME>/web/sites/localsite, otherwise directly
# under GSDLHOME.
{
    my @aryRootParts = $ENV{'GSDL3SRCHOME'}
        ? ($ENV{'GSDL3SRCHOME'},"web","sites","localsite")
        : ($ENV{'GSDLHOME'});
    $gv_strModelColRoot = &FileUtils::filenameConcatenate(@aryRootParts,"/model-collect");
    $gv_strTestColRoot = &FileUtils::filenameConcatenate(@aryRootParts,"/collect");
}

# Run-time settings and their defaults
$gv_blnErrorStop = "false";
$gv_blnErrorShow = "false";
$gv_intVerbosity = 0;
$gv_strMode = "Full";
if(!defined $strOutputFormat) { $strOutputFormat = "xml"; } # global var with default

# Folders that can be tested individually; SetMode sets an entry to 1 when the
# folder is named in the mode list
%gv_IndivList = map { $_ => 0 }
    ("archives","etc","images","building","import","index","log","metadata","perllib","temp");
76#----##
77
78#--System Setup---------------------------
# Works out the overall testing mode from a mode list.
#
# $strModeList holds one or more mode names separated by "|" and/or whitespace.
# "all" selects a full test and "init" an initial test. Any other name must be
# a key of %gv_IndivList; its entry is set to 1 to select that folder for
# individual testing.
#
# Returns "Full" if "all" was given, otherwise "Initial" if "init" was given,
# otherwise "Individual".
# Dies (after printing the help text) on a name that is neither "all", "init"
# nor a key of %gv_IndivList.
sub SetMode
{
    my ($strModeList) = @_;
    $strModeList =~ s/\|/ /g;

    my $blnFull = 0;
    my $blnInitial = 0;

    foreach my $strEachMode (split(" ",$strModeList))
    {
        if($strEachMode eq "all")
        {
            $blnFull = 1;
        }
        elsif($strEachMode eq "init")
        {
            $blnInitial = 1;
        }
        elsif(exists $gv_IndivList{$strEachMode})
        {
            $gv_IndivList{$strEachMode} = 1;
        }
        else
        {
            # Help only prints (and prints nothing in xml mode), so its return
            # value cannot serve as die's message: give die its own text.
            Help("Error: used undefined mode");
            die "Error: used undefined mode \"$strEachMode\"\n";
        }
    }

    return "Full" if $blnFull;
    return "Initial" if $blnInitial;
    return "Individual";
}
120#----##
121
122#--System Process-------------------------
# Individual testing: compares only the collection sub-folders whose entry in
# %gv_IndivList is 1.
#
# $strModelCol / $strTestCol are the model and test collection folders and
# $strColName is the collection name, which is handed on to TestEach.
#
# Returns the list of error strings gathered from TestEach for all selected
# folders.
sub IndivTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my @Errors = ();

    # sorted, so folders are compared and reported in the same order on every
    # run instead of in hash order
    foreach my $strEachFolder (sort keys %gv_IndivList)
    {
        next unless $gv_IndivList{$strEachFolder} == 1;

        VobPrint("Start Comparing \"$strEachFolder\"\n",0);
        my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
        my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);

        my @FolderErrors = TestEach($strModelFolder,$strTestFolder,0,$strColName);
        my $intNumberOfErrors = scalar(@FolderErrors);
        push(@Errors,@FolderErrors);

        VobPrint("End Comparing \"$strEachFolder\"\n",0);
        VobPrint("Difference Found: $intNumberOfErrors\n",0);
        VobPrint ("\n",0);
    }
    return @Errors;
}
146
# Helper for InitTest: compares one configuration file of the model collection
# with its counterpart in the test collection and reports the outcome.
#
#   $strModelCfg, $strTestCfg  paths of the two files
#   $strCfgName                file name used in the messages, e.g. "build.cfg"
#   $strXmlTag                 xml element name, e.g. "build-cfg"
#   $strXmlFailOpen            exact text that opens the xml failure report
#   $intLevel                  verbosity level handed to AlignPrint/VobPrint
#
# Returns "" when cfgdiff::test_cfg reports no difference, otherwise the error
# string for the caller's error list.
sub _InitCfgTest
{
    my ($strModelCfg,$strTestCfg,$strCfgName,$strXmlTag,$strXmlFailOpen,$intLevel) = @_;
    my $strTitle = "Config File($strCfgName) Comparison Result";
    my $strErrorHeader = "Difference Found at Config File($strCfgName) Comparison\n";

    if(-e $strModelCfg && -e $strTestCfg)
    {
        my $strCfgError = cfgdiff::test_cfg($strModelCfg,$strTestCfg,$strCfgName);
        if($strCfgError eq "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "<$strXmlTag succeeded=\"yes\"/>";
            } else {
                AlignPrint($strTitle,"Succeed",$intLevel);
            }
            return "";
        }

        if( $strOutputFormat eq "xml" ) {
            print $strXmlFailOpen;
        } else {
            AlignPrint($strTitle,"Failed",$intLevel);
        }

        VobPrint ("$strCfgError",$intLevel);

        if( $strOutputFormat eq "xml" ) {
            print "</message></$strXmlTag>";
        }

        return $strErrorHeader.$strCfgError."\n";
    }

    # At least one of the two files is missing. Start from "" so that appending
    # does not raise an "uninitialized value" warning under -w.
    my $strErrorColName = "";
    if(!(-e $strModelCfg)){ $strErrorColName .= "(Model Collection)";}
    if(!(-e $strTestCfg)){ $strErrorColName .= "(Test Collection)";}

    AlignPrint($strTitle,"Failed",$intLevel);
    my $strMissing = "Difference Report: No Config files found in $strErrorColName";
    VobPrint ("$strMissing\n",$intLevel);

    return $strErrorHeader.$strMissing."\n";
}

# Initial testing: compares index/build.cfg, etc/collect.cfg and the three
# database files (index/text/<colname>.gdb, archives/archiveinf-doc.gdb and
# archives/archiveinf-src.gdb) of the model and test collections.
#
# $strModelCol / $strTestCol are the collection folders, $strColName the
# collection name.
#
# Returns the list of error strings, one per failed comparison.
sub InitTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 1;
    my @Errors;

    # Testing Log files
#    my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
#    my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
#
#    if(-e $strModelLog && -e $strTestLog)
#    {
#        my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
#        if($strLogError ne "")
#        {
#            AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
#            VobPrint ("$strLogError\n",$intLevel);
#
#            $strLogError = "$strLogError";
#            $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
#            push(@Errors,$strLogError);
#        }
#        else
#        {
#            AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
#        }
#    }
#    else
#    {
#        my $strErrorColName;
#        my $strLogError;
#
#        if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
#        if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
#
#        AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
#        $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
#        VobPrint ("$strLogError\n",$intLevel);
#        $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
#
#        push(@Errors,$strLogError);
#    }
#    VobPrint ("\n",$intLevel);

    # Testing the build.cfg
    my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
    my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
    my $strBcfgError = _InitCfgTest($strModelBcfg,$strTestBcfg,"build.cfg","build-cfg",
                                    "<build-cfg succeeded=\"no\">\n<message>",$intLevel);
    push(@Errors,$strBcfgError) if $strBcfgError ne "";
    VobPrint ("\n",$intLevel);

    # Testing the collect.cfg
    my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
    my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
    my $strCcfgError = _InitCfgTest($strModelCcfg,$strTestCcfg,"collect.cfg","collect-cfg",
                                    "<collect-cfg succeeded=\"no\"><message>",$intLevel);
    push(@Errors,$strCcfgError) if $strCcfgError ne "";
    VobPrint ("\n",$intLevel);

    # Testing databases: the index database first, then the two archives databases
    foreach my $aryptGdbPath (["index","text","$strColName.gdb"],
                              ["archives","archiveinf-doc.gdb"],
                              ["archives","archiveinf-src.gdb"])
    {
        my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,@$aryptGdbPath);
        my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,@$aryptGdbPath);
        # GdbDiff returns a false value when there is no error
        my $strGdbError = GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
        if($strGdbError) {
            push(@Errors,$strGdbError);
        }
    }

    VobPrint ("\n",$intLevel);

    return @Errors;
}
324
325
# At present handles gdbm - need to expand to allow for jdbm and other db types
#
# Compares one database file of the model collection with its counterpart in
# the test collection and reports the outcome.
#
#   $strModelGdb, $strTestGdb   paths of the two database files
#   $strOutputFormat            "xml" for xml output, anything else for text
#   $intLevel                   verbosity level handed to AlignPrint/VobPrint
#   $strColName, $strTestCol, $strModelCol   handed on to gdbdiff::test_gdb
#
# Returns a false value when no difference is found, otherwise the error string.
sub GdbDiff
{
    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;

    if(!(-e $strModelGdb && -e $strTestGdb))
    {
        # Start from "" so that appending does not raise an
        # "uninitialized value" warning under -w.
        my $strErrorColName = "";
        if(!(-e $strModelGdb)){ $strErrorColName .= "(Model Collection)";}
        if(!(-e $strTestGdb)){ $strErrorColName .= "(Test Collection)";}

        AlignPrint("Database Comparsion Result","Failed",$intLevel);

        my $strMissing = "Difference Report: No Database files found in $strErrorColName";
        VobPrint ("$strMissing\n",$intLevel);

        return "Difference Found at Database Comparison\n".$strMissing."\n";
    }

    my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
    if($strGdbError ne "")
    {
        if( $strOutputFormat eq "xml" ) {
            print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
        } else {
            AlignPrint("Database Comparsion Result","Failed",$intLevel);
        }
        VobPrint ("$strGdbError\n",$intLevel);

        if( $strOutputFormat eq "xml" ) {
            print "</message></database>";
        }

        return "Difference Found at Database Comparsion\n".$strGdbError."\n";
    }

    if( $strOutputFormat eq "xml" ) {
        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
    } else {
        AlignPrint("Database Comparsion Result","Succeed",$intLevel);
    }

    # nothing differed: hand back test_gdb's (false) result unchanged
    return $strGdbError;
}
381
# Full testing: runs InitTest and then TestEach over the whole collection.
# The start/end banners and difference counts are only printed when the output
# format is not xml.
#
# Returns the errors from InitTest followed by those from TestEach.
sub FullTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 0;
    my $blnTextOutput = ($strOutputFormat eq "xml") ? 0 : 1;

    # <Initial Test>
    VobPrint("Initial Testing Start\n",$intLevel) if $blnTextOutput;

    my @Errors = InitTest($strModelCol,$strTestCol,$strColName);
    my $intInitialDiffs = scalar(@Errors);

    if($blnTextOutput)
    {
        VobPrint("Initial Testing End\n",$intLevel);
        VobPrint("Difference Found in Initial Testing: $intInitialDiffs\n",$intLevel);
        VobPrint("\n",$intLevel);
    }
    # </Initial Test>

    # <Detailed Test>
    VobPrint("Detail Testing Start\n",$intLevel) if $blnTextOutput;

    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
    my $intDetailDiffs = scalar(@Errors) - $intInitialDiffs;

    if($blnTextOutput)
    {
        VobPrint("Detail Testing End\n",$intLevel);
        VobPrint("Difference Found in Detail Testing: $intDetailDiffs\n",$intLevel);
    }
    # </Detailed Test>

    return @Errors;
}
427#----##
428
429
430#--Other System Utilities
# Reports incorrect usage: an <error> element on STDOUT in xml mode, a usage
# line on STDERR otherwise, followed in both cases by a call to Help.
sub PrintUsage
{
    my ($strProgName) = @_;
    if ( $strOutputFormat ne "xml" ) {
        print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
    } else {
        print "<error>usage incorrect</error>\n";
    }
    Help("Error: used incorrect parameters");
}
441
# Prints $strError followed by a description of every command line parameter.
# Nothing is printed when the output format is xml.
sub Help
{
    my ($strError) = @_;

    # Build the advertised mode names from %gv_IndivList, the same hash SetMode
    # checks, so the help text cannot drift from the modes really accepted.
    my $strModes = join("|","all","init",sort keys %gv_IndivList);

    my $aryptHelps =
    [ { 'name' => "verbosity",
        'type' => "scale",
        'argu' => "a integer" ,
        'descrip' => "this parameter setup the verbosity of the testing result"},
      { 'name' => "mode",
        'type' => "option",
        'argu' => "mode type \"[$strModes]\" default to \"all\"" ,
        'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
      { 'name' => "estop",
        'type' => "flag",
        'argu' => "NULL" ,
        'descrip' => "Set then system will stop once it meet an error"},
      { 'name' => "eshow",
        'type' => "flag",
        'argu' => "NULL" ,
        'descrip' => "Set then system will show the error summary"}
    ];

    return if $strOutputFormat eq "xml";

    print "$strError\n";

    foreach my $hashOneArg (@{$aryptHelps})
    {
        print "\n----------------------------\n";
        print "Parameters: -".$hashOneArg->{"name"}."\n";
        print "Type: ".$hashOneArg->{"type"}."\n";
        print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
        print "Description: ".$hashOneArg->{"descrip"}."\n";
        print "----------------------------\n";
    }
    return;
}
479
# Prints the opening of a collection report: "<diffcol>" in xml mode, otherwise
# a banner naming the collection, padded to 17 characters.
sub OutputStart
{
    my ($strColName) = @_;
    my $strPadding = " " x (17 - length($strColName));
    my $strBorder = "+---------------------------------------------------------+\n";
    my $strBlank = "| |\n";

    if ( $strOutputFormat ne "xml" ) {
        print $strBorder;
        print $strBlank;
        print "| Start Testing Collection: $strColName",$strPadding,"|\n";
        print $strBlank;
        print $strBorder."\n";
    } else {
        print "<diffcol>\n";
    }
}
495
# Prints the closing of a collection report and the collected errors.
#
#   $strColName    collection name shown in the text banner
#   $aryptErrors   reference to the list of error strings
#
# In xml mode "</diffcol>" is printed, followed by one <error> element per
# error. The error text is escaped (&, <, >) in the same way VobPrint escapes
# its xml output, because it can hold raw diff output and file contents.
# NOTE(review): the <error> elements are emitted after </diffcol>, i.e. outside
# the root element; left as is since consumers may rely on it -- confirm.
# In text mode a banner, the error count and each error are printed.
sub OutputEnd
{
    my ($strColName,$aryptErrors) = @_;
    my $intPadding = 12 - length($strColName);
    my $blnXml = ($strOutputFormat eq "xml") ? 1 : 0;

    if ( $blnXml ) {
        print "</diffcol>\n";
    } else {
        print "\n";
        print "+---------------------------------------------------------+\n";
        print "| |\n";
        print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
        print "| |\n";
        print "+---------------------------------------------------------+\n\n";
    }

    my $intTotalErrors = scalar(@{$aryptErrors});
    if ( !$blnXml ) {
        print "Checking completed, there is $intTotalErrors error(s) found.\n";
    }

    if($gv_blnErrorShow ne "off")
    {
        foreach my $strEachError (@{$aryptErrors})
        {
            if ( $blnXml ) {
                # work on a copy: the loop variable aliases the caller's array
                my $strEscaped = $strEachError;
                $strEscaped =~ s/&/&amp;/g;
                $strEscaped =~ s/</&lt;/g;
                $strEscaped =~ s/>/&gt;/g;
                print "<error>";
                print $strEscaped;
                print "</error>\n";
            } else {
                print "+---------------------------------------------------------+\n";
                print "| Error |\n";
                print "+---------------------------------------------------------+\n\n";
                print "$strEachError\n\n";
            }
        }
    }
    else
    {
        if ( !$blnXml ) {
            print "Use -eshow to show the error detail\n\n";
        }
    }
}
539
# Prints "<main string>......<status>" through VobPrint, using dots to fill the
# line up to 100 characters (not counting the newline).
sub AlignPrint
{
    my ($strMainString,$strStatus,$intLevel) = @_;
    my $intFill = 100 - (length($strMainString) + length($strStatus));
    my $strLine = $strMainString . ("." x $intFill) . "$strStatus\n";
    VobPrint ($strLine,$intLevel);
}
546
547
# this function is only called on DocXMLFiles.
# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
#
# $file_contents is the complete text of a doc.xml/docmets.xml file.
# Returns 1 if the gsdlsourcefilename meta field contains a windows style
# backslash or, when there is no such field, if a <Doc ... file="...\doc.xml" ...>
# element is found. Returns 0 otherwise.
sub isDocOrMETSXMLFileWindows
{
    my ($file_contents) = @_;

    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.

    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?

    # for doc.xml:
    # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
        # lexical, so that no package global is left behind by the check
        my $gsdlsourcefilename = $2;
        return 1 if $gsdlsourcefilename =~ m/\\/; # windows slashes detected.
    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
        return 1;
    }

    return 0;
}
573
# Helper for TestEach: returns 1 if the folder entry $strName is excluded from
# the detailed comparison (the log folder, earliestDatestamp, config files,
# collect.bak, database/index files and images), 0 otherwise.
sub _IsBlockedFile
{
    my ($strName) = @_;
    return 1 if $strName eq "log" || $strName eq "earliestDatestamp";
    return 1 if $strName =~ m/\.cfg$/ || $strName =~ m/collect\.bak$/;
    # wmf = windows meta file
    return 1 if $strName =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/;
    return 0;
}

# Helper for TestEach: returns the whole contents of $strFile, dies if the file
# cannot be opened.
sub _SlurpFile
{
    my ($strFile) = @_;
    my $strContents;
    # 3-argument open, so that characters in the file name are never taken as an open mode
    open(my $fhIn,"<",$strFile) or die "Unable to open $strFile...ERROR: $!\n";
    sysread($fhIn, $strContents, -s $fhIn);
    close($fhIn);
    return $strContents;
}

# Recursively compares $strModel with $strTest.
#
# When both are folders their listings are compared with FolderTesting and
# every entry that is not blocked (see _IsBlockedFile) is compared in turn.
# Otherwise the two are diffed as files; doc.xml and docmets.xml files are
# first normalised in memory (date/size metadata removed, windows/linux
# differences and temporary path names equalised).
#
#   $intLevel     nesting level of the caller, used for verbosity and indenting
#   $strColName   collection name, used when stripping absolute paths
#
# Returns the list of error strings found at and below this point.
sub TestEach
{
    my ($strModel,$strTest,$intLevel,$strColName) = @_;
    my @Errors = ();

    $intLevel++;
    if (-d $strModel && -d $strTest)
    {
        my @aryInModel = &diffutil::files_in_dir($strModel);
        my @aryInTest = &diffutil::files_in_dir($strTest);

        # Files to be skipped because they get generated on one OS but not the other
        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;

        # Now check all remaining files in the folder exist in both model and test collections
        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
        my @aryCorrectFiles = @{$aryTwoPointers[1]};
        @Errors = @{$aryTwoPointers[0]};

        if(scalar(@Errors) == 0)
        {
            foreach my $strEachFile (@aryInModel)
            {
                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
                if(!_IsBlockedFile($strEachFile))
                {
                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
                }
                else
                {
                    if ( $strOutputFormat eq "xml" ) {
                        print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
                    } else {
                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
                    }
                }
            }
        }
        else
        {
            # the listings differ: only the entries present on both sides can be compared
            foreach my $strEachFile (@aryCorrectFiles)
            {
                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                if(!_IsBlockedFile($strEachFile))
                {
                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
                }
            }
        }
        if($intLevel == $gv_intVerbosity)
        {
            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
            else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
        }
    }
    else
    {
        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";

        my $strResult;

        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
        # in case these don't appear in the same order between collections, since
        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
        # when they can show up as unnecessary diff 'errors'

        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {

            my $model_contents = _SlurpFile($strModel);
            my $test_contents = _SlurpFile($strTest);

            $model_contents =~ s/$ignore_line_re//g;
            $test_contents =~ s/$ignore_line_re//g;


            # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
            # before comparing a windows test with a linux model or vice-versa
            my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
            my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);

            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant

                my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
                my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;

                # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
                $$win_contents =~ s@[\r]@@g;

                # make all single windows slashes into single unix slashes
                # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
                #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file

                #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
                # $$win_contents =~ s@\r@\n@mg;

                if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
                    # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
                    # containing solitary carriage returns instead of linefeed
                    $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;


                    # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
                    # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
                    # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                    # Advanced Beatles collection,
                    # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
                    # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
                    # Normalising to windows version for doing a diff
                    $$lin_contents =~ s@href=http:///@href=http://@g;
                }
            }

            # processing particular to doc.xml
            if($strModel =~ m/doc\.xml$/) {
                # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
                # these tmpdirs are located inside the collection directory
                $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
                $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;

                # remove all absolute paths upto collect folder from <Metadata /> elements
                $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
                $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;

                # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
                # These tmpdirs are located inside the toplevel *greenstone* directory
                (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
                # NOTE(review): $$ENV{'GSDLHOME'} dereferences the scalar $ENV rather than reading %ENV,
                # so this looks like it always falls back to ".*". Presumably $ENV{'GSDLHOME'} was meant;
                # deliberately left unchanged because correcting it changes which paths match -- confirm.
                $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
                my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long

                if($test_contents =~ m@$tmpfile_regex@) {
                    # found a match, replace the tmp file name with "random", keeping the original file extension
                    # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">

                    my ($old_tmp_filename, $ext) = ($1, $2);
                    my $new_tmp_filename = "random";

                    ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
                    #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
                    # the tests on $5 depend on the capture state left by the preceding match/substitution: keep the statement order
                    if($5) {
                        $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }

                    # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
                    if($5) {
                        $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }
                }

            } # finished special processing of doc.xml files

            # Debugging aid, disabled (with it the getcwd call that was only needed here):
#            my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
#            &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
#            &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
#            if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
#                &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
#                &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
#            }

            # now can diff the normalised versions of the doc.xml/docmets.xml files:
            $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };

        } else {
            $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
        }

        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");

        #$strResult = GeneralOutput($strResult);
        if ( $strOutputFormat ne "xml" ) {
            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
        }
        if ($strResult eq "")
        {
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
            } else {
                AlignPrint("Comparing File","Succeed",$intLevel);
            }
        }
        else
        {
#            print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen

            my $strOutput = "Difference Report:\n$strResult\n";
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
            } else {
                AlignPrint("Comparing File","Failed",$intLevel);
            }

            # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
            # (the result used to be matched against the word "data", left over from
            # the output of `file -b`, and so could never be true)
            if ( -B $strModel ) {
                VobPrint( "These binary files differ", $intLevel );
            } else {
                VobPrint ( "$strOutput" , $intLevel);
            }


            if ( $strOutputFormat eq "xml" ) {
                print "</message></file-comparison>";
            }

            # NOTE(review): exits unless the flag is exactly "off", while the visible default is "false" -- confirm intended
            if($gv_blnErrorStop ne "off") { exit; }
            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
        }
    }

    return @Errors;
}
817
818
# Checks that two folder listings hold the same entries.
#
#   $aryptModel, $aryptTest          references to the lists of entry names
#   $strModelFolder, $strTestFolder  the folders the lists came from (for messages)
#   $intLevel                        verbosity level handed to AlignPrint/VobPrint
#
# Returns a two element list (\@Errors, \@CorrectFiles): one error string for
# every entry found on one side only, and the names of the entries found on
# both sides. @CorrectFiles is only filled in when the listings differ. Both
# lists are in sorted name order, so reports are the same from run to run.
sub FolderTesting
{
    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
    my %hashCount = ();
    my @Errors = ();
    my @CorrectFiles = ();
    my @TwoPointers = (\@Errors,\@CorrectFiles);

    if ( $strOutputFormat ne "xml" ) {
        VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
    }

    # 'M' = seen in the model only, 'T' = seen in the test only, 'B' = seen in both
    foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
    foreach my $strEachItem (@$aryptTest)
    {
        if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
        else {$hashCount{$strEachItem} = 'T';}
    }

    # the listings agree when both have the same length and together hold no more distinct names than that
    if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
    {
        if ( $strOutputFormat eq "xml" ) {
            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
        } else {
            AlignPrint("Folder Comparsion","Succeed",$intLevel);
        }
        return @TwoPointers;
    }

    if ( $strOutputFormat eq "xml" ) {
        print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
    } else {
        AlignPrint("Folder Comparsion","Failed",$intLevel);
    }

    # sorted rather than in hash order, which differs between runs
    foreach my $strEachItem (sort keys %hashCount)
    {
        if($hashCount{$strEachItem} eq 'B')
        {
            push(@CorrectFiles,$strEachItem);
            next;
        }

        my $strOutput = "";
        my $strReport = "";

        if($hashCount{$strEachItem} eq 'M')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
            $strReport = "Difference Report: difference found at $strTestFolder";
        }
        elsif($hashCount{$strEachItem} eq 'T')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
            $strReport = "Difference Report: difference found at $strModelFolder";
        }
        else {die "Error occours in diffcol_mk2::TestingFolder\n"}

        VobPrint ("$strOutput\n",$intLevel);
        $strOutput = $strOutput."\n\t".$strReport."\n";
        push(@Errors,$strOutput);
    }
    if( $strOutputFormat eq "xml" ) {
        print "</message></folder-comparison>";
    }

    return @TwoPointers;
}
889
#-----------------------------------------
# Name: VobPrint
# Parameters: $strOutput - the message to print
#             $intLevel  - verbosity level of the message
# Post-condition: prints the message followed by a newline when $intLevel does
#                 not exceed $gv_intVerbosity, otherwise prints nothing.
#                 - For levels >= 1 every line is indented by int($intLevel/2)+1
#                   tabs and one trailing newline of the message is dropped.
#                 - In xml output mode &, < and > are escaped as entities.
#                 - Messages longer than 1000 characters are cut to 978
#                   characters and marked with "... (output truncated)".
#-----------------------------------------
sub VobPrint
{
    my ($strOutput, $intLevel) = @_;

    # Messages above the configured verbosity are suppressed.
    return if ($intLevel > $gv_intVerbosity);

    my $strTab = "";
    if ($intLevel >= 1)
    {
        $strTab = "\t" x (int($intLevel/2) + 1);
        $strOutput =~ s/\n$//;
        $strOutput =~ s/\n/\n$strTab/g;   # indent continuation lines as well
    }

    my $blnXml = ($strOutputFormat eq "xml");
    if ($blnXml) {
        # '&' has to be escaped first so the entities added below stay intact.
        $strOutput =~ s/&/&amp;/g;
        $strOutput =~ s/</&lt;/g;
        $strOutput =~ s/>/&gt;/g;
    }

    if ( length( $strOutput ) > 1000 ) {
        $strOutput = substr( $strOutput, 0, 978);

        # The cut may have landed inside an entity. Only xml output contains
        # entities, so text output is left exactly as truncated. After the
        # escaping above every '&' starts one of &amp; &lt; &gt;, therefore a
        # partial entity with at least one letter can be completed safely.
        # A lone '&' is ambiguous and is dropped rather than guessed at, so
        # that no dangling '&' is left to break the xslt applied to the report.
        if ($blnXml) {
            $strOutput =~ s/&a(?:mp?)?\z/&amp;/
                or $strOutput =~ s/&lt?\z/&lt;/
                or $strOutput =~ s/&gt?\z/&gt;/
                or $strOutput =~ s/&\z//;
        }

        $strOutput .= "... (output truncated)";
    }

    print $strTab.$strOutput."\n";
}
940#----##
941
942
#--Main System----------------------------
#-----------------------------------------
# Name: main
# Parameters: arguments from command line (options followed by collection names)
# Pre-condition: testing will start by calling this main function.
# Post-condition: output the test results for one or more collections.
#                 Prints the usage and dies when the options cannot be parsed
#                 or when no collection is named; dies when a collection is
#                 missing from the model or the test collection root.
#-----------------------------------------
sub main
{
    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
    my $strProgName = $0;
    my $intArgc = scalar(@ARGV);

    #--System Arguments Setup
    if (!parsargv::parse(\@ARGV,
			 'estop//off', \$strErrorStop,
			 'eshow//off', \$strErrorShow,
			 'verbosity/\d+/1', \$intVerbosity,
			 'mode/[\w\-]+/all', \$strMode,
			 'output/[\w\-]+/text', \$strOutputFormat,
			 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
			 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
	)) {
	PrintUsage($strProgName);
	die "\n";
    }

    # $intArgc was taken before the options were parsed, so on its own it does
    # not notice a command line holding options but no collection name.
    # NOTE(review): this presumes parsargv::parse strips the recognised options
    # from @ARGV (the loop below treats what is left as collection names) -
    # confirm. If it does not, the extra test is simply never true.
    if ($intArgc < 1 || scalar(@ARGV) < 1) {
	PrintUsage($strProgName);
	die "\n";
    }

    $gv_test_os = $test_os; # if not specified, defaults to "compute"
    $gv_model_os = $model_os; # tends to be linux

    $gv_blnErrorStop = $strErrorStop;
    $gv_blnErrorShow = $strErrorShow;
    $gv_intVerbosity = $intVerbosity;
    $gv_strMode = SetMode($strMode);

    #----##

    #--Collection(s) Testing
    # NOTE(review): $strColName is deliberately left without "my", exactly as
    # before, in case other subs read it as a global - confirm before changing.
    foreach $strColName (@ARGV)
    {
	my @ErrorsInEachCol;
	my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
	my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);

	#--Output(Start)
	OutputStart($strColName);
	#----##

	# Both the model and the test version of the collection have to exist.
	if(-e $strModelCol && -e $strTestCol )
	{

	    #--Individual Testing
	    if ($gv_strMode eq "Individual")
	    {
		@ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
	    }
	    #----##

	    #--Initial Testing
	    elsif ($gv_strMode eq "Initial")
	    {
		@ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
	    }
	    #----##

	    #--Full Testing
	    elsif ($gv_strMode eq "Full")
	    {
		@ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
	    }
	    #----##

	    #--Error Checking
	    # SetMode returned something that is none of the three known modes.
	    else
	    {
		if ( $strOutputFormat eq "xml" ) {
		    die "<error>Error occoured in main function</error>\n";
		} else {
		    die "Error occoured in main function.\n";
		}
	    }
	    #----##

	}
	else
	{
	    if( $strOutputFormat eq "xml" ) {
		die "<error>Cannot find collection: $strColName</error>\n";
	    } else {
		die "Error: cannot find collection: $strColName\n";
	    }
	}
	#----##

	#--Output(Results and Errors)
	OutputEnd($strColName,\@ErrorsInEachCol);
	#----##

    }
}
1048#----##
1049
# Script entry point: run the comparison for the collections named on the command line.
main();
Note: See TracBrowser for help on using the repository browser.