source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 34417

Last change on this file since 34417 was 34417, checked in by ak19, 4 years ago

Updates to diffcol to handle change introduced in commit 34394, which brought in new meta gsdlfullsourcepath. METS model coll was still failing after rebuild until update to diffcol.pl (this commit) that now handles docMETS.xml files to support the new meta field. The new meta field is converted to a relative path, relative to collect, so that modelcollect and test collect can be properly compared, as with doc.xml files. Hopefully this change will work on windows and mac too, but at least diffcol finally succeeded on linux again. Can't make out why the diffcol report is not being uploaded to nzdl, however. Maybe I wasn't logged in as the correct user. But it didn't complain about not being able to upload the report to wwwinternal.

File size: 36.1 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnDebugging = 0;
61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
126 my ($strModelCol,$strTestCol,$strColName) = @_;
127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
137 $intNumberOfErrors = scalar(@Errors);
138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193 # Testing the build.cfg
194 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
195 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
196
197 if(-e $strModelBcfg && -e $strTestBcfg)
198 {
199 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
200 if($strBcfgError ne "")
201 {
202 if( $strOutputFormat eq "xml" ) {
203 print "<build-cfg succeeded=\"no\">\n<message>";
204 } else {
205 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
206 }
207
208 VobPrint ("$strBcfgError",$intLevel);
209 if( $strOutputFormat eq "xml" ) {
210 print "</message></build-cfg>";
211 }
212
213 $strBcfgError = "$strBcfgError";
214 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
215 push(@Errors,$strBcfgError);
216 }
217 else
218 {
219 if( $strOutputFormat eq "xml" ) {
220 print "<build-cfg succeeded=\"yes\"/>";
221 } else {
222 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
223 }
224 }
225 }
226 else
227 {
228 my $strErrorColName;
229 my $strBcfgError;
230
231 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
232 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
233
234 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
235 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
236 VobPrint ("$strBcfgError\n",$intLevel);
237 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
238
239 push(@Errors,$strBcfgError);
240 }
241 VobPrint ("\n",$intLevel);
242
243 # Testing the collect.cfg
244
245 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
246 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
247
248 if(-e $strModelCcfg && -e $strTestCcfg)
249 {
250 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
251 if($strCcfgError ne "")
252 {
253 if( $strOutputFormat eq "xml" ) {
254 print "<collect-cfg succeeded=\"no\"><message>";
255 } else {
256 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
257 }
258
259 VobPrint ("$strCcfgError",$intLevel);
260
261 if( $strOutputFormat eq "xml" ) {
262 print "</message></collect-cfg>";
263 }
264
265 $strCcfgError = "$strCcfgError";
266 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
267 push(@Errors,$strCcfgError);
268 }
269 else
270 {
271 if( $strOutputFormat eq "xml" ) {
272 print "<collect-cfg succeeded=\"yes\"/>";
273 } else {
274 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
275 }
276 }
277 }
278 else
279 {
280 my $strErrorColName;
281 my $strCcfgError;
282
283 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
284 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
285
286 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
287 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
288 VobPrint ("$strCcfgError\n",$intLevel);
289 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
290
291 push(@Errors,$strCcfgError);
292 }
293
294 VobPrint ("\n",$intLevel);
295
296 # Testing databases
297
298 # index
299 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
300 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
301 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
302 if($strGdbError) {
303 push(@Errors,$strGdbError);
304 }
305
306 # archives
307 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
308 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
309 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
310 if($strGdbError) {
311 push(@Errors,$strGdbError);
312 }
313
314 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
315 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
316 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
317 if($strGdbError) {
318 push(@Errors,$strGdbError);
319 }
320
321 VobPrint ("\n",$intLevel);
322
323 return @Errors;
324}
325
326
327# At present handles gdbm - need to expand to allow for jdbm and other db types
328sub GdbDiff
329{
330 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
331
332 my $strGdbError = 0;
333
334 if(-e $strModelGdb && -e $strTestGdb)
335 {
336 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
337 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
338 if($strGdbError ne "")
339 {
340 if( $strOutputFormat eq "xml" ) {
341 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
342 } else {
343 AlignPrint("Database Comparsion Result","Failed",$intLevel);
344 }
345 VobPrint ("$strGdbError\n",$intLevel);
346
347 if( $strOutputFormat eq "xml" ) {
348 print "</message></database>";
349 }
350
351 $strGdbError = "$strGdbError";
352 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
353 #push(@Errors,$strGdbError);
354 }
355 else
356 {
357 if( $strOutputFormat eq "xml" ) {
358 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
359 } else {
360 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
361 }
362 }
363 }
364 else
365 {
366 my $strErrorColName;
367
368 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
369 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
370
371 AlignPrint("Database Comparsion Result","Failed",$intLevel);
372
373 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
374 VobPrint ("$strGdbError\n",$intLevel);
375
376 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
377
378 }
379
380 return $strGdbError;
381}
382
383sub FullTest
384{
385 my ($strModelCol,$strTestCol,$strColName) = @_;
386 my @Errors = ();
387 my $intLevel = 0;
388 my $intNumberDiffs = 0;
389
390 # <Initial Test>
391 if( $strOutputFormat eq "xml" ) {
392 #print "<initial-test>";
393 } else {
394 VobPrint("Initial Testing Start\n",$intLevel);
395 }
396
397 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
398 $intNumberDiffs = scalar(@Errors);
399
400 if( $strOutputFormat eq "xml" ) {
401 #print "</initial-test>";
402 } else {
403 VobPrint("Initial Testing End\n",$intLevel);
404 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
405 VobPrint("\n",$intLevel);
406 }
407 # </Initial Test>
408
409 # <Detailed Test>
410 if( $strOutputFormat eq "xml" ) {
411 #print "<detailed-test>";
412 } else {
413 VobPrint("Detail Testing Start\n",$intLevel);
414 }
415 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
416 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
417
418 if( $strOutputFormat eq "xml" ) {
419 #print "</detailed-test>";
420 } else {
421 VobPrint("Detail Testing End\n",$intLevel);
422 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
423 }
424 # </Detailed Test>
425
426 return @Errors;
427}
428#----##
429
430
431#--Other System Utilities
432sub PrintUsage
433{
434 my ($strProgName) = @_;
435 if ( $strOutputFormat eq "xml" ) {
436 print "<error>usage incorrect</error>\n";
437 } else {
438 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
439 }
440 Help("Error: used incorrect parameters");
441}
442
443sub Help
444{
445 my ($strError) = @_;
446 my $aryptHelps =
447 [ { 'name' => "verbosity",
448 'type' => "scale",
449 'argu' => "a integer" ,
450 'descrip' => "this parameter setup the verbosity of the testing result"},
451 { 'name' => "mode",
452 'type' => "option",
453 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
454 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
455 { 'name' => "estop",
456 'type' => "flag",
457 'argu' => "NULL" ,
458 'descrip' => "Set then system will stop once it meets an error"},
459 { 'name' => "eshow",
460 'type' => "flag",
461 'argu' => "NULL" ,
462 'descrip' => "Set then system will show the error summary"},
463 { 'name' => "debug",
464 'type' => "flag",
465 'argu' => "NULL" ,
466 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
467 ];
468
469
470 if ( $strOutputFormat ne "xml" ) {
471 print "$strError\n";
472
473 foreach my $hashOneArg (@{$aryptHelps})
474 {
475 print "\n----------------------------\n";
476 print "Parameters: -".$hashOneArg->{"name"}."\n";
477 print "Type: ".$hashOneArg->{"type"}."\n";
478 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
479 print "Description: ".$hashOneArg->{"descrip"}."\n";
480 print "----------------------------\n";
481 }
482 }
483}
484
485sub OutputStart
486{
487 my ($strColName) = @_;
488 my $intPadding = 17 - length($strColName);
489
490 if ( $strOutputFormat eq "xml" ) {
491 print "<diffcol>\n";
492 } else {
493 print "+---------------------------------------------------------+\n";
494 print "| |\n";
495 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
496 print "| |\n";
497 print "+---------------------------------------------------------+\n\n";
498 }
499}
500
501sub OutputEnd
502{
503 my ($strColName,$aryptErrors) = @_;
504 my $intPadding = 12 - length($strColName);
505 if ( $strOutputFormat eq "xml" ) {
506 print "</diffcol>\n";
507 } else {
508 print "\n";
509 print "+---------------------------------------------------------+\n";
510 print "| |\n";
511 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
512 print "| |\n";
513 print "+---------------------------------------------------------+\n\n";
514 }
515
516 my $intTotalErrors = scalar(@{$aryptErrors});
517 if ( $strOutputFormat ne "xml" ) {
518 print "Checking completed, there is $intTotalErrors error(s) found.\n";
519 }
520
521 if($gv_blnErrorShow ne "off")
522 {
523 foreach my $strEachError (@{$aryptErrors})
524 {
525 if ( $strOutputFormat eq "xml" ) {
526 print "<error>";
527 print $strEachError;
528 print "</error>\n";
529 } else {
530 print "+---------------------------------------------------------+\n";
531 print "| Error |\n";
532 print "+---------------------------------------------------------+\n\n";
533 print "$strEachError\n\n";
534 }
535 }
536 }
537 else
538 {
539 if ( $strOutputFormat ne "xml" ) {
540 print "Use -eshow to show the error detail\n\n";
541 }
542 }
543}
544
545sub AlignPrint
546{
547 my ($strMainString,$strStatus,$intLevel) = @_;
548 my $intDot = 100 - length($strMainString) - length($strStatus);
549 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
550}
551
552
553# this function is only called on DocXMLFiles.
554# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
555# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
556sub isDocOrMETSXMLFileWindows
557{
558 my ($file_contents) = @_;
559
560 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
561
562 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
563 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
564
565 # for doc.xml:
566 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
567 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
568 $gsdlsourcefilename = $2;
569 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
570 return 1;
571 }
572 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
573 return 1;
574 }
575
576 return 0;
577}
578
579sub TestEach
580{
581 my ($strModel,$strTest,$intLevel,$strColName) = @_;
582 my @Errors = ();
583
584 $intLevel++;
585 if (-d $strModel && -d $strTest)
586 {
587 my @aryInModel = &diffutil::files_in_dir($strModel);
588 my @aryInTest = &diffutil::files_in_dir($strTest);
589
590 # Files to be skipped because they get generated on one OS but not the other
591 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
592 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
593 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
594 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
595
596 # Now check all remaining files in the folder exist in both model and test collections
597 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
598 my @aryCorrectFiles = @{$aryTwoPointers[1]};
599 @Errors = @{$aryTwoPointers[0]};
600
601 if(scalar(@Errors) == 0)
602 {
603 foreach my $strEachFile (@aryInModel)
604 {
605 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
606 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
607 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
608 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
609 {
610 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
611 }
612 else
613 {
614 if ( $strOutputFormat eq "xml" ) {
615 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
616 } else {
617 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
618 }
619 }
620 }
621 }
622 else
623 {
624 foreach my $strEachFile (@aryCorrectFiles)
625 {
626 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
627 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
628 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
629 {
630 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
631 }
632 }
633 }
634 if($intLevel == $gv_intVerbosity)
635 {
636 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
637 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
638 }
639 }
640 else
641 {
642 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
643 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
644
645 my $strResult;
646
647 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
648 # in case these don't appear in the same order between collections, since
649 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
650 # when they can show up as unnecessary diff 'errors'
651
652 my ($model_contents, $test_contents);
653
654 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
655 if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
656
657 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
658 sysread(FIN, $model_contents, -s FIN);
659 close(FIN);
660 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
661 sysread(FIN, $test_contents, -s FIN);
662 close(FIN);
663
664 $model_contents =~ s/$ignore_line_re//g;
665 $test_contents =~ s/$ignore_line_re//g;
666
667
668 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
669 # before comparing a windows test with a linux model or vice-versa
670 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
671 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
672
673 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
674
675 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
676 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
677
678 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
679 $$win_contents =~ s@[\r]@@g;
680
681 # make all single windows slashes into single unix slashes
682 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
683 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
684 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
685
686 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
687 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
688
689 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
690 # $$win_contents =~ s@\r@\n@mg;
691
692 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
693 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
694 # containing solitary carriage returns instead of linefeed
695 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
696
697
698 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
699 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
700 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
701 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
702 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
703
704 # Advanced Beatles collection,
705 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
706 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
707 # Normalising to windows version for doing a diff
708 $$lin_contents =~ s@href=http:///@href=http://@g;
709 }
710 }
711
712 # remove all absolute paths upto collect folder from <Metadata /> elements
713 if($strModel =~ m/docmets\.xml$/) {
714 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
715 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
716 }
717
718 # processing particular to doc.xml
719 if($strModel =~ m/doc\.xml$/) {
720 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
721 # these tmpdirs are located inside the collection directory
722 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
723 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
724
725 # remove all absolute paths upto collect folder from <Metadata /> elements
726 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
727 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
728
729 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
730 # These tmpdirs are located inside the toplevel *greenstone* directory
731 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
732 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
733 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
734
735 if($test_contents =~ m@$tmpfile_regex@) {
736 # found a match, replace the tmp file name with "random", keeping the original file extension
737 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
738
739 my ($old_tmp_filename, $ext) = ($1, $2);
740 my $new_tmp_filename = "random";
741
742 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
743 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
744 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
745 if($5) {
746 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
747 } else { # OrigSource contains only the filename
748 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
749 }
750
751 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
752 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
753 if($5) {
754 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
755 } else { # OrigSource contains only the filename
756 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
757 }
758 }
759
760 } # finished special processing of doc.xml files
761
762
763 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
764 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
765 &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
766 &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
767# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
768# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
769# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
770# }
771 }
772
773
774 # now can diff the normalised versions of the doc.xml/docmets.xml files:
775 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
776
777 } else {
778 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
779 }
780
781 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
782 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
783 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
784
785 #$strResult = GeneralOutput($strResult);
786 if ( $strOutputFormat eq "xml" ) {
787 #
788 } else {
789 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
790 }
791 if ($strResult eq "")
792 {
793 if ( $strOutputFormat eq "xml" ) {
794 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
795 } else {
796 AlignPrint("Comparing File","Succeed",$intLevel);
797 }
798 }
799 else
800 {
801# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
802
803 my $strOutput = "Difference Report:\n$strResult\n";
804 if ( $strOutputFormat eq "xml" ) {
805 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
806 } else {
807 AlignPrint("Comparing File","Failed",$intLevel);
808 }
809
810 #$result=`file -b $strModel`; # linux specific test for binary file
811 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
812 if ( "$result" =~ "data" ) {
813 VobPrint( "These binary files differ", $intLevel );
814 } else {
815 VobPrint ( "$strOutput" , $intLevel);
816 }
817
818
819 if ( $strOutputFormat eq "xml" ) {
820 print "</message></file-comparison>";
821 }
822
823 if($gv_blnErrorStop ne "off") { exit; }
824 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
825 }
826 }
827
828 return @Errors;
829}
830
831
832sub FolderTesting
833{
834 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
835 my %hashCount = ();
836 my @Errors = ();
837 my @CorrectFiles = ();
838 my @TwoPointers = (\@Errors,\@CorrectFiles);
839
840 if ( $strOutputFormat eq "xml" ) {
841 #print "<folder-comparison location=\"$strModelFolder\">\n";
842 } else {
843 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
844 }
845
846 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
847 foreach my $strEachItem (@$aryptTest)
848 {
849 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
850 else {$hashCount{$strEachItem} = 'T';}
851 }
852
853 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
854 {
855 if ( $strOutputFormat eq "xml" ) {
856 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
857 } else {
858 AlignPrint("Folder Comparsion","Succeed",$intLevel);
859 }
860 return @TwoPointers;
861 }
862 else
863 {
864 if ( $strOutputFormat eq "xml" ) {
865 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
866 } else {
867 AlignPrint("Folder Comparsion","Failed",$intLevel);
868 }
869
870 foreach my $strEachItem (keys %hashCount)
871 {
872 if($hashCount{$strEachItem} ne 'B')
873 {
874 my $strOutput = "";
875 my $strReport = "";
876
877 if($hashCount{$strEachItem} eq 'M')
878 {
879 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
880 $strReport = "Difference Report: difference found at $strTestFolder";
881 }
882 elsif($hashCount{$strEachItem} eq 'T')
883 {
884 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
885 $strReport = "Difference Report: difference found at $strModelFolder";
886 }
887 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
888
889 VobPrint ("$strOutput\n",$intLevel);
890 $strOutput = $strOutput."\n\t".$strReport."\n";
891 push(@Errors,$strOutput);
892 }
893 else {push(@CorrectFiles,$strEachItem);}
894 }
895 if( $strOutputFormat eq "xml" ) {
896 print "</message></folder-comparison>";
897 }
898
899 return @TwoPointers;
900 }
901}
902
903sub VobPrint
904{
905 my ($strOutput, $intLevel) = @_;
906 my $strTab = "";
907 my $intTab = int($intLevel/2);
908 if($intLevel <= $gv_intVerbosity)
909 {
910 if($intLevel >= 1)
911 {
912 $strTab = "\t"x($intTab+1);
913 $strOutput =~ s/\n$//;
914 $strOutput =~ s/\n/\n$strTab/g;
915 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
916 }
917
918 if( $strOutputFormat eq "xml" ) {
919 $strOutput =~ s/&/&amp;/g;
920 $strOutput =~ s/</&amp;lt;/g;
921 $strOutput =~ s/>/&amp;gt;/g;
922 }
923
924 if ( length( $strOutput ) > 1000 ) {
925 $strOutput = substr( $strOutput, 0, 978);
926
927 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
928 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
929 if(defined $1 && $1) {
930 my $rest = $1;
931 if($rest =~ m/^a/) {
932 $strOutput =~ s@am?p?$@amp;@;
933 }
934 elsif($rest eq "g" || $rest eq "l") {
935 $strOutput .= "t;"; # close the known tag
936 }
937 elsif($rest eq "gt" || $rest eq "lt") {
938 $strOutput .= ";";
939 }
940 } else { # & on its own
941 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
942 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
943 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
944 }
945 }
946 $strOutput .= "... (output truncated)";
947 }
948
949
950 print $strTab.$strOutput."\n";
951 }
952}
953#----##
954
955
956#--Main System----------------------------
957#-----------------------------------------
958# Name: main
959# Perameters: arguments from command line
960# Pre-condition: testing will start by calling this main function.
961# Post-condition: output the test results for one or more collections.
962#-----------------------------------------
963sub main
964{
965 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
966 my $strProgName = $0;
967 my $intArgc = scalar(@ARGV);
968
969 #--System Arguments Setup
970 if (!parsargv::parse(\@ARGV,
971 'estop//off', \$strErrorStop,
972 'eshow//off', \$strErrorShow,
973 'debug', \$debugging,
974 'verbosity/\d+/1', \$intVerbosity,
975 'mode/[\w\-]+/all', \$strMode,
976 'output/[\w\-]+/text', \$strOutputFormat,
977 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
978 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
979 )) {
980 PrintUsage($strProgName);
981 die "\n";
982 }
983
984 if ($intArgc<1) {
985 PrintUsage($strProgName);
986 die "\n";
987 }
988
989 $gv_test_os = $test_os; # if not specified, defaults to "compute"
990 $gv_model_os = $model_os; # tends to be linux
991
992 $gv_blnDebugging = $debugging;
993 $gv_blnErrorStop = $strErrorStop;
994 $gv_blnErrorShow = $strErrorShow;
995 $gv_intVerbosity = $intVerbosity;
996 $gv_strMode = SetMode($strMode);
997
998 #----##
999
1000# To find out what version of perl we're using
1001 if( $strOutputFormat eq "xml" ) {
1002 my $perloutput = `perl -v`;
1003 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1004 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1005 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1006 }
1007
1008 #--Collection(s) Testing
1009 foreach $strColName (@ARGV)
1010 {
1011 my @ErrorsInEachCol;
1012 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1013 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1014
1015 #--Output(Start)
1016 OutputStart($strColName);
1017 #----##
1018
1019 if(-e $strModelCol && -e $strTestCol )
1020 {
1021
1022 #--Individual Testing
1023 if ($gv_strMode eq "Individual")
1024 {
1025 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1026 }
1027 #----##
1028
1029 #--Initial Testing
1030 elsif ($gv_strMode eq "Initial")
1031 {
1032 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1033 }
1034 #----##
1035
1036 #--Full Testing
1037 elsif ($gv_strMode eq "Full")
1038 {
1039 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1040 }
1041 #----##
1042
1043 #--Error Checking
1044 else
1045 {
1046 if ( $strOutputFormat eq "xml" ) {
1047 die "<error>Error occoured in main function</error>\n";
1048 } else {
1049 die "Error occoured in main function.\n";
1050 }
1051 }
1052 #----##
1053
1054 }
1055 else
1056 {
1057 if( $strOutputFormat eq "xml" ) {
1058 die "<error>Cannot find collection: $strColName</error>\n";
1059 } else {
1060 die "Error: cannot find collection: $strColName\n";
1061 }
1062 }
1063 #----##
1064
1065 #--Output(Results and Errors)
1066 OutputEnd($strColName,\@ErrorsInEachCol);
1067 #----##
1068
1069 }
1070}
1071#----##
1072
1073&main();
Note: See TracBrowser for help on using the repository browser.