source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 31418

Last change on this file since 31418 was 29495, checked in by ak19, 10 years ago

Related to previous commit 29494 accidentally committed under sjs49 and which was prematurely committed. That commit and this one adds a debug flag to the run_test action of the diffcol task, that will store the intermediate debug files in the top level diffcol folder for inspection.

File size: 35.8 KB
RevLine 
[21711]1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
[27536]41use FileUtils;
[21711]42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
[27766]47use Cwd;
[21711]48
49#--Global Variables Declaration-----------
[28172]50my ($gv_test_os, $gv_model_os); # still just file globals
51
[28661]52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
[21711]59
[29495]60$gv_blnDebugging = 0;
[21711]61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
[27537]65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
[21711]66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
[27730]126 my ($strModelCol,$strTestCol,$strColName) = @_;
[21711]127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
[27536]135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
[21711]137 $intNumberOfErrors = scalar(@Errors);
[27730]138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
[21711]139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
[27536]156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
[21711]158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193 # Testing the build.cfg
[27536]194 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
195 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
[21711]196
197 if(-e $strModelBcfg && -e $strTestBcfg)
198 {
199 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
200 if($strBcfgError ne "")
201 {
[27537]202 if( $strOutputFormat eq "xml" ) {
[21711]203 print "<build-cfg succeeded=\"no\">\n<message>";
204 } else {
205 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
206 }
207
208 VobPrint ("$strBcfgError",$intLevel);
[27537]209 if( $strOutputFormat eq "xml" ) {
[21711]210 print "</message></build-cfg>";
211 }
212
213 $strBcfgError = "$strBcfgError";
214 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
215 push(@Errors,$strBcfgError);
216 }
217 else
218 {
219 if( $strOutputFormat eq "xml" ) {
220 print "<build-cfg succeeded=\"yes\"/>";
221 } else {
222 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
223 }
224 }
225 }
226 else
227 {
228 my $strErrorColName;
229 my $strBcfgError;
230
231 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
232 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
233
234 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
235 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
236 VobPrint ("$strBcfgError\n",$intLevel);
237 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
238
239 push(@Errors,$strBcfgError);
240 }
241 VobPrint ("\n",$intLevel);
242
243 # Testing the collect.cfg
244
[27536]245 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
246 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
[21711]247
248 if(-e $strModelCcfg && -e $strTestCcfg)
249 {
250 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
251 if($strCcfgError ne "")
252 {
253 if( $strOutputFormat eq "xml" ) {
254 print "<collect-cfg succeeded=\"no\"><message>";
255 } else {
256 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
257 }
258
259 VobPrint ("$strCcfgError",$intLevel);
260
261 if( $strOutputFormat eq "xml" ) {
262 print "</message></collect-cfg>";
263 }
264
265 $strCcfgError = "$strCcfgError";
266 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
267 push(@Errors,$strCcfgError);
268 }
269 else
270 {
271 if( $strOutputFormat eq "xml" ) {
272 print "<collect-cfg succeeded=\"yes\"/>";
273 } else {
274 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
275 }
276 }
277 }
278 else
279 {
280 my $strErrorColName;
281 my $strCcfgError;
282
283 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
284 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
285
286 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
287 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
288 VobPrint ("$strCcfgError\n",$intLevel);
289 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
290
291 push(@Errors,$strCcfgError);
292 }
293
294 VobPrint ("\n",$intLevel);
295
[27604]296 # Testing databases
[21711]297
[27604]298 # index
299 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
300 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
[28238]301 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
[27604]302 if($strGdbError) {
303 push(@Errors,$strGdbError);
304 }
[21711]305
[27604]306 # archives
307 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
308 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
[28238]309 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
[27604]310 if($strGdbError) {
311 push(@Errors,$strGdbError);
312 }
[21711]313
[27604]314 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
315 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
[28238]316 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
[27604]317 if($strGdbError) {
318 push(@Errors,$strGdbError);
319 }
[21711]320
[27604]321 VobPrint ("\n",$intLevel);
322
323 return @Errors;
324}
325
326
327# At present handles gdbm - need to expand to allow for jdbm and other db types
328sub GdbDiff
329{
[28238]330 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
[27604]331
332 my $strGdbError = 0;
333
334 if(-e $strModelGdb && -e $strTestGdb)
335 {
336 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
[29495]337 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
[27604]338 if($strGdbError ne "")
339 {
340 if( $strOutputFormat eq "xml" ) {
341 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
342 } else {
343 AlignPrint("Database Comparsion Result","Failed",$intLevel);
344 }
345 VobPrint ("$strGdbError\n",$intLevel);
346
347 if( $strOutputFormat eq "xml" ) {
348 print "</message></database>";
349 }
350
351 $strGdbError = "$strGdbError";
352 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
353 #push(@Errors,$strGdbError);
[21711]354 }
[27604]355 else
356 {
357 if( $strOutputFormat eq "xml" ) {
358 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
359 } else {
360 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
361 }
362 }
363 }
364 else
365 {
366 my $strErrorColName;
[21711]367
[27604]368 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
369 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
[21711]370
[27604]371 AlignPrint("Database Comparsion Result","Failed",$intLevel);
[21711]372
[27604]373 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
374 VobPrint ("$strGdbError\n",$intLevel);
375
376 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
[21711]377
[27604]378 }
379
380 return $strGdbError;
[21711]381}
382
383sub FullTest
384{
385 my ($strModelCol,$strTestCol,$strColName) = @_;
386 my @Errors = ();
387 my $intLevel = 0;
388 my $intNumberDiffs = 0;
389
390 # <Initial Test>
391 if( $strOutputFormat eq "xml" ) {
392 #print "<initial-test>";
393 } else {
394 VobPrint("Initial Testing Start\n",$intLevel);
395 }
396
397 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
398 $intNumberDiffs = scalar(@Errors);
399
400 if( $strOutputFormat eq "xml" ) {
401 #print "</initial-test>";
402 } else {
403 VobPrint("Initial Testing End\n",$intLevel);
404 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
405 VobPrint("\n",$intLevel);
406 }
407 # </Initial Test>
408
409 # <Detailed Test>
410 if( $strOutputFormat eq "xml" ) {
411 #print "<detailed-test>";
412 } else {
413 VobPrint("Detail Testing Start\n",$intLevel);
414 }
[27730]415 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
[21711]416 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
417
418 if( $strOutputFormat eq "xml" ) {
419 #print "</detailed-test>";
420 } else {
421 VobPrint("Detail Testing End\n",$intLevel);
422 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
423 }
424 # </Detailed Test>
425
426 return @Errors;
427}
428#----##
429
430
431#--Other System Utilities
432sub PrintUsage
433{
434 my ($strProgName) = @_;
435 if ( $strOutputFormat eq "xml" ) {
436 print "<error>usage incorrect</error>\n";
437 } else {
[29495]438 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
[21711]439 }
440 Help("Error: used incorrect parameters");
441}
442
443sub Help
444{
445 my ($strError) = @_;
446 my $aryptHelps =
447 [ { 'name' => "verbosity",
448 'type' => "scale",
449 'argu' => "a integer" ,
450 'descrip' => "this parameter setup the verbosity of the testing result"},
451 { 'name' => "mode",
452 'type' => "option",
453 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
454 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
455 { 'name' => "estop",
456 'type' => "flag",
457 'argu' => "NULL" ,
[29495]458 'descrip' => "Set then system will stop once it meets an error"},
[21711]459 { 'name' => "eshow",
460 'type' => "flag",
461 'argu' => "NULL" ,
[29495]462 'descrip' => "Set then system will show the error summary"},
463 { 'name' => "debug",
464 'type' => "flag",
465 'argu' => "NULL" ,
466 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
[21711]467 ];
468
469
470 if ( $strOutputFormat ne "xml" ) {
471 print "$strError\n";
472
473 foreach my $hashOneArg (@{$aryptHelps})
474 {
475 print "\n----------------------------\n";
476 print "Parameters: -".$hashOneArg->{"name"}."\n";
477 print "Type: ".$hashOneArg->{"type"}."\n";
478 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
479 print "Description: ".$hashOneArg->{"descrip"}."\n";
480 print "----------------------------\n";
481 }
482 }
483}
484
485sub OutputStart
486{
487 my ($strColName) = @_;
488 my $intPadding = 17 - length($strColName);
489
490 if ( $strOutputFormat eq "xml" ) {
491 print "<diffcol>\n";
492 } else {
493 print "+---------------------------------------------------------+\n";
494 print "| |\n";
495 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
496 print "| |\n";
497 print "+---------------------------------------------------------+\n\n";
498 }
499}
500
501sub OutputEnd
502{
503 my ($strColName,$aryptErrors) = @_;
504 my $intPadding = 12 - length($strColName);
505 if ( $strOutputFormat eq "xml" ) {
506 print "</diffcol>\n";
507 } else {
508 print "\n";
509 print "+---------------------------------------------------------+\n";
510 print "| |\n";
511 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
512 print "| |\n";
513 print "+---------------------------------------------------------+\n\n";
514 }
515
516 my $intTotalErrors = scalar(@{$aryptErrors});
517 if ( $strOutputFormat ne "xml" ) {
518 print "Checking completed, there is $intTotalErrors error(s) found.\n";
519 }
520
521 if($gv_blnErrorShow ne "off")
522 {
523 foreach my $strEachError (@{$aryptErrors})
524 {
525 if ( $strOutputFormat eq "xml" ) {
526 print "<error>";
527 print $strEachError;
528 print "</error>\n";
529 } else {
530 print "+---------------------------------------------------------+\n";
531 print "| Error |\n";
532 print "+---------------------------------------------------------+\n\n";
533 print "$strEachError\n\n";
534 }
535 }
536 }
537 else
538 {
539 if ( $strOutputFormat ne "xml" ) {
540 print "Use -eshow to show the error detail\n\n";
541 }
542 }
543}
544
545sub AlignPrint
546{
547 my ($strMainString,$strStatus,$intLevel) = @_;
548 my $intDot = 100 - length($strMainString) - length($strStatus);
549 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
550}
551
[27695]552
553# this function is only called on DocXMLFiles.
554# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
555# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
[28086]556sub isDocOrMETSXMLFileWindows
[27695]557{
558 my ($file_contents) = @_;
559
560 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
561
562 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
563 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
564
565 # for doc.xml:
566 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
[28086]567 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
568 $gsdlsourcefilename = $2;
[27695]569 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
570 return 1;
571 }
[28107]572 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
573 return 1;
[27695]574 }
575
576 return 0;
577}
578
[21711]579sub TestEach
580{
[27730]581 my ($strModel,$strTest,$intLevel,$strColName) = @_;
[21711]582 my @Errors = ();
583
584 $intLevel++;
585 if (-d $strModel && -d $strTest)
586 {
587 my @aryInModel = &diffutil::files_in_dir($strModel);
588 my @aryInTest = &diffutil::files_in_dir($strTest);
[27695]589
590 # Files to be skipped because they get generated on one OS but not the other
591 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
[28609]592 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
[27695]593 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
594 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
595
596 # Now check all remaining files in the folder exist in both model and test collections
[21711]597 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
598 my @aryCorrectFiles = @{$aryTwoPointers[1]};
599 @Errors = @{$aryTwoPointers[0]};
600
601 if(scalar(@Errors) == 0)
602 {
603 foreach my $strEachFile (@aryInModel)
604 {
[27536]605 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
606 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
[27695]607 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
[28609]608 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
[21711]609 {
[27730]610 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
[21711]611 }
612 else
613 {
614 if ( $strOutputFormat eq "xml" ) {
615 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
616 } else {
617 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
618 }
619 }
620 }
621 }
622 else
623 {
624 foreach my $strEachFile (@aryCorrectFiles)
625 {
[27536]626 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
627 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
[28609]628 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
[21711]629 {
[27730]630 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
[21711]631 }
632 }
633 }
634 if($intLevel == $gv_intVerbosity)
635 {
636 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
637 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
638 }
639 }
640 else
641 {
[28078]642 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
[28086]643 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
[27730]644
[27604]645 my $strResult;
646
[28078]647 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
[27604]648 # in case these don't appear in the same order between collections, since
649 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
650 # when they can show up as unnecessary diff 'errors'
[28078]651
652 my ($model_contents, $test_contents);
653
[28107]654 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
655 if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
[28078]656
[27604]657 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
658 sysread(FIN, $model_contents, -s FIN);
659 close(FIN);
660 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
661 sysread(FIN, $test_contents, -s FIN);
662 close(FIN);
663
664 $model_contents =~ s/$ignore_line_re//g;
665 $test_contents =~ s/$ignore_line_re//g;
[27730]666
[28086]667
668 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
669 # before comparing a windows test with a linux model or vice-versa
[28172]670 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
671 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
[28086]672
673 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
674
675 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
676 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
[27695]677
[28086]678 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
679 $$win_contents =~ s@[\r]@@g;
680
681 # make all single windows slashes into single unix slashes
682 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
683 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
684 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
[27695]685
[28086]686 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
687 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
[27743]688
[28086]689 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
690 # $$win_contents =~ s@\r@\n@mg;
691
692 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
[27990]693 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
694 # containing solitary carriage returns instead of linefeed
695 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
[28008]696
697
698 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
699 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
700 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
701 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
[28086]702 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
[28019]703
704 # Advanced Beatles collection,
705 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
706 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
707 # Normalising to windows version for doing a diff
[28086]708 $$lin_contents =~ s@href=http:///@href=http://@g;
709 }
710 }
[27695]711
[28086]712 # processing particular to doc.xml
713 if($strModel =~ m/doc\.xml$/) {
714 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
715 # these tmpdirs are located inside the collection directory
716 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
717 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
[27766]718
[28086]719 # remove all absolute paths upto collect folder from <Metadata /> elements
720 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
721 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
[27766]722
[28086]723 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
724 # These tmpdirs are located inside the toplevel *greenstone* directory
725 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
726 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
727 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
728
729 if($test_contents =~ m@$tmpfile_regex@) {
730 # found a match, replace the tmp file name with "random", keeping the original file extension
731 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
732
733 my ($old_tmp_filename, $ext) = ($1, $2);
734 my $new_tmp_filename = "random";
735
736 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
737 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
738 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
739 if($5) {
740 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
741 } else { # OrigSource contains only the filename
742 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
743 }
744
745 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
746 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
747 if($5) {
748 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
749 } else { # OrigSource contains only the filename
750 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
751 }
[27766]752 }
[28086]753
754 } # finished special processing of doc.xml files
[27766]755
[28086]756
[29495]757 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
758 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
759 &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
760 &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
761# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
762# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
763# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
764# }
765 }
766
[28078]767
768 # now can diff the normalised versions of the doc.xml/docmets.xml files:
[27604]769 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
770
771 } else {
772 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
773 }
774
775 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
776 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
777 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
778
[21711]779 #$strResult = GeneralOutput($strResult);
780 if ( $strOutputFormat eq "xml" ) {
781 #
782 } else {
783 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
784 }
785 if ($strResult eq "")
786 {
787 if ( $strOutputFormat eq "xml" ) {
788 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
789 } else {
790 AlignPrint("Comparing File","Succeed",$intLevel);
791 }
792 }
793 else
794 {
[27730]795# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
796
[21711]797 my $strOutput = "Difference Report:\n$strResult\n";
798 if ( $strOutputFormat eq "xml" ) {
799 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
800 } else {
801 AlignPrint("Comparing File","Failed",$intLevel);
802 }
803
[27666]804 #$result=`file -b $strModel`; # linux specific test for binary file
805 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
[21711]806 if ( "$result" =~ "data" ) {
807 VobPrint( "These binary files differ", $intLevel );
808 } else {
809 VobPrint ( "$strOutput" , $intLevel);
810 }
811
812
813 if ( $strOutputFormat eq "xml" ) {
814 print "</message></file-comparison>";
815 }
816
817 if($gv_blnErrorStop ne "off") { exit; }
818 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
819 }
820 }
821
822 return @Errors;
823}
824
825
826sub FolderTesting
827{
828 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
829 my %hashCount = ();
830 my @Errors = ();
831 my @CorrectFiles = ();
832 my @TwoPointers = (\@Errors,\@CorrectFiles);
833
834 if ( $strOutputFormat eq "xml" ) {
835 #print "<folder-comparison location=\"$strModelFolder\">\n";
836 } else {
837 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
838 }
839
840 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
841 foreach my $strEachItem (@$aryptTest)
842 {
843 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
844 else {$hashCount{$strEachItem} = 'T';}
845 }
846
847 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
848 {
849 if ( $strOutputFormat eq "xml" ) {
850 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
851 } else {
852 AlignPrint("Folder Comparsion","Succeed",$intLevel);
853 }
854 return @TwoPointers;
855 }
856 else
857 {
858 if ( $strOutputFormat eq "xml" ) {
859 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
860 } else {
861 AlignPrint("Folder Comparsion","Failed",$intLevel);
862 }
863
864 foreach my $strEachItem (keys %hashCount)
865 {
866 if($hashCount{$strEachItem} ne 'B')
867 {
868 my $strOutput = "";
869 my $strReport = "";
870
871 if($hashCount{$strEachItem} eq 'M')
872 {
[27539]873 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
[21711]874 $strReport = "Difference Report: difference found at $strTestFolder";
875 }
876 elsif($hashCount{$strEachItem} eq 'T')
877 {
[27539]878 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
[21711]879 $strReport = "Difference Report: difference found at $strModelFolder";
880 }
881 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
882
883 VobPrint ("$strOutput\n",$intLevel);
884 $strOutput = $strOutput."\n\t".$strReport."\n";
885 push(@Errors,$strOutput);
886 }
887 else {push(@CorrectFiles,$strEachItem);}
888 }
889 if( $strOutputFormat eq "xml" ) {
890 print "</message></folder-comparison>";
891 }
892
893 return @TwoPointers;
894 }
895}
896
897sub VobPrint
898{
899 my ($strOutput, $intLevel) = @_;
900 my $strTab = "";
901 my $intTab = int($intLevel/2);
902 if($intLevel <= $gv_intVerbosity)
903 {
904 if($intLevel >= 1)
905 {
906 $strTab = "\t"x($intTab+1);
907 $strOutput =~ s/\n$//;
908 $strOutput =~ s/\n/\n$strTab/g;
909 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
910 }
911
912 if( $strOutputFormat eq "xml" ) {
913 $strOutput =~ s/&/&amp;/g;
[29432]914 $strOutput =~ s/</&amp;lt;/g;
915 $strOutput =~ s/>/&amp;gt;/g;
[21711]916 }
917
918 if ( length( $strOutput ) > 1000 ) {
[27969]919 $strOutput = substr( $strOutput, 0, 978);
920
[28102]921 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
922 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
[27969]923 if(defined $1 && $1) {
924 my $rest = $1;
[27990]925 if($rest =~ m/^a/) {
926 $strOutput =~ s@am?p?$@amp;@;
927 }
928 elsif($rest eq "g" || $rest eq "l") {
[27969]929 $strOutput .= "t;"; # close the known tag
930 }
[27990]931 elsif($rest eq "gt" || $rest eq "lt") {
[27969]932 $strOutput .= ";";
[27990]933 }
[27969]934 } else { # & on its own
935 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
936 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
937 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
938 }
939 }
940 $strOutput .= "... (output truncated)";
[21711]941 }
942
943
944 print $strTab.$strOutput."\n";
945 }
946}
947#----##
948
949
950#--Main System----------------------------
951#-----------------------------------------
952# Name: main
953# Perameters: arguments from command line
954# Pre-condition: testing will start by calling this main function.
955# Post-condition: output the test results for one or more collections.
956#-----------------------------------------
957sub main
958{
[29495]959 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
[21711]960 my $strProgName = $0;
961 my $intArgc = scalar(@ARGV);
962
963 #--System Arguments Setup
964 if (!parsargv::parse(\@ARGV,
965 'estop//off', \$strErrorStop,
966 'eshow//off', \$strErrorShow,
[29495]967 'debug', \$debugging,
[21711]968 'verbosity/\d+/1', \$intVerbosity,
969 'mode/[\w\-]+/all', \$strMode,
[28172]970 'output/[\w\-]+/text', \$strOutputFormat,
971 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
972 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
[21711]973 )) {
974 PrintUsage($strProgName);
975 die "\n";
976 }
977
978 if ($intArgc<1) {
979 PrintUsage($strProgName);
980 die "\n";
981 }
982
[28172]983 $gv_test_os = $test_os; # if not specified, defaults to "compute"
984 $gv_model_os = $model_os; # tends to be linux
985
[29495]986 $gv_blnDebugging = $debugging;
[21711]987 $gv_blnErrorStop = $strErrorStop;
988 $gv_blnErrorShow = $strErrorShow;
989 $gv_intVerbosity = $intVerbosity;
990 $gv_strMode = SetMode($strMode);
991
992 #----##
993
[29398]994# To find out what version of perl we're using
995 if( $strOutputFormat eq "xml" ) {
996 my $perloutput = `perl -v`;
997 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
[29448]998 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
[29398]999 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1000 }
1001
[21711]1002 #--Collection(s) Testing
1003 foreach $strColName (@ARGV)
1004 {
1005 my @ErrorsInEachCol;
[27536]1006 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1007 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
[21711]1008
1009 #--Output(Start)
1010 OutputStart($strColName);
1011 #----##
1012
1013 if(-e $strModelCol && -e $strTestCol )
1014 {
1015
1016 #--Individual Testing
1017 if ($gv_strMode eq "Individual")
1018 {
[27730]1019 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
[21711]1020 }
1021 #----##
1022
1023 #--Initial Testing
1024 elsif ($gv_strMode eq "Initial")
1025 {
1026 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1027 }
1028 #----##
1029
1030 #--Full Testing
1031 elsif ($gv_strMode eq "Full")
1032 {
1033 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1034 }
1035 #----##
1036
1037 #--Error Checking
1038 else
1039 {
1040 if ( $strOutputFormat eq "xml" ) {
1041 die "<error>Error occoured in main function</error>\n";
1042 } else {
1043 die "Error occoured in main function.\n";
1044 }
1045 }
1046 #----##
1047
1048 }
1049 else
1050 {
1051 if( $strOutputFormat eq "xml" ) {
1052 die "<error>Cannot find collection: $strColName</error>\n";
1053 } else {
1054 die "Error: cannot find collection: $strColName\n";
1055 }
1056 }
1057 #----##
1058
1059 #--Output(Results and Errors)
1060 OutputEnd($strColName,\@ErrorsInEachCol);
1061 #----##
1062
1063 }
1064}
1065#----##
1066
1067&main();
Note: See TracBrowser for help on using the repository browser.