source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 36807

Last change on this file since 36807 was 36807, checked in by anupama, 20 months ago

We used to run diffcol as a nightly task only for GS2. Commit 36655 was the first stage of getting diffcol to work for GS3, but skipped a lot of important code branches (like comparing the index\text\j/gdb files) in order to fix up the easier parts of the code. Now that I think the remainder of the diffcol scripts have been got to work with diffcol for GS3, where the index\text\flatdb files are compared and diffcol works for them, I can commit the important changes as well as commented out debugging statements made to the diffcol scripts that get the full diffcol code to work for GS3 diffcol. I will recommit again after removing the debugging statements. And I still need to do a full local diffcol run again, as well as testing if diffcol still works after locally undoing my sort field changes to some GS3 model cols (the recent commits to Tudor, Word-PDF, Images-GPS and Multimedia collections) to see if Dr Bainbridge's PERL_HASH_SEED env var addition fixes all of those collections diffcol failures, making the extra sorting redundant. In that case, I will recommit those model collections after updating their col configurations to not do the extra sorting.

File size: 37.7 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnDebugging = 0;
61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
126 my ($strModelCol,$strTestCol,$strColName) = @_;
127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
137 $intNumberOfErrors = scalar(@Errors);
138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193
194 ## TODO: Added to get GS3 diffcol to work, as there is no build/collect.cfg file in GS3
195 ## Now that other GS3 diffcol errors have been fixed (except for Word-PDF-Enhanced collection,
196 ## where the model-col needs to be built & committed from a Win machine),
197 ## need to comment out the following 3 newly added lines and fix any remaining issues
198# if($ENV{'GSVERSION'} eq "3") {
199# return @Errors;
200# }
201 if($ENV{'GSVERSION'} eq "3") {
202 # Testing the build.cfg
203 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
204 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
205 #if($ENV{'GSVERSION'} eq "3") {
206 # $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","buildConfig.xml");
207 # $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","buildConfig.xml");
208 #}
209
210 if(-e $strModelBcfg && -e $strTestBcfg)
211 {
212 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
213 if($strBcfgError ne "")
214 {
215 if( $strOutputFormat eq "xml" ) {
216 print "<build-cfg succeeded=\"no\">\n<message>";
217 } else {
218 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
219 }
220
221 VobPrint ("$strBcfgError",$intLevel);
222 if( $strOutputFormat eq "xml" ) {
223 print "</message></build-cfg>";
224 }
225
226 $strBcfgError = "$strBcfgError";
227 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
228 push(@Errors,$strBcfgError);
229 }
230 else
231 {
232 if( $strOutputFormat eq "xml" ) {
233 print "<build-cfg succeeded=\"yes\"/>";
234 } else {
235 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
236 }
237 }
238 }
239 else
240 {
241 my $strErrorColName;
242 my $strBcfgError;
243
244 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
245 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
246
247 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
248 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
249 VobPrint ("$strBcfgError\n",$intLevel);
250 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
251
252 push(@Errors,$strBcfgError);
253 }
254 VobPrint ("\n",$intLevel);
255
256 # Testing the collect.cfg
257
258 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
259 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
260 #if($ENV{'GSVERSION'} eq "3") {
261 # $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collectConfig.xml");
262 # $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collectConfig.xml");
263 #}
264
265 if(-e $strModelCcfg && -e $strTestCcfg)
266 {
267 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
268 if($strCcfgError ne "")
269 {
270 if( $strOutputFormat eq "xml" ) {
271 print "<collect-cfg succeeded=\"no\"><message>";
272 } else {
273 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
274 }
275
276 VobPrint ("$strCcfgError",$intLevel);
277
278 if( $strOutputFormat eq "xml" ) {
279 print "</message></collect-cfg>";
280 }
281
282 $strCcfgError = "$strCcfgError";
283 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
284 push(@Errors,$strCcfgError);
285 }
286 else
287 {
288 if( $strOutputFormat eq "xml" ) {
289 print "<collect-cfg succeeded=\"yes\"/>";
290 } else {
291 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
292 }
293 }
294 }
295 else
296 {
297 my $strErrorColName;
298 my $strCcfgError;
299
300 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
301 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
302
303 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
304 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
305 VobPrint ("$strCcfgError\n",$intLevel);
306 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
307
308 push(@Errors,$strCcfgError);
309 }
310
311 VobPrint ("\n",$intLevel);
312 }
313 # Testing databases
314
315 # index
316 my $flat_db_ext = ".gdb";
317
318 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
319 if(!(-e $strModelGdb)){ # try jdb
320 $flat_db_ext = ".jdb";
321 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
322 }
323
324 #print STDERR "*********** USING DB EXTENSION: $flat_db_ext\n";
325 #print STDERR "@@@ PERL_PERTURB_KEYS: ".$ENV{'PERL_PERTURB_KEYS'}."\n";
326
327 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName$flat_db_ext");
328 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
329 if($strGdbError) {
330 push(@Errors,$strGdbError);
331 }
332
333 # archives
334 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc$flat_db_ext");
335 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc$flat_db_ext");
336 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
337 if($strGdbError) {
338 push(@Errors,$strGdbError);
339 }
340
341 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src$flat_db_ext");
342 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src$flat_db_ext");
343 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
344 if($strGdbError) {
345 push(@Errors,$strGdbError);
346 }
347
348 VobPrint ("\n",$intLevel);
349
350 return @Errors;
351}
352
353
354# At present handles gdbm - need to expand to allow for jdbm and other db types
355sub GdbDiff
356{
357 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
358
359 my $strGdbError = 0;
360
361 if(-e $strModelGdb && -e $strTestGdb)
362 {
363 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
364
365# $gv_blnDebugging = 1;
366
367 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
368# $gv_blnDebugging = 0;
369
370 if($strGdbError ne "")
371 {
372 if( $strOutputFormat eq "xml" ) {
373 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
374 } else {
375 AlignPrint("Database Comparsion Result","Failed",$intLevel);
376 }
377 VobPrint ("$strGdbError\n",$intLevel);
378
379 if( $strOutputFormat eq "xml" ) {
380 print "</message></database>";
381 }
382
383 $strGdbError = "$strGdbError";
384 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
385 #push(@Errors,$strGdbError);
386 }
387 else
388 {
389 if( $strOutputFormat eq "xml" ) {
390 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
391 } else {
392 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
393 }
394 }
395 }
396 else
397 {
398 my $strErrorColName;
399
400 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
401 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
402
403 AlignPrint("Database Comparsion Result","Failed",$intLevel);
404
405 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
406 VobPrint ("$strGdbError\n",$intLevel);
407
408 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
409
410 }
411
412 return $strGdbError;
413}
414
415sub FullTest
416{
417 my ($strModelCol,$strTestCol,$strColName) = @_;
418 my @Errors = ();
419 my $intLevel = 0;
420 my $intNumberDiffs = 0;
421
422 # <Initial Test>
423 if( $strOutputFormat eq "xml" ) {
424 #print "<initial-test>";
425 } else {
426 VobPrint("Initial Testing Start\n",$intLevel);
427 }
428
429 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
430 $intNumberDiffs = scalar(@Errors);
431
432 if( $strOutputFormat eq "xml" ) {
433 #print "</initial-test>";
434 } else {
435 VobPrint("Initial Testing End\n",$intLevel);
436 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
437 VobPrint("\n",$intLevel);
438 }
439 # </Initial Test>
440
441 # <Detailed Test>
442 if( $strOutputFormat eq "xml" ) {
443 #print "<detailed-test>";
444 } else {
445 VobPrint("Detail Testing Start\n",$intLevel);
446 }
447 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
448 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
449
450 if( $strOutputFormat eq "xml" ) {
451 #print "</detailed-test>";
452 } else {
453 VobPrint("Detail Testing End\n",$intLevel);
454 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
455 }
456 # </Detailed Test>
457
458 return @Errors;
459}
460#----##
461
462
463#--Other System Utilities
464sub PrintUsage
465{
466 my ($strProgName) = @_;
467 if ( $strOutputFormat eq "xml" ) {
468 print "<error>usage incorrect</error>\n";
469 } else {
470 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
471 }
472 Help("Error: used incorrect parameters");
473}
474
475sub Help
476{
477 my ($strError) = @_;
478 my $aryptHelps =
479 [ { 'name' => "verbosity",
480 'type' => "scale",
481 'argu' => "a integer" ,
482 'descrip' => "this parameter setup the verbosity of the testing result"},
483 { 'name' => "mode",
484 'type' => "option",
485 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
486 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
487 { 'name' => "estop",
488 'type' => "flag",
489 'argu' => "NULL" ,
490 'descrip' => "Set then system will stop once it meets an error"},
491 { 'name' => "eshow",
492 'type' => "flag",
493 'argu' => "NULL" ,
494 'descrip' => "Set then system will show the error summary"},
495 { 'name' => "debug",
496 'type' => "flag",
497 'argu' => "NULL" ,
498 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
499 ];
500
501
502 if ( $strOutputFormat ne "xml" ) {
503 print "$strError\n";
504
505 foreach my $hashOneArg (@{$aryptHelps})
506 {
507 print "\n----------------------------\n";
508 print "Parameters: -".$hashOneArg->{"name"}."\n";
509 print "Type: ".$hashOneArg->{"type"}."\n";
510 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
511 print "Description: ".$hashOneArg->{"descrip"}."\n";
512 print "----------------------------\n";
513 }
514 }
515}
516
517sub OutputStart
518{
519 my ($strColName) = @_;
520 my $intPadding = 17 - length($strColName);
521
522 if ( $strOutputFormat eq "xml" ) {
523 print "<diffcol>\n";
524 } else {
525 print "+---------------------------------------------------------+\n";
526 print "| |\n";
527 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
528 print "| |\n";
529 print "+---------------------------------------------------------+\n\n";
530 }
531}
532
533sub OutputEnd
534{
535 my ($strColName,$aryptErrors) = @_;
536 my $intPadding = 12 - length($strColName);
537 if ( $strOutputFormat eq "xml" ) {
538 print "</diffcol>\n";
539 } else {
540 print "\n";
541 print "+---------------------------------------------------------+\n";
542 print "| |\n";
543 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
544 print "| |\n";
545 print "+---------------------------------------------------------+\n\n";
546 }
547
548 my $intTotalErrors = scalar(@{$aryptErrors});
549 if ( $strOutputFormat ne "xml" ) {
550 print "Checking completed, there is $intTotalErrors error(s) found.\n";
551 }
552
553 if($gv_blnErrorShow ne "off")
554 {
555 foreach my $strEachError (@{$aryptErrors})
556 {
557 if ( $strOutputFormat eq "xml" ) {
558 print "<error>";
559 print $strEachError;
560 print "</error>\n";
561 } else {
562 print "+---------------------------------------------------------+\n";
563 print "| Error |\n";
564 print "+---------------------------------------------------------+\n\n";
565 print "$strEachError\n\n";
566 }
567 }
568 }
569 else
570 {
571 if ( $strOutputFormat ne "xml" ) {
572 print "Use -eshow to show the error detail\n\n";
573 }
574 }
575}
576
577sub AlignPrint
578{
579 my ($strMainString,$strStatus,$intLevel) = @_;
580 my $intDot = 100 - length($strMainString) - length($strStatus);
581 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
582}
583
584
585# this function is only called on DocXMLFiles.
586# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
587# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
588sub isDocOrMETSXMLFileWindows
589{
590 my ($file_contents) = @_;
591
592 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
593
594 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
595 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
596
597 # for doc.xml:
598 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
599 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
600 $gsdlsourcefilename = $2;
601 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
602 return 1;
603 }
604 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
605 return 1;
606 }
607
608 return 0;
609}
610
611sub TestEach
612{
613 my ($strModel,$strTest,$intLevel,$strColName) = @_;
614 my @Errors = ();
615
616 $intLevel++;
617 if (-d $strModel && -d $strTest)
618 {
619 my @aryInModel = &diffutil::files_in_dir($strModel);
620 my @aryInTest = &diffutil::files_in_dir($strTest);
621
622 # Files to be skipped because they get generated on one OS but not the other
623 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
624 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
625 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
626 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
627
628 # Now check all remaining files in the folder exist in both model and test collections
629 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
630 my @aryCorrectFiles = @{$aryTwoPointers[1]};
631 @Errors = @{$aryTwoPointers[0]};
632
633 if(scalar(@Errors) == 0)
634 {
635 foreach my $strEachFile (@aryInModel)
636 {
637 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
638 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
639 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
640 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
641 {
642 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
643 }
644 else
645 {
646 if ( $strOutputFormat eq "xml" ) {
647 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
648 } else {
649 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
650 }
651 }
652 }
653 }
654 else
655 {
656 foreach my $strEachFile (@aryCorrectFiles)
657 {
658 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
659 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
660 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
661 {
662 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
663 }
664 }
665 }
666 if($intLevel == $gv_intVerbosity)
667 {
668 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
669 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
670 }
671 }
672 else
673 {
674 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
675 my $ignore_line_re = "<(.*?:)?(?:M|m)etadata name=\"(buildDate|earliestDatestamp|lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ex.File.FileAccessDate|ex.File.FileInodeChangeDate|ex.File.FileCreateDate|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?(?:M|m)etadata>\\s*\\n*";
676
677 my $strResult;
678
679 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
680 # in case these don't appear in the same order between collections, since
681 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
682 # when they can show up as unnecessary diff 'errors'
683
684 my ($model_contents, $test_contents);
685
686 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
687 if($strModel =~ m/buildConfig\.xml$/ || $strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
688
689 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
690 sysread(FIN, $model_contents, -s FIN);
691 close(FIN);
692 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
693 sysread(FIN, $test_contents, -s FIN);
694 close(FIN);
695
696 $model_contents =~ s/$ignore_line_re//g;
697 $test_contents =~ s/$ignore_line_re//g;
698
699
700 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
701 # before comparing a windows test with a linux model or vice-versa
702 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
703 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
704
705 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
706
707 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
708 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
709
710 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
711 $$win_contents =~ s@[\r]@@g;
712
713 # make all single windows slashes into single unix slashes
714 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
715 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
716 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
717
718 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
719 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
720
721 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
722 # $$win_contents =~ s@\r@\n@mg;
723
724 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
725 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
726 # containing solitary carriage returns instead of linefeed
727 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
728
729
730 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
731 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
732 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
733 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
734 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
735
736 # Advanced Beatles collection,
737 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
738 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
739 # Normalising to windows version for doing a diff
740 $$lin_contents =~ s@href=http:///@href=http://@g;
741 }
742 }
743
744 # remove all absolute paths upto collect folder from <Metadata /> elements
745 if($strModel =~ m/docmets\.xml$/) {
746 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
747 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
748 }
749
750 # processing particular to doc.xml
751 if($strModel =~ m/doc\.xml$/) {
752 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
753 # these tmpdirs are located inside the collection directory
754 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
755 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
756
757 # remove all absolute paths upto collect folder from <Metadata /> elements
758 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
759 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
760
761 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
762 # These tmpdirs are located inside the toplevel *greenstone* directory
763 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
764 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
765 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
766
767 if($test_contents =~ m@$tmpfile_regex@) {
768 # found a match, replace the tmp file name with "random", keeping the original file extension
769 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
770
771 my ($old_tmp_filename, $ext) = ($1, $2);
772 my $new_tmp_filename = "random";
773
774 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
775 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
776 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
777 if($5) {
778 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
779 } else { # OrigSource contains only the filename
780 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
781 }
782
783 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
784 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
785 if($5) {
786 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
787 } else { # OrigSource contains only the filename
788 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
789 }
790 }
791
792 } # finished special processing of doc.xml files
793
794
795 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
796 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
797 &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
798 &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
799# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
800# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
801# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
802# }
803 }
804
805
806 # now can diff the normalised versions of the doc.xml/docmets.xml files:
807 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
808
809 } else {
810 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
811 }
812
813 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
814 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
815 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
816
817 #$strResult = GeneralOutput($strResult);
818 if ( $strOutputFormat eq "xml" ) {
819 #
820 } else {
821 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
822 }
823 if ($strResult eq "")
824 {
825 if ( $strOutputFormat eq "xml" ) {
826 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
827 } else {
828 AlignPrint("Comparing File","Succeed",$intLevel);
829 }
830 }
831 else
832 {
833# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
834
835 my $strOutput = "Difference Report:\n$strResult\n";
836 if ( $strOutputFormat eq "xml" ) {
837 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
838 } else {
839 AlignPrint("Comparing File","Failed",$intLevel);
840 }
841
842 #$result=`file -b $strModel`; # linux specific test for binary file
843 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
844 if ( "$result" =~ "data" ) {
845 VobPrint( "These binary files differ", $intLevel );
846 } else {
847 VobPrint ( "$strOutput" , $intLevel);
848 }
849
850
851 if ( $strOutputFormat eq "xml" ) {
852 print "</message></file-comparison>";
853 }
854
855 if($gv_blnErrorStop ne "off") { exit; }
856 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
857 }
858 }
859
860 return @Errors;
861}
862
863
864sub FolderTesting
865{
866 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
867 my %hashCount = ();
868 my @Errors = ();
869 my @CorrectFiles = ();
870 my @TwoPointers = (\@Errors,\@CorrectFiles);
871
872 if ( $strOutputFormat eq "xml" ) {
873 #print "<folder-comparison location=\"$strModelFolder\">\n";
874 } else {
875 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
876 }
877
878 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
879 foreach my $strEachItem (@$aryptTest)
880 {
881 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
882 else {$hashCount{$strEachItem} = 'T';}
883 }
884
885 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
886 {
887 if ( $strOutputFormat eq "xml" ) {
888 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
889 } else {
890 AlignPrint("Folder Comparsion","Succeed",$intLevel);
891 }
892 return @TwoPointers;
893 }
894 else
895 {
896 if ( $strOutputFormat eq "xml" ) {
897 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
898 } else {
899 AlignPrint("Folder Comparsion","Failed",$intLevel);
900 }
901
902 foreach my $strEachItem (keys %hashCount)
903 {
904 if($hashCount{$strEachItem} ne 'B')
905 {
906 my $strOutput = "";
907 my $strReport = "";
908
909 if($hashCount{$strEachItem} eq 'M')
910 {
911 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
912 $strReport = "Difference Report: difference found at $strTestFolder";
913 }
914 elsif($hashCount{$strEachItem} eq 'T')
915 {
916 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
917 $strReport = "Difference Report: difference found at $strModelFolder";
918 }
919 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
920
921 VobPrint ("$strOutput\n",$intLevel);
922 $strOutput = $strOutput."\n\t".$strReport."\n";
923 push(@Errors,$strOutput);
924 }
925 else {push(@CorrectFiles,$strEachItem);}
926 }
927 if( $strOutputFormat eq "xml" ) {
928 print "</message></folder-comparison>";
929 }
930
931 return @TwoPointers;
932 }
933}
934
935sub VobPrint
936{
937 my ($strOutput, $intLevel) = @_;
938 my $strTab = "";
939 my $intTab = int($intLevel/2);
940 if($intLevel <= $gv_intVerbosity)
941 {
942 if($intLevel >= 1)
943 {
944 $strTab = "\t"x($intTab+1);
945 $strOutput =~ s/\n$//;
946 $strOutput =~ s/\n/\n$strTab/g;
947 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
948 }
949
950 if( $strOutputFormat eq "xml" ) {
951 $strOutput =~ s/&/&amp;/g;
952 $strOutput =~ s/</&amp;lt;/g;
953 $strOutput =~ s/>/&amp;gt;/g;
954 }
955
956 if ( length( $strOutput ) > 1000 ) {
957 $strOutput = substr( $strOutput, 0, 978);
958
959 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
960 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
961 if(defined $1 && $1) {
962 my $rest = $1;
963 if($rest =~ m/^a/) {
964 $strOutput =~ s@am?p?$@amp;@;
965 }
966 elsif($rest eq "g" || $rest eq "l") {
967 $strOutput .= "t;"; # close the known tag
968 }
969 elsif($rest eq "gt" || $rest eq "lt") {
970 $strOutput .= ";";
971 }
972 } else { # & on its own
973 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
974 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
975 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
976 }
977 }
978 $strOutput .= "... (output truncated)";
979 }
980
981
982 print $strTab.$strOutput."\n";
983 }
984}
985#----##
986
987
988#--Main System----------------------------
989#-----------------------------------------
990# Name: main
991# Perameters: arguments from command line
992# Pre-condition: testing will start by calling this main function.
993# Post-condition: output the test results for one or more collections.
994#-----------------------------------------
995sub main
996{
997 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
998 my $strProgName = $0;
999 my $intArgc = scalar(@ARGV);
1000
1001 #--System Arguments Setup
1002 if (!parsargv::parse(\@ARGV,
1003 'estop//off', \$strErrorStop,
1004 'eshow//off', \$strErrorShow,
1005 'debug', \$debugging,
1006 'verbosity/\d+/1', \$intVerbosity,
1007 'mode/[\w\-]+/all', \$strMode,
1008 'output/[\w\-]+/text', \$strOutputFormat,
1009 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
1010 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
1011 )) {
1012 PrintUsage($strProgName);
1013 die "\n";
1014 }
1015
1016 if ($intArgc<1) {
1017 PrintUsage($strProgName);
1018 die "\n";
1019 }
1020
1021 $gv_test_os = $test_os; # if not specified, defaults to "compute"
1022 $gv_model_os = $model_os; # tends to be linux
1023
1024 #print STDERR "@@@@@ test_os: $test_os\n";
1025 #print STDERR "@@@@@ model_os: $model_os\n";
1026
1027 $gv_blnDebugging = $debugging;
1028 $gv_blnErrorStop = $strErrorStop;
1029 $gv_blnErrorShow = $strErrorShow;
1030 $gv_intVerbosity = $intVerbosity;
1031 $gv_strMode = SetMode($strMode);
1032
1033 #----##
1034
1035# To find out what version of perl we're using
1036 if( $strOutputFormat eq "xml" ) {
1037 my $perloutput = `perl -v`;
1038 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1039 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1040 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1041 }
1042
1043 #--Collection(s) Testing
1044 foreach $strColName (@ARGV)
1045 {
1046 my @ErrorsInEachCol;
1047 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1048 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1049
1050 #--Output(Start)
1051 OutputStart($strColName);
1052 #----##
1053
1054 if(-e $strModelCol && -e $strTestCol )
1055 {
1056
1057 #--Individual Testing
1058 if ($gv_strMode eq "Individual")
1059 {
1060 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1061 }
1062 #----##
1063
1064 #--Initial Testing
1065 elsif ($gv_strMode eq "Initial")
1066 {
1067 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1068 }
1069 #----##
1070
1071 #--Full Testing
1072 elsif ($gv_strMode eq "Full")
1073 {
1074 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1075 }
1076 #----##
1077
1078 #--Error Checking
1079 else
1080 {
1081 if ( $strOutputFormat eq "xml" ) {
1082 die "<error>Error occoured in main function</error>\n";
1083 } else {
1084 die "Error occoured in main function.\n";
1085 }
1086 }
1087 #----##
1088
1089 }
1090 else
1091 {
1092 if( $strOutputFormat eq "xml" ) {
1093 die "<error>Cannot find collection: $strColName</error>\n";
1094 } else {
1095 die "Error: cannot find collection: $strColName\n";
1096 }
1097 }
1098 #----##
1099
1100 #--Output(Results and Errors)
1101 OutputEnd($strColName,\@ErrorsInEachCol);
1102 #----##
1103
1104 }
1105}
1106#----##
1107
1108&main();
Note: See TracBrowser for help on using the repository browser.