source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 36655

Last change on this file since 36655 was 36655, checked in by anupama, 21 months ago

First stage of getting GS3 diffcol to work. 1. For now skipping absence of build.cfg and collect.cfg to fix any remaining issues. 2. The gs3-model-collect regenerated on Linux a few days back didn't contain the COLLNAME-inf-tmp.(gdb|jdb|lg) files whereas the test colls on this Windows machine do. So adding it to list of files to ignore because they only get generated on some OS. 3. buildConfig.xml contains metadata elements with lowercase m for metadata, and for buildConfig.xml need to ignore 2 more time-sensitive fields when diffing: buildDate and earliestDatestamp, which are now added to list of fieldnames to ignore, just as buildConfig.xml is added to list of files to process in ways somewhat similar to doc.xml (i.e. comparing some metadata fields, and ignoring time-sensitive ones or other arbitrary ones). 4. A fresh SVN checkout of GS3 shows it doesn't come with openoffice and doesn't use it for building GS3 model collections. And there's no libreoffice on this Windows diffcol test machine (where the test-colls are built and diffed against model-colls prepared on linux), so the open-office extension can not be used on here anyway. 4. With these settings, but see the potentially big hack in step 1, all but Word-PDF-Enhanced collection pass diffcol testing. The Word-PDF-Enhanced collection is the windows_scripting collection and so the model-collection should have been regenerated on Windows, as it originally was for this collection (see commit r30029).

File size: 37.1 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56 $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57 $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnDebugging = 0;
61$gv_blnErrorStop = "false";
62$gv_blnErrorShow = "false";
63$gv_intVerbosity = 0;
64$gv_strMode = "Full";
65$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
66
67%gv_IndivList = ("archives" => 0,
68 "etc" => 0,
69 "images" => 0,
70 "building" => 0,
71 "import" => 0,
72 "index" => 0,
73 "log" => 0,
74 "metadata" => 0,
75 "perllib" => 0,
76 "temp" => 0);
77#----##
78
79#--System Setup---------------------------
80sub SetMode
81{
82 my ($strModeList) = @_;
83 $strModeList =~ s/\|/ /g;
84 my @Modes = split(" ",$strModeList);
85
86 my $blnIndividual = "true";
87 my $blnInitial = "false";
88 my $blnFull = "false";
89
90
91 foreach $strEachMode (@Modes)
92 {
93 if($strEachMode eq "all")
94 {
95 $blnFull = "true";
96 $blnIndividual = "false";
97 }
98 elsif($strEachMode eq "init")
99 {
100 $blnInitial = "true";
101 $blnIndividual = "false";
102 }
103 else
104 {
105 if(defined $gv_IndivList{$strEachMode})
106 {
107 $gv_IndivList{$strEachMode} = 1;
108 }
109 else
110 {
111 die Help("Error: used undefined mode");
112 }
113 }
114 }
115
116 if($blnFull eq "true") {return "Full";}
117 elsif($blnInitial eq "true") {return "Initial";}
118 elsif($blnIndividual eq "true") {return "Individual";}
119 else {die "Error occured in function SetMode!!\n";}
120}
121#----##
122
123#--System Process-------------------------
124sub IndivTest
125{
126 my ($strModelCol,$strTestCol,$strColName) = @_;
127 my @Errors = ();
128 my $intNumberOfErrors = 0;
129 foreach $strEachFolder (keys %gv_IndivList)
130 {
131 if($gv_IndivList{$strEachFolder} == 1)
132 {
133
134 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
135 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
136 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
137 $intNumberOfErrors = scalar(@Errors);
138 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
139 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
140 VobPrint("End Comparing \"$strEachFolder\"\n",0);
141 VobPrint("Difference Found: $intNumberOfErrors\n",0);
142 VobPrint ("\n",0);
143 }
144 }
145 return @Errors;
146}
147
148sub InitTest
149{
150
151 my ($strModelCol,$strTestCol,$strColName) = @_;
152 my $intLevel = 1;
153 my @Errors;
154
155 # Testing Log files
156# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
157# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
158#
159# if(-e $strModelLog && -e $strTestLog)
160# {
161# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
162# if($strLogError ne "")
163# {
164# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
165# VobPrint ("$strLogError\n",$intLevel);
166#
167# $strLogError = "$strLogError";
168# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
169# push(@Errors,$strLogError);
170# }
171# else
172# {
173# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
174# }
175# }
176# else
177# {
178# my $strErrorColName;
179# my $strLogError;
180#
181# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
182# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
183#
184# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
185# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
186# VobPrint ("$strLogError\n",$intLevel);
187# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
188#
189# push(@Errors,$strLogError);
190# }
191# VobPrint ("\n",$intLevel);
192
193
194 ## TODO: Added to get GS3 diffcol to work, as there is no build/collect.cfg file in GS3
195 ## Now that other GS3 diffcol errors have been fixed (except for Word-PDF-Enhanced collection,
196 ## where the model-col needs to be built & committed from a Win machine),
197 ## need to comment out the following 3 newly added lines and fix any remaining issues
198 if($ENV{'GSVERSION'} eq "3") {
199 return @Errors;
200 }
201
202 # Testing the build.cfg
203 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
204 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
205 #if($ENV{'GSVERSION'} eq "3") {
206 # $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","buildConfig.xml");
207 # $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","buildConfig.xml");
208 #}
209
210 if(-e $strModelBcfg && -e $strTestBcfg)
211 {
212 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
213 if($strBcfgError ne "")
214 {
215 if( $strOutputFormat eq "xml" ) {
216 print "<build-cfg succeeded=\"no\">\n<message>";
217 } else {
218 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
219 }
220
221 VobPrint ("$strBcfgError",$intLevel);
222 if( $strOutputFormat eq "xml" ) {
223 print "</message></build-cfg>";
224 }
225
226 $strBcfgError = "$strBcfgError";
227 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
228 push(@Errors,$strBcfgError);
229 }
230 else
231 {
232 if( $strOutputFormat eq "xml" ) {
233 print "<build-cfg succeeded=\"yes\"/>";
234 } else {
235 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
236 }
237 }
238 }
239 else
240 {
241 my $strErrorColName;
242 my $strBcfgError;
243
244 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
245 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
246
247 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
248 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
249 VobPrint ("$strBcfgError\n",$intLevel);
250 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
251
252 push(@Errors,$strBcfgError);
253 }
254 VobPrint ("\n",$intLevel);
255
256 # Testing the collect.cfg
257
258 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
259 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
260 #if($ENV{'GSVERSION'} eq "3") {
261 # $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collectConfig.xml");
262 # $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collectConfig.xml");
263 #}
264
265 if(-e $strModelCcfg && -e $strTestCcfg)
266 {
267 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
268 if($strCcfgError ne "")
269 {
270 if( $strOutputFormat eq "xml" ) {
271 print "<collect-cfg succeeded=\"no\"><message>";
272 } else {
273 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
274 }
275
276 VobPrint ("$strCcfgError",$intLevel);
277
278 if( $strOutputFormat eq "xml" ) {
279 print "</message></collect-cfg>";
280 }
281
282 $strCcfgError = "$strCcfgError";
283 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
284 push(@Errors,$strCcfgError);
285 }
286 else
287 {
288 if( $strOutputFormat eq "xml" ) {
289 print "<collect-cfg succeeded=\"yes\"/>";
290 } else {
291 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
292 }
293 }
294 }
295 else
296 {
297 my $strErrorColName;
298 my $strCcfgError;
299
300 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
301 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
302
303 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
304 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
305 VobPrint ("$strCcfgError\n",$intLevel);
306 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
307
308 push(@Errors,$strCcfgError);
309 }
310
311 VobPrint ("\n",$intLevel);
312
313 # Testing databases
314
315 # index
316 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
317 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
318 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
319 if($strGdbError) {
320 push(@Errors,$strGdbError);
321 }
322
323 # archives
324 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
325 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
326 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
327 if($strGdbError) {
328 push(@Errors,$strGdbError);
329 }
330
331 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
332 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
333 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
334 if($strGdbError) {
335 push(@Errors,$strGdbError);
336 }
337
338 VobPrint ("\n",$intLevel);
339
340 return @Errors;
341}
342
343
344# At present handles gdbm - need to expand to allow for jdbm and other db types
345sub GdbDiff
346{
347 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
348
349 my $strGdbError = 0;
350
351 if(-e $strModelGdb && -e $strTestGdb)
352 {
353 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
354 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
355 if($strGdbError ne "")
356 {
357 if( $strOutputFormat eq "xml" ) {
358 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
359 } else {
360 AlignPrint("Database Comparsion Result","Failed",$intLevel);
361 }
362 VobPrint ("$strGdbError\n",$intLevel);
363
364 if( $strOutputFormat eq "xml" ) {
365 print "</message></database>";
366 }
367
368 $strGdbError = "$strGdbError";
369 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
370 #push(@Errors,$strGdbError);
371 }
372 else
373 {
374 if( $strOutputFormat eq "xml" ) {
375 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
376 } else {
377 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
378 }
379 }
380 }
381 else
382 {
383 my $strErrorColName;
384
385 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
386 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
387
388 AlignPrint("Database Comparsion Result","Failed",$intLevel);
389
390 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
391 VobPrint ("$strGdbError\n",$intLevel);
392
393 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
394
395 }
396
397 return $strGdbError;
398}
399
400sub FullTest
401{
402 my ($strModelCol,$strTestCol,$strColName) = @_;
403 my @Errors = ();
404 my $intLevel = 0;
405 my $intNumberDiffs = 0;
406
407 # <Initial Test>
408 if( $strOutputFormat eq "xml" ) {
409 #print "<initial-test>";
410 } else {
411 VobPrint("Initial Testing Start\n",$intLevel);
412 }
413
414 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
415 $intNumberDiffs = scalar(@Errors);
416
417 if( $strOutputFormat eq "xml" ) {
418 #print "</initial-test>";
419 } else {
420 VobPrint("Initial Testing End\n",$intLevel);
421 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
422 VobPrint("\n",$intLevel);
423 }
424 # </Initial Test>
425
426 # <Detailed Test>
427 if( $strOutputFormat eq "xml" ) {
428 #print "<detailed-test>";
429 } else {
430 VobPrint("Detail Testing Start\n",$intLevel);
431 }
432 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
433 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
434
435 if( $strOutputFormat eq "xml" ) {
436 #print "</detailed-test>";
437 } else {
438 VobPrint("Detail Testing End\n",$intLevel);
439 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
440 }
441 # </Detailed Test>
442
443 return @Errors;
444}
445#----##
446
447
448#--Other System Utilities
449sub PrintUsage
450{
451 my ($strProgName) = @_;
452 if ( $strOutputFormat eq "xml" ) {
453 print "<error>usage incorrect</error>\n";
454 } else {
455 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
456 }
457 Help("Error: used incorrect parameters");
458}
459
460sub Help
461{
462 my ($strError) = @_;
463 my $aryptHelps =
464 [ { 'name' => "verbosity",
465 'type' => "scale",
466 'argu' => "a integer" ,
467 'descrip' => "this parameter setup the verbosity of the testing result"},
468 { 'name' => "mode",
469 'type' => "option",
470 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
471 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
472 { 'name' => "estop",
473 'type' => "flag",
474 'argu' => "NULL" ,
475 'descrip' => "Set then system will stop once it meets an error"},
476 { 'name' => "eshow",
477 'type' => "flag",
478 'argu' => "NULL" ,
479 'descrip' => "Set then system will show the error summary"},
480 { 'name' => "debug",
481 'type' => "flag",
482 'argu' => "NULL" ,
483 'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
484 ];
485
486
487 if ( $strOutputFormat ne "xml" ) {
488 print "$strError\n";
489
490 foreach my $hashOneArg (@{$aryptHelps})
491 {
492 print "\n----------------------------\n";
493 print "Parameters: -".$hashOneArg->{"name"}."\n";
494 print "Type: ".$hashOneArg->{"type"}."\n";
495 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
496 print "Description: ".$hashOneArg->{"descrip"}."\n";
497 print "----------------------------\n";
498 }
499 }
500}
501
502sub OutputStart
503{
504 my ($strColName) = @_;
505 my $intPadding = 17 - length($strColName);
506
507 if ( $strOutputFormat eq "xml" ) {
508 print "<diffcol>\n";
509 } else {
510 print "+---------------------------------------------------------+\n";
511 print "| |\n";
512 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
513 print "| |\n";
514 print "+---------------------------------------------------------+\n\n";
515 }
516}
517
518sub OutputEnd
519{
520 my ($strColName,$aryptErrors) = @_;
521 my $intPadding = 12 - length($strColName);
522 if ( $strOutputFormat eq "xml" ) {
523 print "</diffcol>\n";
524 } else {
525 print "\n";
526 print "+---------------------------------------------------------+\n";
527 print "| |\n";
528 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
529 print "| |\n";
530 print "+---------------------------------------------------------+\n\n";
531 }
532
533 my $intTotalErrors = scalar(@{$aryptErrors});
534 if ( $strOutputFormat ne "xml" ) {
535 print "Checking completed, there is $intTotalErrors error(s) found.\n";
536 }
537
538 if($gv_blnErrorShow ne "off")
539 {
540 foreach my $strEachError (@{$aryptErrors})
541 {
542 if ( $strOutputFormat eq "xml" ) {
543 print "<error>";
544 print $strEachError;
545 print "</error>\n";
546 } else {
547 print "+---------------------------------------------------------+\n";
548 print "| Error |\n";
549 print "+---------------------------------------------------------+\n\n";
550 print "$strEachError\n\n";
551 }
552 }
553 }
554 else
555 {
556 if ( $strOutputFormat ne "xml" ) {
557 print "Use -eshow to show the error detail\n\n";
558 }
559 }
560}
561
562sub AlignPrint
563{
564 my ($strMainString,$strStatus,$intLevel) = @_;
565 my $intDot = 100 - length($strMainString) - length($strStatus);
566 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
567}
568
569
570# this function is only called on DocXMLFiles.
571# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
572# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
573sub isDocOrMETSXMLFileWindows
574{
575 my ($file_contents) = @_;
576
577 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
578
579 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
580 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
581
582 # for doc.xml:
583 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
584 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
585 $gsdlsourcefilename = $2;
586 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
587 return 1;
588 }
589 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
590 return 1;
591 }
592
593 return 0;
594}
595
596sub TestEach
597{
598 my ($strModel,$strTest,$intLevel,$strColName) = @_;
599 my @Errors = ();
600
601 $intLevel++;
602 if (-d $strModel && -d $strTest)
603 {
604 my @aryInModel = &diffutil::files_in_dir($strModel);
605 my @aryInTest = &diffutil::files_in_dir($strTest);
606
607 # Files to be skipped because they get generated on one OS but not the other
608 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
609 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
610 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
611 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
612
613 # Now check all remaining files in the folder exist in both model and test collections
614 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
615 my @aryCorrectFiles = @{$aryTwoPointers[1]};
616 @Errors = @{$aryTwoPointers[0]};
617
618 if(scalar(@Errors) == 0)
619 {
620 foreach my $strEachFile (@aryInModel)
621 {
622 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
623 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
624 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
625 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
626 {
627 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
628 }
629 else
630 {
631 if ( $strOutputFormat eq "xml" ) {
632 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
633 } else {
634 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
635 }
636 }
637 }
638 }
639 else
640 {
641 foreach my $strEachFile (@aryCorrectFiles)
642 {
643 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
644 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
645 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
646 {
647 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
648 }
649 }
650 }
651 if($intLevel == $gv_intVerbosity)
652 {
653 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
654 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
655 }
656 }
657 else
658 {
659 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
660 my $ignore_line_re = "<(.*?:)?(?:M|m)etadata name=\"(buildDate|earliestDatestamp|lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ex.File.FileAccessDate|ex.File.FileInodeChangeDate|ex.File.FileCreateDate|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?(?:M|m)etadata>\\s*\\n*";
661
662 my $strResult;
663
664 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
665 # in case these don't appear in the same order between collections, since
666 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
667 # when they can show up as unnecessary diff 'errors'
668
669 my ($model_contents, $test_contents);
670
671 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
672 if($strModel =~ m/buildConfig\.xml$/ || $strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
673
674 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
675 sysread(FIN, $model_contents, -s FIN);
676 close(FIN);
677 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
678 sysread(FIN, $test_contents, -s FIN);
679 close(FIN);
680
681 $model_contents =~ s/$ignore_line_re//g;
682 $test_contents =~ s/$ignore_line_re//g;
683
684
685 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
686 # before comparing a windows test with a linux model or vice-versa
687 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
688 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
689
690 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
691
692 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
693 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
694
695 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
696 $$win_contents =~ s@[\r]@@g;
697
698 # make all single windows slashes into single unix slashes
699 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
700 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
701 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
702
703 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
704 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
705
706 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
707 # $$win_contents =~ s@\r@\n@mg;
708
709 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
710 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
711 # containing solitary carriage returns instead of linefeed
712 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
713
714
715 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
716 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
717 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
718 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
719 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
720
721 # Advanced Beatles collection,
722 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
723 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
724 # Normalising to windows version for doing a diff
725 $$lin_contents =~ s@href=http:///@href=http://@g;
726 }
727 }
728
729 # remove all absolute paths upto collect folder from <Metadata /> elements
730 if($strModel =~ m/docmets\.xml$/) {
731 $model_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
732 $test_contents =~ s@(<gsdl3:Metadata name=\"gsdlfullsourcepath\">).*($strColName)@$1$2@g;
733 }
734
735 # processing particular to doc.xml
736 if($strModel =~ m/doc\.xml$/) {
737 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
738 # these tmpdirs are located inside the collection directory
739 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
740 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
741
742 # remove all absolute paths upto collect folder from <Metadata /> elements
743 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
744 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
745
746 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
747 # These tmpdirs are located inside the toplevel *greenstone* directory
748 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
749 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
750 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
751
752 if($test_contents =~ m@$tmpfile_regex@) {
753 # found a match, replace the tmp file name with "random", keeping the original file extension
754 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
755
756 my ($old_tmp_filename, $ext) = ($1, $2);
757 my $new_tmp_filename = "random";
758
759 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
760 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
761 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
762 if($5) {
763 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
764 } else { # OrigSource contains only the filename
765 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
766 }
767
768 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
769 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
770 if($5) {
771 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
772 } else { # OrigSource contains only the filename
773 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
774 }
775 }
776
777 } # finished special processing of doc.xml files
778
779
780 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
781 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
782 &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
783 &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
784# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
785# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
786# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
787# }
788 }
789
790
791 # now can diff the normalised versions of the doc.xml/docmets.xml files:
792 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
793
794 } else {
795 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
796 }
797
798 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
799 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
800 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
801
802 #$strResult = GeneralOutput($strResult);
803 if ( $strOutputFormat eq "xml" ) {
804 #
805 } else {
806 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
807 }
808 if ($strResult eq "")
809 {
810 if ( $strOutputFormat eq "xml" ) {
811 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
812 } else {
813 AlignPrint("Comparing File","Succeed",$intLevel);
814 }
815 }
816 else
817 {
818# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
819
820 my $strOutput = "Difference Report:\n$strResult\n";
821 if ( $strOutputFormat eq "xml" ) {
822 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
823 } else {
824 AlignPrint("Comparing File","Failed",$intLevel);
825 }
826
827 #$result=`file -b $strModel`; # linux specific test for binary file
828 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
829 if ( "$result" =~ "data" ) {
830 VobPrint( "These binary files differ", $intLevel );
831 } else {
832 VobPrint ( "$strOutput" , $intLevel);
833 }
834
835
836 if ( $strOutputFormat eq "xml" ) {
837 print "</message></file-comparison>";
838 }
839
840 if($gv_blnErrorStop ne "off") { exit; }
841 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
842 }
843 }
844
845 return @Errors;
846}
847
848
849sub FolderTesting
850{
851 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
852 my %hashCount = ();
853 my @Errors = ();
854 my @CorrectFiles = ();
855 my @TwoPointers = (\@Errors,\@CorrectFiles);
856
857 if ( $strOutputFormat eq "xml" ) {
858 #print "<folder-comparison location=\"$strModelFolder\">\n";
859 } else {
860 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
861 }
862
863 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
864 foreach my $strEachItem (@$aryptTest)
865 {
866 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
867 else {$hashCount{$strEachItem} = 'T';}
868 }
869
870 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
871 {
872 if ( $strOutputFormat eq "xml" ) {
873 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
874 } else {
875 AlignPrint("Folder Comparsion","Succeed",$intLevel);
876 }
877 return @TwoPointers;
878 }
879 else
880 {
881 if ( $strOutputFormat eq "xml" ) {
882 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
883 } else {
884 AlignPrint("Folder Comparsion","Failed",$intLevel);
885 }
886
887 foreach my $strEachItem (keys %hashCount)
888 {
889 if($hashCount{$strEachItem} ne 'B')
890 {
891 my $strOutput = "";
892 my $strReport = "";
893
894 if($hashCount{$strEachItem} eq 'M')
895 {
896 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
897 $strReport = "Difference Report: difference found at $strTestFolder";
898 }
899 elsif($hashCount{$strEachItem} eq 'T')
900 {
901 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
902 $strReport = "Difference Report: difference found at $strModelFolder";
903 }
904 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
905
906 VobPrint ("$strOutput\n",$intLevel);
907 $strOutput = $strOutput."\n\t".$strReport."\n";
908 push(@Errors,$strOutput);
909 }
910 else {push(@CorrectFiles,$strEachItem);}
911 }
912 if( $strOutputFormat eq "xml" ) {
913 print "</message></folder-comparison>";
914 }
915
916 return @TwoPointers;
917 }
918}
919
920sub VobPrint
921{
922 my ($strOutput, $intLevel) = @_;
923 my $strTab = "";
924 my $intTab = int($intLevel/2);
925 if($intLevel <= $gv_intVerbosity)
926 {
927 if($intLevel >= 1)
928 {
929 $strTab = "\t"x($intTab+1);
930 $strOutput =~ s/\n$//;
931 $strOutput =~ s/\n/\n$strTab/g;
932 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
933 }
934
935 if( $strOutputFormat eq "xml" ) {
936 $strOutput =~ s/&/&amp;/g;
937 $strOutput =~ s/</&amp;lt;/g;
938 $strOutput =~ s/>/&amp;gt;/g;
939 }
940
941 if ( length( $strOutput ) > 1000 ) {
942 $strOutput = substr( $strOutput, 0, 978);
943
944 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
945 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
946 if(defined $1 && $1) {
947 my $rest = $1;
948 if($rest =~ m/^a/) {
949 $strOutput =~ s@am?p?$@amp;@;
950 }
951 elsif($rest eq "g" || $rest eq "l") {
952 $strOutput .= "t;"; # close the known tag
953 }
954 elsif($rest eq "gt" || $rest eq "lt") {
955 $strOutput .= ";";
956 }
957 } else { # & on its own
958 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
959 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
960 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
961 }
962 }
963 $strOutput .= "... (output truncated)";
964 }
965
966
967 print $strTab.$strOutput."\n";
968 }
969}
970#----##
971
972
973#--Main System----------------------------
974#-----------------------------------------
975# Name: main
976# Perameters: arguments from command line
977# Pre-condition: testing will start by calling this main function.
978# Post-condition: output the test results for one or more collections.
979#-----------------------------------------
980sub main
981{
982 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
983 my $strProgName = $0;
984 my $intArgc = scalar(@ARGV);
985
986 #--System Arguments Setup
987 if (!parsargv::parse(\@ARGV,
988 'estop//off', \$strErrorStop,
989 'eshow//off', \$strErrorShow,
990 'debug', \$debugging,
991 'verbosity/\d+/1', \$intVerbosity,
992 'mode/[\w\-]+/all', \$strMode,
993 'output/[\w\-]+/text', \$strOutputFormat,
994 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
995 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
996 )) {
997 PrintUsage($strProgName);
998 die "\n";
999 }
1000
1001 if ($intArgc<1) {
1002 PrintUsage($strProgName);
1003 die "\n";
1004 }
1005
1006 $gv_test_os = $test_os; # if not specified, defaults to "compute"
1007 $gv_model_os = $model_os; # tends to be linux
1008
1009 $gv_blnDebugging = $debugging;
1010 $gv_blnErrorStop = $strErrorStop;
1011 $gv_blnErrorShow = $strErrorShow;
1012 $gv_intVerbosity = $intVerbosity;
1013 $gv_strMode = SetMode($strMode);
1014
1015 #----##
1016
1017# To find out what version of perl we're using
1018 if( $strOutputFormat eq "xml" ) {
1019 my $perloutput = `perl -v`;
1020 $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
1021 $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
1022 print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
1023 }
1024
1025 #--Collection(s) Testing
1026 foreach $strColName (@ARGV)
1027 {
1028 my @ErrorsInEachCol;
1029 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
1030 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
1031
1032 #--Output(Start)
1033 OutputStart($strColName);
1034 #----##
1035
1036 if(-e $strModelCol && -e $strTestCol )
1037 {
1038
1039 #--Individual Testing
1040 if ($gv_strMode eq "Individual")
1041 {
1042 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1043 }
1044 #----##
1045
1046 #--Initial Testing
1047 elsif ($gv_strMode eq "Initial")
1048 {
1049 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1050 }
1051 #----##
1052
1053 #--Full Testing
1054 elsif ($gv_strMode eq "Full")
1055 {
1056 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1057 }
1058 #----##
1059
1060 #--Error Checking
1061 else
1062 {
1063 if ( $strOutputFormat eq "xml" ) {
1064 die "<error>Error occoured in main function</error>\n";
1065 } else {
1066 die "Error occoured in main function.\n";
1067 }
1068 }
1069 #----##
1070
1071 }
1072 else
1073 {
1074 if( $strOutputFormat eq "xml" ) {
1075 die "<error>Cannot find collection: $strColName</error>\n";
1076 } else {
1077 die "Error: cannot find collection: $strColName\n";
1078 }
1079 }
1080 #----##
1081
1082 #--Output(Results and Errors)
1083 OutputEnd($strColName,\@ErrorsInEachCol);
1084 #----##
1085
1086 }
1087}
1088#----##
1089
1090&main();
Note: See TracBrowser for help on using the repository browser.