source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28238

Last change on this file since 28238 was 28238, checked in by ak19, 11 years ago

Modifications to handle placeholders for Greenstone standard path prefixes when the suffix of such a path is of the form of a Windows shortfilename. This occurs in archiveinf-doc.gdb on Windows, and possibly archiveinf-src.gdb too. Need to first reconstruct the full path locally (without placeholder), then convert it to a windows long filename, then stick the placeholder back into its place and do the diff between the test and model databases.

File size: 34.8 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52$gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
53$gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
54
55$gv_blnErrorStop = "false";
56$gv_blnErrorShow = "false";
57$gv_intVerbosity = 0;
58$gv_strMode = "Full";
59$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
60
61%gv_IndivList = ("archives" => 0,
62 "etc" => 0,
63 "images" => 0,
64 "building" => 0,
65 "import" => 0,
66 "index" => 0,
67 "log" => 0,
68 "metadata" => 0,
69 "perllib" => 0,
70 "temp" => 0);
71#----##
72
73#--System Setup---------------------------
74sub SetMode
75{
76 my ($strModeList) = @_;
77 $strModeList =~ s/\|/ /g;
78 my @Modes = split(" ",$strModeList);
79
80 my $blnIndividual = "true";
81 my $blnInitial = "false";
82 my $blnFull = "false";
83
84
85 foreach $strEachMode (@Modes)
86 {
87 if($strEachMode eq "all")
88 {
89 $blnFull = "true";
90 $blnIndividual = "false";
91 }
92 elsif($strEachMode eq "init")
93 {
94 $blnInitial = "true";
95 $blnIndividual = "false";
96 }
97 else
98 {
99 if(defined $gv_IndivList{$strEachMode})
100 {
101 $gv_IndivList{$strEachMode} = 1;
102 }
103 else
104 {
105 die Help("Error: used undefined mode");
106 }
107 }
108 }
109
110 if($blnFull eq "true") {return "Full";}
111 elsif($blnInitial eq "true") {return "Initial";}
112 elsif($blnIndividual eq "true") {return "Individual";}
113 else {die "Error occured in function SetMode!!\n";}
114}
115#----##
116
117#--System Process-------------------------
118sub IndivTest
119{
120 my ($strModelCol,$strTestCol,$strColName) = @_;
121 my @Errors = ();
122 my $intNumberOfErrors = 0;
123 foreach $strEachFolder (keys %gv_IndivList)
124 {
125 if($gv_IndivList{$strEachFolder} == 1)
126 {
127
128 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
129 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
130 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
131 $intNumberOfErrors = scalar(@Errors);
132 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
133 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
134 VobPrint("End Comparing \"$strEachFolder\"\n",0);
135 VobPrint("Difference Found: $intNumberOfErrors\n",0);
136 VobPrint ("\n",0);
137 }
138 }
139 return @Errors;
140}
141
142sub InitTest
143{
144
145 my ($strModelCol,$strTestCol,$strColName) = @_;
146 my $intLevel = 1;
147 my @Errors;
148
149 # Testing Log files
150# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
151# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
152#
153# if(-e $strModelLog && -e $strTestLog)
154# {
155# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
156# if($strLogError ne "")
157# {
158# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
159# VobPrint ("$strLogError\n",$intLevel);
160#
161# $strLogError = "$strLogError";
162# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
163# push(@Errors,$strLogError);
164# }
165# else
166# {
167# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
168# }
169# }
170# else
171# {
172# my $strErrorColName;
173# my $strLogError;
174#
175# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
176# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
177#
178# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
179# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
180# VobPrint ("$strLogError\n",$intLevel);
181# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
182#
183# push(@Errors,$strLogError);
184# }
185# VobPrint ("\n",$intLevel);
186
187 # Testing the build.cfg
188 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
189 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
190
191 if(-e $strModelBcfg && -e $strTestBcfg)
192 {
193 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
194 if($strBcfgError ne "")
195 {
196 if( $strOutputFormat eq "xml" ) {
197 print "<build-cfg succeeded=\"no\">\n<message>";
198 } else {
199 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
200 }
201
202 VobPrint ("$strBcfgError",$intLevel);
203 if( $strOutputFormat eq "xml" ) {
204 print "</message></build-cfg>";
205 }
206
207 $strBcfgError = "$strBcfgError";
208 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
209 push(@Errors,$strBcfgError);
210 }
211 else
212 {
213 if( $strOutputFormat eq "xml" ) {
214 print "<build-cfg succeeded=\"yes\"/>";
215 } else {
216 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
217 }
218 }
219 }
220 else
221 {
222 my $strErrorColName;
223 my $strBcfgError;
224
225 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
226 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
227
228 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
229 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
230 VobPrint ("$strBcfgError\n",$intLevel);
231 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
232
233 push(@Errors,$strBcfgError);
234 }
235 VobPrint ("\n",$intLevel);
236
237 # Testing the collect.cfg
238
239 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
240 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
241
242 if(-e $strModelCcfg && -e $strTestCcfg)
243 {
244 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
245 if($strCcfgError ne "")
246 {
247 if( $strOutputFormat eq "xml" ) {
248 print "<collect-cfg succeeded=\"no\"><message>";
249 } else {
250 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
251 }
252
253 VobPrint ("$strCcfgError",$intLevel);
254
255 if( $strOutputFormat eq "xml" ) {
256 print "</message></collect-cfg>";
257 }
258
259 $strCcfgError = "$strCcfgError";
260 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
261 push(@Errors,$strCcfgError);
262 }
263 else
264 {
265 if( $strOutputFormat eq "xml" ) {
266 print "<collect-cfg succeeded=\"yes\"/>";
267 } else {
268 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
269 }
270 }
271 }
272 else
273 {
274 my $strErrorColName;
275 my $strCcfgError;
276
277 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
278 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
279
280 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
281 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
282 VobPrint ("$strCcfgError\n",$intLevel);
283 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
284
285 push(@Errors,$strCcfgError);
286 }
287
288 VobPrint ("\n",$intLevel);
289
290 # Testing databases
291
292 # index
293 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
294 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
295 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
296 if($strGdbError) {
297 push(@Errors,$strGdbError);
298 }
299
300 # archives
301 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
302 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
303 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
304 if($strGdbError) {
305 push(@Errors,$strGdbError);
306 }
307
308 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
309 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
310 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
311 if($strGdbError) {
312 push(@Errors,$strGdbError);
313 }
314
315 VobPrint ("\n",$intLevel);
316
317 return @Errors;
318}
319
320
321# At present handles gdbm - need to expand to allow for jdbm and other db types
322sub GdbDiff
323{
324 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
325
326 my $strGdbError = 0;
327
328 if(-e $strModelGdb && -e $strTestGdb)
329 {
330 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
331 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
332 if($strGdbError ne "")
333 {
334 if( $strOutputFormat eq "xml" ) {
335 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
336 } else {
337 AlignPrint("Database Comparsion Result","Failed",$intLevel);
338 }
339 VobPrint ("$strGdbError\n",$intLevel);
340
341 if( $strOutputFormat eq "xml" ) {
342 print "</message></database>";
343 }
344
345 $strGdbError = "$strGdbError";
346 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
347 #push(@Errors,$strGdbError);
348 }
349 else
350 {
351 if( $strOutputFormat eq "xml" ) {
352 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
353 } else {
354 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
355 }
356 }
357 }
358 else
359 {
360 my $strErrorColName;
361
362 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
363 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
364
365 AlignPrint("Database Comparsion Result","Failed",$intLevel);
366
367 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
368 VobPrint ("$strGdbError\n",$intLevel);
369
370 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
371
372 }
373
374 return $strGdbError;
375}
376
377sub FullTest
378{
379 my ($strModelCol,$strTestCol,$strColName) = @_;
380 my @Errors = ();
381 my $intLevel = 0;
382 my $intNumberDiffs = 0;
383
384 # <Initial Test>
385 if( $strOutputFormat eq "xml" ) {
386 #print "<initial-test>";
387 } else {
388 VobPrint("Initial Testing Start\n",$intLevel);
389 }
390
391 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
392 $intNumberDiffs = scalar(@Errors);
393
394 if( $strOutputFormat eq "xml" ) {
395 #print "</initial-test>";
396 } else {
397 VobPrint("Initial Testing End\n",$intLevel);
398 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
399 VobPrint("\n",$intLevel);
400 }
401 # </Initial Test>
402
403 # <Detailed Test>
404 if( $strOutputFormat eq "xml" ) {
405 #print "<detailed-test>";
406 } else {
407 VobPrint("Detail Testing Start\n",$intLevel);
408 }
409 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
410 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
411
412 if( $strOutputFormat eq "xml" ) {
413 #print "</detailed-test>";
414 } else {
415 VobPrint("Detail Testing End\n",$intLevel);
416 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
417 }
418 # </Detailed Test>
419
420 return @Errors;
421}
422#----##
423
424
425#--Other System Utilities
426sub PrintUsage
427{
428 my ($strProgName) = @_;
429 if ( $strOutputFormat eq "xml" ) {
430 print "<error>usage incorrect</error>\n";
431 } else {
432 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
433 }
434 Help("Error: used incorrect parameters");
435}
436
437sub Help
438{
439 my ($strError) = @_;
440 my $aryptHelps =
441 [ { 'name' => "verbosity",
442 'type' => "scale",
443 'argu' => "a integer" ,
444 'descrip' => "this parameter setup the verbosity of the testing result"},
445 { 'name' => "mode",
446 'type' => "option",
447 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
448 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
449 { 'name' => "estop",
450 'type' => "flag",
451 'argu' => "NULL" ,
452 'descrip' => "Set then system will stop once it meet an error"},
453 { 'name' => "eshow",
454 'type' => "flag",
455 'argu' => "NULL" ,
456 'descrip' => "Set then system will show the error summary"}
457 ];
458
459
460 if ( $strOutputFormat ne "xml" ) {
461 print "$strError\n";
462
463 foreach my $hashOneArg (@{$aryptHelps})
464 {
465 print "\n----------------------------\n";
466 print "Parameters: -".$hashOneArg->{"name"}."\n";
467 print "Type: ".$hashOneArg->{"type"}."\n";
468 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
469 print "Description: ".$hashOneArg->{"descrip"}."\n";
470 print "----------------------------\n";
471 }
472 }
473}
474
475sub OutputStart
476{
477 my ($strColName) = @_;
478 my $intPadding = 17 - length($strColName);
479
480 if ( $strOutputFormat eq "xml" ) {
481 print "<diffcol>\n";
482 } else {
483 print "+---------------------------------------------------------+\n";
484 print "| |\n";
485 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
486 print "| |\n";
487 print "+---------------------------------------------------------+\n\n";
488 }
489}
490
491sub OutputEnd
492{
493 my ($strColName,$aryptErrors) = @_;
494 my $intPadding = 12 - length($strColName);
495 if ( $strOutputFormat eq "xml" ) {
496 print "</diffcol>\n";
497 } else {
498 print "\n";
499 print "+---------------------------------------------------------+\n";
500 print "| |\n";
501 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
502 print "| |\n";
503 print "+---------------------------------------------------------+\n\n";
504 }
505
506 my $intTotalErrors = scalar(@{$aryptErrors});
507 if ( $strOutputFormat ne "xml" ) {
508 print "Checking completed, there is $intTotalErrors error(s) found.\n";
509 }
510
511 if($gv_blnErrorShow ne "off")
512 {
513 foreach my $strEachError (@{$aryptErrors})
514 {
515 if ( $strOutputFormat eq "xml" ) {
516 print "<error>";
517 print $strEachError;
518 print "</error>\n";
519 } else {
520 print "+---------------------------------------------------------+\n";
521 print "| Error |\n";
522 print "+---------------------------------------------------------+\n\n";
523 print "$strEachError\n\n";
524 }
525 }
526 }
527 else
528 {
529 if ( $strOutputFormat ne "xml" ) {
530 print "Use -eshow to show the error detail\n\n";
531 }
532 }
533}
534
535sub AlignPrint
536{
537 my ($strMainString,$strStatus,$intLevel) = @_;
538 my $intDot = 100 - length($strMainString) - length($strStatus);
539 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
540}
541
542
543# this function is only called on DocXMLFiles.
544# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
545# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
546sub isDocOrMETSXMLFileWindows
547{
548 my ($file_contents) = @_;
549
550 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
551
552 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
553 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
554
555 # for doc.xml:
556 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
557 if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
558 $gsdlsourcefilename = $2;
559 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
560 return 1;
561 }
562 } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
563 return 1;
564 }
565
566 return 0;
567}
568
569sub TestEach
570{
571 my ($strModel,$strTest,$intLevel,$strColName) = @_;
572 my @Errors = ();
573
574 $intLevel++;
575 if (-d $strModel && -d $strTest)
576 {
577 my @aryInModel = &diffutil::files_in_dir($strModel);
578 my @aryInTest = &diffutil::files_in_dir($strTest);
579
580 # Files to be skipped because they get generated on one OS but not the other
581 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
582 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
583 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
584 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
585
586 # Now check all remaining files in the folder exist in both model and test collections
587 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
588 my @aryCorrectFiles = @{$aryTwoPointers[1]};
589 @Errors = @{$aryTwoPointers[0]};
590
591 if(scalar(@Errors) == 0)
592 {
593 foreach my $strEachFile (@aryInModel)
594 {
595 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
596 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
597 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
598 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
599 {
600 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
601 }
602 else
603 {
604 if ( $strOutputFormat eq "xml" ) {
605 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
606 } else {
607 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
608 }
609 }
610 }
611 }
612 else
613 {
614 foreach my $strEachFile (@aryCorrectFiles)
615 {
616 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
617 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
618 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
619 {
620 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
621 }
622 }
623 }
624 if($intLevel == $gv_intVerbosity)
625 {
626 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
627 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
628 }
629 }
630 else
631 {
632 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
633 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
634
635 my $strResult;
636
637 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
638 # in case these don't appear in the same order between collections, since
639 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
640 # when they can show up as unnecessary diff 'errors'
641
642 my ($model_contents, $test_contents);
643
644 # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
645 if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
646
647 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
648 sysread(FIN, $model_contents, -s FIN);
649 close(FIN);
650 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
651 sysread(FIN, $test_contents, -s FIN);
652 close(FIN);
653
654 $model_contents =~ s/$ignore_line_re//g;
655 $test_contents =~ s/$ignore_line_re//g;
656
657
658 # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
659 # before comparing a windows test with a linux model or vice-versa
660 my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
661 my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
662
663 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
664
665 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
666 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
667
668 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
669 $$win_contents =~ s@[\r]@@g;
670
671 # make all single windows slashes into single unix slashes
672 # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
673 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
674 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
675
676 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
677 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
678
679 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
680 # $$win_contents =~ s@\r@\n@mg;
681
682 if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
683 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
684 # containing solitary carriage returns instead of linefeed
685 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
686
687
688 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
689 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
690 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
691 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
692 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
693
694 # Advanced Beatles collection,
695 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
696 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
697 # Normalising to windows version for doing a diff
698 $$lin_contents =~ s@href=http:///@href=http://@g;
699 }
700 }
701
702 # processing particular to doc.xml
703 if($strModel =~ m/doc\.xml$/) {
704 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
705 # these tmpdirs are located inside the collection directory
706 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
707 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
708
709 # remove all absolute paths upto collect folder from <Metadata /> elements
710 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
711 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
712
713 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
714 # These tmpdirs are located inside the toplevel *greenstone* directory
715 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
716 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
717 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
718
719 if($test_contents =~ m@$tmpfile_regex@) {
720 # found a match, replace the tmp file name with "random", keeping the original file extension
721 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
722
723 my ($old_tmp_filename, $ext) = ($1, $2);
724 my $new_tmp_filename = "random";
725
726 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
727 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
728 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
729 if($5) {
730 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
731 } else { # OrigSource contains only the filename
732 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
733 }
734
735 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
736 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
737 if($5) {
738 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
739 } else { # OrigSource contains only the filename
740 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
741 }
742 }
743
744 } # finished special processing of doc.xml files
745
746 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
747# &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
748# &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
749# if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
750# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
751# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
752# }
753
754
755
756 # now can diff the normalised versions of the doc.xml/docmets.xml files:
757 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
758
759 } else {
760 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
761 }
762
763 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
764 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
765 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
766
767 #$strResult = GeneralOutput($strResult);
768 if ( $strOutputFormat eq "xml" ) {
769 #
770 } else {
771 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
772 }
773 if ($strResult eq "")
774 {
775 if ( $strOutputFormat eq "xml" ) {
776 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
777 } else {
778 AlignPrint("Comparing File","Succeed",$intLevel);
779 }
780 }
781 else
782 {
783# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
784
785 my $strOutput = "Difference Report:\n$strResult\n";
786 if ( $strOutputFormat eq "xml" ) {
787 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
788 } else {
789 AlignPrint("Comparing File","Failed",$intLevel);
790 }
791
792 #$result=`file -b $strModel`; # linux specific test for binary file
793 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
794 if ( "$result" =~ "data" ) {
795 VobPrint( "These binary files differ", $intLevel );
796 } else {
797 VobPrint ( "$strOutput" , $intLevel);
798 }
799
800
801 if ( $strOutputFormat eq "xml" ) {
802 print "</message></file-comparison>";
803 }
804
805 if($gv_blnErrorStop ne "off") { exit; }
806 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
807 }
808 }
809
810 return @Errors;
811}
812
813
814sub FolderTesting
815{
816 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
817 my %hashCount = ();
818 my @Errors = ();
819 my @CorrectFiles = ();
820 my @TwoPointers = (\@Errors,\@CorrectFiles);
821
822 if ( $strOutputFormat eq "xml" ) {
823 #print "<folder-comparison location=\"$strModelFolder\">\n";
824 } else {
825 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
826 }
827
828 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
829 foreach my $strEachItem (@$aryptTest)
830 {
831 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
832 else {$hashCount{$strEachItem} = 'T';}
833 }
834
835 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
836 {
837 if ( $strOutputFormat eq "xml" ) {
838 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
839 } else {
840 AlignPrint("Folder Comparsion","Succeed",$intLevel);
841 }
842 return @TwoPointers;
843 }
844 else
845 {
846 if ( $strOutputFormat eq "xml" ) {
847 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
848 } else {
849 AlignPrint("Folder Comparsion","Failed",$intLevel);
850 }
851
852 foreach my $strEachItem (keys %hashCount)
853 {
854 if($hashCount{$strEachItem} ne 'B')
855 {
856 my $strOutput = "";
857 my $strReport = "";
858
859 if($hashCount{$strEachItem} eq 'M')
860 {
861 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
862 $strReport = "Difference Report: difference found at $strTestFolder";
863 }
864 elsif($hashCount{$strEachItem} eq 'T')
865 {
866 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
867 $strReport = "Difference Report: difference found at $strModelFolder";
868 }
869 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
870
871 VobPrint ("$strOutput\n",$intLevel);
872 $strOutput = $strOutput."\n\t".$strReport."\n";
873 push(@Errors,$strOutput);
874 }
875 else {push(@CorrectFiles,$strEachItem);}
876 }
877 if( $strOutputFormat eq "xml" ) {
878 print "</message></folder-comparison>";
879 }
880
881 return @TwoPointers;
882 }
883}
884
885sub VobPrint
886{
887 my ($strOutput, $intLevel) = @_;
888 my $strTab = "";
889 my $intTab = int($intLevel/2);
890 if($intLevel <= $gv_intVerbosity)
891 {
892 if($intLevel >= 1)
893 {
894 $strTab = "\t"x($intTab+1);
895 $strOutput =~ s/\n$//;
896 $strOutput =~ s/\n/\n$strTab/g;
897 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
898 }
899
900 if( $strOutputFormat eq "xml" ) {
901 $strOutput =~ s/&/&amp;/g;
902 $strOutput =~ s/</&lt;/g;
903 $strOutput =~ s/>/&gt;/g;
904 }
905
906 if ( length( $strOutput ) > 1000 ) {
907 $strOutput = substr( $strOutput, 0, 978);
908
909 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
910 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
911 if(defined $1 && $1) {
912 my $rest = $1;
913 if($rest =~ m/^a/) {
914 $strOutput =~ s@am?p?$@amp;@;
915 }
916 elsif($rest eq "g" || $rest eq "l") {
917 $strOutput .= "t;"; # close the known tag
918 }
919 elsif($rest eq "gt" || $rest eq "lt") {
920 $strOutput .= ";";
921 }
922 } else { # & on its own
923 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
924 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
925 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
926 }
927 }
928 $strOutput .= "... (output truncated)";
929 }
930
931
932 print $strTab.$strOutput."\n";
933 }
934}
935#----##
936
937
938#--Main System----------------------------
939#-----------------------------------------
940# Name: main
941# Perameters: arguments from command line
942# Pre-condition: testing will start by calling this main function.
943# Post-condition: output the test results for one or more collections.
944#-----------------------------------------
945sub main
946{
947 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
948 my $strProgName = $0;
949 my $intArgc = scalar(@ARGV);
950
951 #--System Arguments Setup
952 if (!parsargv::parse(\@ARGV,
953 'estop//off', \$strErrorStop,
954 'eshow//off', \$strErrorShow,
955 'verbosity/\d+/1', \$intVerbosity,
956 'mode/[\w\-]+/all', \$strMode,
957 'output/[\w\-]+/text', \$strOutputFormat,
958 'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
959 'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
960 )) {
961 PrintUsage($strProgName);
962 die "\n";
963 }
964
965 if ($intArgc<1) {
966 PrintUsage($strProgName);
967 die "\n";
968 }
969
970 $gv_test_os = $test_os; # if not specified, defaults to "compute"
971 $gv_model_os = $model_os; # tends to be linux
972
973 $gv_blnErrorStop = $strErrorStop;
974 $gv_blnErrorShow = $strErrorShow;
975 $gv_intVerbosity = $intVerbosity;
976 $gv_strMode = SetMode($strMode);
977
978 #----##
979
980 #--Collection(s) Testing
981 foreach $strColName (@ARGV)
982 {
983 my @ErrorsInEachCol;
984 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
985 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
986
987 #--Output(Start)
988 OutputStart($strColName);
989 #----##
990
991 if(-e $strModelCol && -e $strTestCol )
992 {
993
994 #--Individual Testing
995 if ($gv_strMode eq "Individual")
996 {
997 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
998 }
999 #----##
1000
1001 #--Initial Testing
1002 elsif ($gv_strMode eq "Initial")
1003 {
1004 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1005 }
1006 #----##
1007
1008 #--Full Testing
1009 elsif ($gv_strMode eq "Full")
1010 {
1011 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1012 }
1013 #----##
1014
1015 #--Error Checking
1016 else
1017 {
1018 if ( $strOutputFormat eq "xml" ) {
1019 die "<error>Error occoured in main function</error>\n";
1020 } else {
1021 die "Error occoured in main function.\n";
1022 }
1023 }
1024 #----##
1025
1026 }
1027 else
1028 {
1029 if( $strOutputFormat eq "xml" ) {
1030 die "<error>Cannot find collection: $strColName</error>\n";
1031 } else {
1032 die "Error: cannot find collection: $strColName\n";
1033 }
1034 }
1035 #----##
1036
1037 #--Output(Results and Errors)
1038 OutputEnd($strColName,\@ErrorsInEachCol);
1039 #----##
1040
1041 }
1042}
1043#----##
1044
1045&main();
Note: See TracBrowser for help on using the repository browser.