source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28107

Last change on this file since 28107 was 28107, checked in by ak19, 11 years ago

Demo-Lucene has a different type of doc.xml in its index\text folder's HASH dirs. Expanding diffcol doc.xml processing to support this.

File size: 34.2 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# diffcol.pl -- tests whether a built collection is consistent with its model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
BEGIN {
    # Greenstone's environment variables must be present before its
    # libraries can be located.
    foreach my $strEnvVar ('GSDLHOME', 'GSDLOS') {
        die "$strEnvVar not set\n" unless defined $ENV{$strEnvVar};
    }

    # Prepend the Greenstone library folders to the module search path;
    # the cpan folder ends up ahead of perllib.
    unshift(@INC, "$ENV{'GSDLHOME'}/perllib/cpan", "$ENV{'GSDLHOME'}/perllib");
}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
# Root folders of the model collections and of the collections under test.
$gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "/model-collect");
$gv_strTestColRoot  = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "/collect");

# Run-time options and their defaults.
# NOTE(review): both error flags default to "false", yet OutputEnd and
# TestEach compare them against "off" -- confirm which values the option
# parser actually assigns.
($gv_blnErrorStop, $gv_blnErrorShow) = ("false", "false");
$gv_intVerbosity = 0;
$gv_strMode      = "Full";
$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default

# Collection sub-folders that can be selected for individual testing
# (0 = not selected, 1 = selected by SetMode).
%gv_IndivList = map { ($_ => 0) }
    qw(archives etc images building import index log metadata perllib temp);
69#----##
70
71#--System Setup---------------------------
# Parses a mode specification and returns the overall test mode.
#
# Parameters:
#   $strModeList - mode names separated by '|' and/or whitespace,
#                  e.g. "all", "init" or "archives|index"
# Returns:
#   "Full" if "all" was given, otherwise "Initial" if "init" was given,
#   otherwise "Individual".
# Side effects:
#   every individual folder name given is flagged with 1 in %gv_IndivList.
# Dies (after printing the help text) when a mode name is not recognised.
sub SetMode
{
    my ($strModeList) = @_;
    $strModeList = "" unless defined $strModeList;

    my $blnFull    = 0;
    my $blnInitial = 0;

    # '|' and whitespace are equivalent separators; empty fields are dropped
    foreach my $strEachMode (grep { length($_) } split(/[\s|]+/, $strModeList))
    {
        if ($strEachMode eq "all")
        {
            $blnFull = 1;
        }
        elsif ($strEachMode eq "init")
        {
            $blnInitial = 1;
        }
        elsif (exists $gv_IndivList{$strEachMode})
        {
            $gv_IndivList{$strEachMode} = 1;
        }
        else
        {
            # Print the help first, then die with a message of our own rather
            # than with whatever Help() happens to return.
            Help("Error: used undefined mode");
            die "Error: used undefined mode \"$strEachMode\"\n";
        }
    }

    return "Full"    if $blnFull;
    return "Initial" if $blnInitial;
    return "Individual";
}
113#----##
114
115#--System Process-------------------------
# Runs the detailed comparison (TestEach) on every collection sub-folder
# that has been selected in %gv_IndivList.
#
# Parameters:
#   $strModelCol - path of the model collection
#   $strTestCol  - path of the collection under test
#   $strColName  - collection name, passed through to TestEach
# Returns the list of error strings gathered from all selected folders.
sub IndivTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my @Errors = ();

    # Sorted so that the report order is the same on every run
    # (hash key order is not stable).
    foreach my $strEachFolder (sort keys %gv_IndivList)
    {
        next unless $gv_IndivList{$strEachFolder} == 1;

        VobPrint("Start Comparing \"$strEachFolder\"\n",0);
        my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
        my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);

        # Count only the errors contributed by this folder
        my $intErrorsBefore = scalar(@Errors);
        push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
        my $intNumberOfErrors = scalar(@Errors) - $intErrorsBefore;

        VobPrint("End Comparing \"$strEachFolder\"\n",0);
        VobPrint("Difference Found: $intNumberOfErrors\n",0);
        VobPrint ("\n",0);
    }

    return @Errors;
}
139
# Runs the "initial" tests on a collection: compares index/build.cfg and
# etc/collect.cfg, then the index and archives databases.
#
# Parameters:
#   $strModelCol - path of the model collection
#   $strTestCol  - path of the collection under test
#   $strColName  - collection name (names the index database file)
# Returns the list of error strings, one per failed comparison.
#
# NOTE: a comparison of the collections' "log" folders (logdiff::test_log)
# used to run first but has been disabled.
sub InitTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 1;
    my @Errors = ();

    # Config files: [folder, file name, xml element name, text emitted between
    # the failing element's start tag and its <message> child]
    my @aryCfgFiles = (["index", "build.cfg",   "build-cfg",   "\n"],
                       ["etc",   "collect.cfg", "collect-cfg", ""]);
    foreach my $aryptCfg (@aryCfgFiles)
    {
        my ($strFolder,$strCfgName,$strXmlTag,$strXmlSep) = @{$aryptCfg};
        my $strModelCfg = &FileUtils::filenameConcatenate($strModelCol,$strFolder,$strCfgName);
        my $strTestCfg = &FileUtils::filenameConcatenate($strTestCol,$strFolder,$strCfgName);

        my $strCfgError = _CompareCfgFile($strModelCfg,$strTestCfg,$strCfgName,$strXmlTag,$strXmlSep,$intLevel);
        push(@Errors,$strCfgError) if $strCfgError ne "";

        VobPrint ("\n",$intLevel);
    }

    # Databases: the index database first, then the two archives databases
    my @aryGdbFiles = (["index","text","$strColName.gdb"],
                       ["archives","archiveinf-doc.gdb"],
                       ["archives","archiveinf-src.gdb"]);
    foreach my $aryptPath (@aryGdbFiles)
    {
        my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,@{$aryptPath});
        my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,@{$aryptPath});
        my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); # false if no error
        push(@Errors,$strGdbError) if $strGdbError;
    }

    VobPrint ("\n",$intLevel);

    return @Errors;
}

# Compares one config file between the model and test collections and
# reports the outcome (xml or aligned text, depending on $strOutputFormat).
# Returns "" when the files agree, otherwise the error text for the summary.
sub _CompareCfgFile
{
    my ($strModelCfg,$strTestCfg,$strCfgName,$strXmlTag,$strXmlSep,$intLevel) = @_;
    my $strTitle = "Config File($strCfgName) Comparison";
    my $blnXml = ($strOutputFormat eq "xml");

    if(-e $strModelCfg && -e $strTestCfg)
    {
        my $strCfgError = cfgdiff::test_cfg($strModelCfg,$strTestCfg,$strCfgName);

        if($strCfgError eq "")
        {
            if($blnXml) { print "<$strXmlTag succeeded=\"yes\"/>"; }
            else { AlignPrint("$strTitle Result","Succeed",$intLevel); }
            return "";
        }

        if($blnXml) { print "<$strXmlTag succeeded=\"no\">$strXmlSep<message>"; }
        else { AlignPrint("$strTitle Result","Failed",$intLevel); }

        VobPrint ("$strCfgError",$intLevel);

        if($blnXml) { print "</message></$strXmlTag>"; }

        return "Difference Found at $strTitle\n".$strCfgError."\n";
    }

    # At least one of the two files is missing: name the collection(s) at fault.
    # Start from "" so the concatenation never sees an undefined value.
    my $strErrorColName = "";
    $strErrorColName .= "(Model Collection)" unless -e $strModelCfg;
    $strErrorColName .= "(Test Collection)" unless -e $strTestCfg;

    AlignPrint("$strTitle Result","Failed",$intLevel);
    my $strCfgError = "Difference Report: No Config files found in $strErrorColName";
    VobPrint ("$strCfgError\n",$intLevel);

    return "Difference Found at $strTitle\n".$strCfgError."\n";
}
317
318
# Compares one database file between the model and test collections.
# At present handles gdbm - need to expand to allow for jdbm and other db types
#
# Parameters:
#   $strModelGdb, $strTestGdb - paths of the two database files
#   $strOutputFormat          - "xml" for xml reporting, anything else for text
#   $intLevel                 - verbosity level used for the report lines
#   $strColName               - collection name, passed to gdbdiff::test_gdb
# Returns a false value ("" or 0) when the databases agree, otherwise the
# error text to add to the error summary.
sub GdbDiff
{
    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName) = @_;
    my $blnXml = ($strOutputFormat eq "xml");

    my $strGdbError = 0;

    if(-e $strModelGdb && -e $strTestGdb)
    {
        $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb,$strColName);
        if($strGdbError ne "")
        {
            if($blnXml) {
                print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
            } else {
                AlignPrint("Database Comparsion Result","Failed",$intLevel);
            }

            VobPrint ("$strGdbError\n",$intLevel);

            if($blnXml) {
                print "</message></database>";
            }

            $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
        }
        else
        {
            if($blnXml) {
                print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
            } else {
                AlignPrint("Database Comparsion Result","Succeed",$intLevel);
            }
        }
    }
    else
    {
        # At least one database is missing: name the collection(s) at fault.
        # Start from "" so the concatenation never sees an undefined value.
        my $strErrorColName = "";
        $strErrorColName .= "(Model Collection)" unless -e $strModelGdb;
        $strErrorColName .= "(Test Collection)" unless -e $strTestGdb;

        AlignPrint("Database Comparsion Result","Failed",$intLevel);

        $strGdbError = "Difference Report: No Database files found in $strErrorColName";
        VobPrint ("$strGdbError\n",$intLevel);

        $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
    }

    return $strGdbError;
}
374
# Runs the complete test of a collection: the initial tests (InitTest)
# followed by the detailed folder-by-folder comparison (TestEach).
# Returns the combined list of error strings from both phases.
# In xml mode no phase headers or footers are printed.
sub FullTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 0;
    my $blnPlainText = !($strOutputFormat eq "xml");

    # --- initial test ---
    VobPrint("Initial Testing Start\n",$intLevel) if $blnPlainText;

    my @Errors = InitTest($strModelCol,$strTestCol,$strColName);
    my $intInitialDiffs = scalar(@Errors);

    if ($blnPlainText)
    {
        VobPrint("Initial Testing End\n",$intLevel);
        VobPrint("Difference Found in Initial Testing: $intInitialDiffs\n",$intLevel);
        VobPrint("\n",$intLevel);
    }

    # --- detailed test ---
    VobPrint("Detail Testing Start\n",$intLevel) if $blnPlainText;

    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
    my $intDetailDiffs = scalar(@Errors) - $intInitialDiffs;

    if ($blnPlainText)
    {
        VobPrint("Detail Testing End\n",$intLevel);
        VobPrint("Difference Found in Detail Testing: $intDetailDiffs\n",$intLevel);
    }

    return @Errors;
}
420#----##
421
422
423#--Other System Utilities
# Reports incorrect command line usage: an <error> element on STDOUT in xml
# mode, otherwise a usage line on STDERR; then hands over to Help().
sub PrintUsage
{
    my ($strProgName) = @_;
    my $blnXml = ($strOutputFormat eq "xml");

    print "<error>usage incorrect</error>\n" if $blnXml;
    print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n" unless $blnXml;

    Help("Error: used incorrect parameters");
}
434
# Prints an error message followed by a description of every command line
# parameter. Nothing is printed in xml mode.
#
# Parameters:
#   $strError - the error message to show above the parameter descriptions
# Returns the error message terminated by a newline, so that a caller
# writing `die Help(...)` dies with a meaningful message.
sub Help
{
    my ($strError) = @_;

    # The individual folder modes listed for -mode are the keys that SetMode
    # accepts (see %gv_IndivList).
    my $aryptHelps =
        [ { 'name' => "verbosity",
            'type' => "scale",
            'argu' => "a integer" ,
            'descrip' => "this parameter setup the verbosity of the testing result"},
          { 'name' => "mode",
            'type' => "option",
            'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|log|metadata|perllib|temp]\" default to \"all\"" ,
            'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
          { 'name' => "estop",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will stop once it meet an error"},
          { 'name' => "eshow",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will show the error summary"}
        ];

    if ( $strOutputFormat ne "xml" ) {
        print "$strError\n";

        foreach my $hashOneArg (@{$aryptHelps})
        {
            print "\n----------------------------\n";
            print "Parameters: -".$hashOneArg->{"name"}."\n";
            print "Type: ".$hashOneArg->{"type"}."\n";
            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
            print "Description: ".$hashOneArg->{"descrip"}."\n";
            print "----------------------------\n";
        }
    }

    return "$strError\n";
}
472
# Prints the opening of the report for one collection: the <diffcol> root
# start tag in xml mode, otherwise a text banner naming the collection.
#
# Parameters:
#   $strColName - name of the collection about to be tested
sub OutputStart
{
    my ($strColName) = @_;

    # Pad the banner line; never repeat a negative number of times
    # (long collection names), which warns under -w on newer perls.
    my $intPadding = 17 - length($strColName);
    $intPadding = 0 if $intPadding < 0;

    if ( $strOutputFormat eq "xml" ) {
        print "<diffcol>\n";
    } else {
        # NOTE(review): the blank and title rows are narrower than the border
        # rows as written here -- confirm the intended spacing.
        print "+---------------------------------------------------------+\n";
        print "| |\n";
        print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
        print "| |\n";
        print "+---------------------------------------------------------+\n\n";
    }
}
488
# Prints the end of the report for one collection together with the error
# summary.
#
# Parameters:
#   $strColName  - name of the collection that was tested
#   $aryptErrors - reference to the array of error strings that were found
#
# In xml mode each error becomes an <error> element inside the <diffcol>
# root element: the error text is escaped and the root is closed last, so
# that the output stays well-formed. In text mode a banner, the error count
# and one boxed section per error are printed.
sub OutputEnd
{
    my ($strColName,$aryptErrors) = @_;
    my $blnXml = ($strOutputFormat eq "xml");
    my $intTotalErrors = scalar(@{$aryptErrors});

    # Pad the banner line; never repeat a negative number of times
    my $intPadding = 12 - length($strColName);
    $intPadding = 0 if $intPadding < 0;

    if (!$blnXml) {
        print "\n";
        print "+---------------------------------------------------------+\n";
        print "| |\n";
        print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
        print "| |\n";
        print "+---------------------------------------------------------+\n\n";
        print "Checking completed, there is $intTotalErrors error(s) found.\n";
    }

    # NOTE(review): the flag defaults to "false" elsewhere in this file, which
    # also counts as "show" here -- confirm the values the option parser uses.
    if($gv_blnErrorShow ne "off")
    {
        foreach my $strEachError (@{$aryptErrors})
        {
            if ($blnXml) {
                # escape a copy; the caller's error strings stay untouched
                my $strEscaped = $strEachError;
                $strEscaped =~ s/&/&amp;/g;
                $strEscaped =~ s/</&lt;/g;
                $strEscaped =~ s/>/&gt;/g;
                print "<error>";
                print $strEscaped;
                print "</error>\n";
            } else {
                print "+---------------------------------------------------------+\n";
                print "| Error |\n";
                print "+---------------------------------------------------------+\n\n";
                print "$strEachError\n\n";
            }
        }
    }
    elsif (!$blnXml)
    {
        print "Use -eshow to show the error detail\n\n";
    }

    # Close the root element only after every <error> element has been written
    if ($blnXml) {
        print "</diffcol>\n";
    }
}
532
# Prints a report line made of a label, a run of dots and a status word;
# the dots fill the line up to 100 characters. Output goes through VobPrint
# at the given level.
sub AlignPrint
{
    my ($strMainString,$strStatus,$intLevel) = @_;

    my $intDot = 100 - length($strMainString) - length($strStatus);
    my $strLine = join("", $strMainString, "." x $intDot, $strStatus, "\n");

    VobPrint ($strLine,$intLevel);
}
539
540
# Decides whether the contents of a doc.xml/docmets.xml file were produced
# on Windows. So far only these files need special Windows processing here
# (db files' OS-sensitivity is handled in gdbdiff.pm).
#
# Parameters:
#   $file_contents - the complete text of the file
# Returns 1 when Windows-style backslashes are detected, 0 otherwise:
#   - if a gsdlsourcefilename metadata element is present, e.g.
#       <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
#     (with or without a namespace prefix), its value alone decides;
#   - otherwise a <Doc ... file="...\doc.xml" ...> element, as found in the
#     doc.xml files of index/text/HASHxxx.dir, counts as Windows.
sub isDocOrMETSXMLFileWindows
{
    my ($file_contents) = @_;

    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
        # kept lexical so that the value does not leak into a package global
        my $gsdlsourcefilename = $2;
        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
            return 1;
        }
    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
        return 1;
    }

    return 0;
}
566
# Recursively compares a model item (file or folder) with its counterpart
# in the test collection.
#
# Parameters:
#   $strModel   - path of the item in the model collection
#   $strTest    - path of the item in the test collection
#   $intLevel   - level of the caller; incremented here and used for the
#                 verbosity/indentation of the report lines
#   $strColName - collection name, used to strip absolute paths from doc.xml
# Returns the list of error strings found at and below this item.
#
# When both paths are folders, their listings are compared (FolderTesting)
# and every entry present on both sides is tested recursively, except for
# "blocked" entries (see _IsBlockedFile). Otherwise the two paths are diffed
# as files; doc.xml/docmets.xml files are first normalised in memory
# (volatile metadata, OS differences, temporary file names).
#
# NOTE(review): the first differing file makes the program exit unless
# $gv_blnErrorStop is "off"; the flag defaults to "false" elsewhere in this
# file -- confirm the values the option parser assigns.
sub TestEach
{
    my ($strModel,$strTest,$intLevel,$strColName) = @_;
    my @Errors = ();

    $intLevel++;
    if (-d $strModel && -d $strTest)
    {
        my @aryInModel = &diffutil::files_in_dir($strModel);
        my @aryInTest = &diffutil::files_in_dir($strTest);

        # Files to be skipped because they get generated on one OS but not the other
        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp$/;
        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel;
        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;

        # Now check all remaining files in the folder exist in both model and test collections
        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
        my @aryCorrectFiles = @{$aryTwoPointers[1]};
        @Errors = @{$aryTwoPointers[0]};

        if(scalar(@Errors) == 0)
        {
            # The listings agree: descend into every entry, reporting the blocked ones
            foreach my $strEachFile (@aryInModel)
            {
                if(!_IsBlockedFile($strEachFile))
                {
                    my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                    my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
                }
                elsif ( $strOutputFormat eq "xml" )
                {
                    print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
                }
                else
                {
                    VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
                }
            }
        }
        else
        {
            # The listings differ: only descend into the entries found on both sides
            foreach my $strEachFile (@aryCorrectFiles)
            {
                next if _IsBlockedFile($strEachFile);

                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
            }
        }

        if($intLevel == $gv_intVerbosity)
        {
            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
            else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
        }
    }
    else
    {
        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";

        my $strResult;

        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
        # in case these don't appear in the same order between collections, since
        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
        # when they can show up as unnecessary diff 'errors'

        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
        if($strModel =~ m/doc(mets)?\.xml$/) {

            my $model_contents = _ReadWholeFile($strModel);
            my $test_contents = _ReadWholeFile($strTest);

            $model_contents =~ s/$ignore_line_re//g;
            $test_contents =~ s/$ignore_line_re//g;

            # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
            # before comparing a windows test with a linux model or vice-versa
            my $testIsWin = &isDocOrMETSXMLFileWindows($test_contents);
            my $modelIsWin = &isDocOrMETSXMLFileWindows($model_contents);

            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant

                my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
                my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;

                # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
                # (this also turns windows \r\n newlines into \n)
                $$win_contents =~ s@[\r]@@g;

                # make all single windows slashes into single unix slashes
                # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
                    # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
                    # containing solitary carriage returns instead of linefeed
                    $$lin_contents =~ s@[\r]@@g;

                    # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
                    # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
                    # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                    # Advanced Beatles collection,
                    # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
                    # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
                    # Normalising to windows version for doing a diff
                    $$lin_contents =~ s@href=http:///@href=http://@g;
                }
            }

            # processing particular to doc.xml
            if($strModel =~ m/doc\.xml$/) {
                # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
                # these tmpdirs are located inside the collection directory
                $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
                $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;

                # remove all absolute paths upto collect folder from <Metadata /> elements
                $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
                $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;

                # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
                # These tmpdirs are located inside the toplevel *greenstone* directory
                (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
                # NOTE(review): $$ENV{...} appears to dereference the scalar $ENV rather than read %ENV,
                # which would make this fallback apply on every run. Left as is, because the matching
                # below may rely on the permissive pattern -- confirm before changing.
                $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
                my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long

                if($test_contents =~ m@$tmpfile_regex@) {
                    # found a match, replace the tmp file name with "random", keeping the original file extension
                    # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">

                    my $ext = $2;
                    my $new_tmp_filename = "random";

                    # Matching on the old tmp file name does not work in the Multimedia collection, since there's a
                    # subfolder to tmp (the timestamp folder) which contains the output file; hence the .*? below.
                    # NOTE(review): the tests of $5 look at the captures of the most recent successful match,
                    # not of the substitution that follows them -- statement order is significant here.
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
                    if($5) {
                        $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }

                    # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
                    $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
                    if($5) {
                        $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }
                }

            } # finished special processing of doc.xml files

            # To inspect the normalised contents while debugging, write $model_contents and
            # $test_contents to files at this point (e.g. with gdbdiff::print_string_to_file).

            # now can diff the normalised versions of the doc.xml/docmets.xml files:
            $strResult = diff(\$model_contents, \$test_contents, { STYLE => "OldStyle" });

        } else {
            $strResult = diff($strModel, $strTest, { STYLE => "OldStyle" });
        }

        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");

        if ( $strOutputFormat ne "xml" ) {
            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
        }

        if ($strResult eq "")
        {
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
            } else {
                AlignPrint("Comparing File","Succeed",$intLevel);
            }
        }
        else
        {
            my $strOutput = "Difference Report:\n$strResult\n";
            if ( $strOutputFormat eq "xml" ) {
                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
            } else {
                AlignPrint("Comparing File","Failed",$intLevel);
            }

            # Do not dump the diff of binary files into the report.
            # -B is perl's heuristic test for a binary file, see http://perldoc.perl.org/functions/-X.html
            if ( -B $strModel ) {
                VobPrint( "These binary files differ", $intLevel );
            } else {
                VobPrint ( "$strOutput" , $intLevel);
            }

            if ( $strOutputFormat eq "xml" ) {
                print "</message></file-comparison>";
            }

            if($gv_blnErrorStop ne "off") { exit; }
            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
        }
    }

    return @Errors;
}

# Returns 1 for folder entries whose contents TestEach does not compare:
# the log folder, the earliestDatestamp file, config files, collect.bak and
# database/index/image files. Returns 0 for everything else.
sub _IsBlockedFile
{
    my ($strName) = @_;

    return 1 if $strName eq "log" || $strName eq "earliestDatestamp";
    return 1 if $strName =~ m/\.cfg$/ || $strName =~ m/collect\.bak$/;
    return 1 if $strName =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/; # wmf = windows meta file
    return 0;
}

# Reads a whole file with a single unbuffered read and returns its contents.
# Dies when the file cannot be opened.
sub _ReadWholeFile
{
    my ($strFile) = @_;
    my $strContents = "";

    open(my $fh, "<", $strFile) or die "Unable to open $strFile...ERROR: $!\n";
    sysread($fh, $strContents, -s $fh);
    close($fh);

    return $strContents;
}
810
811
# Compares the listing of a model folder with the listing of the matching
# test folder and reports entries that exist on one side only.
#
# Parameters:
#   $aryptModel, $aryptTest - references to the arrays of entry names
#   $strModelFolder, $strTestFolder - the folder paths, used in the report
#   $intLevel - verbosity level used for the report lines
# Returns a two element list: a reference to the array of error strings and
# a reference to the array of entries found on both sides. The second array
# is only filled in when the listings differ.
sub FolderTesting
{
    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
    my (@Errors, @CorrectFiles);
    my @TwoPointers = (\@Errors,\@CorrectFiles);
    my $blnXml = ($strOutputFormat eq "xml");

    VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel) unless $blnXml;

    # Tag every entry: 'M' = model only, 'T' = test only, 'B' = both
    my %hashCount = map { ($_ => 'M') } @{$aryptModel};
    foreach my $strEachItem (@{$aryptTest})
    {
        my $blnSeenInModelOnly = (defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M');
        $hashCount{$strEachItem} = $blnSeenInModelOnly ? 'B' : 'T';
    }

    # The listings agree when both have the same size and together contain
    # no more distinct names than either of them
    my $intModelCount = scalar(@{$aryptModel});
    if( $intModelCount == scalar(@{$aryptTest}) && $intModelCount == scalar(keys %hashCount) )
    {
        if ($blnXml) {
            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
        } else {
            AlignPrint("Folder Comparsion","Succeed",$intLevel);
        }
        return @TwoPointers;
    }

    if ($blnXml) {
        print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
    } else {
        AlignPrint("Folder Comparsion","Failed",$intLevel);
    }

    foreach my $strEachItem (keys %hashCount)
    {
        my $strState = $hashCount{$strEachItem};

        if($strState eq 'B')
        {
            push(@CorrectFiles,$strEachItem);
            next;
        }

        my $strOutput = "";
        my $strReport = "";
        if($strState eq 'M')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
            $strReport = "Difference Report: difference found at $strTestFolder";
        }
        elsif($strState eq 'T')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
            $strReport = "Difference Report: difference found at $strModelFolder";
        }
        else {die "Error occours in diffcol_mk2::TestingFolder\n"}

        VobPrint ("$strOutput\n",$intLevel);
        push(@Errors,$strOutput."\n\t".$strReport."\n");
    }

    if ($blnXml) {
        print "</message></folder-comparison>";
    }

    return @TwoPointers;
}
882
#-----------------------------------------
# Name: VobPrint
# Parameters: the message to print and the verbosity level it belongs to.
# Pre-condition: $gv_intVerbosity and $strOutputFormat have been set.
# Post-condition: nothing is printed when the level is above
#             $gv_intVerbosity. Otherwise the message is printed to the
#             currently selected output handle followed by a newline:
#             - for levels >= 1 a trailing newline is removed and every
#               line is indented with int(level/2)+1 tabs;
#             - in xml output mode &, < and > are escaped;
#             - messages longer than 1000 characters are cut to 978
#               characters and marked "... (output truncated)".
#-----------------------------------------
sub VobPrint
{
    my ($strOutput, $intLevel) = @_;

    # Messages above the requested verbosity are suppressed entirely.
    return if $intLevel > $gv_intVerbosity;

    my $intMaxLength  = 1000;   # longest message printed in full
    my $intKeepLength = 978;    # characters kept before the truncation marker
    my $strTab = "";

    if ($intLevel >= 1)
    {
        $strTab = "\t" x (int($intLevel/2) + 1);
        $strOutput =~ s/\n$//;
        $strOutput =~ s/\n/\n$strTab/g;
    }

    my $blnXml = ($strOutputFormat eq "xml");
    if ($blnXml) {
        # & must be escaped first so the entities added next stay intact.
        $strOutput =~ s/&/&amp;/g;
        $strOutput =~ s/</&lt;/g;
        $strOutput =~ s/>/&gt;/g;
    }

    if (length($strOutput) > $intMaxLength) {
        my $strKept = substr($strOutput, 0, $intKeepLength);

        # Only escaped (xml) output can contain entities, so only there can
        # the cut land inside one. Plain text is left exactly as cut.
        if ($blnXml) {
            my $intAmp = rindex($strKept, "&");
            if ($intAmp >= 0 && index($strKept, ";", $intAmp) < 0) {
                # The last entity lost its ';'. Complete it from the
                # untruncated text instead of guessing which entity it was.
                my $intEnd = index($strOutput, ";", $intAmp);
                if ($intEnd >= 0) {
                    $strKept = substr($strOutput, 0, $intEnd + 1);
                }
                else {
                    # Not expected after escaping; never leave '&' dangling.
                    $strKept = substr($strKept, 0, $intAmp);
                }
            }
        }

        $strOutput = $strKept . "... (output truncated)";
    }

    print $strTab.$strOutput."\n";
}
933#----##
934
935
936#--Main System----------------------------
937#-----------------------------------------
# Name: main
# Parameters: arguments from the command line
# Pre-condition: testing starts when this main function is called.
# Post-condition: the test results for one or more collections are output.
942#-----------------------------------------
sub main
{
    # Parses the command line options, then for every collection named in
    # the remaining arguments runs the test selected by the mode
    # (Individual, Initial or Full) against the model and test copies of
    # that collection, wrapping the run in OutputStart/OutputEnd.
    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode);
    my $strProgName = $0;

    #--System Arguments Setup
    if (!parsargv::parse(\@ARGV,
                         'estop//off', \$strErrorStop,
                         'eshow//off', \$strErrorShow,
                         'verbosity/\d+/1', \$intVerbosity,
                         'mode/[\w\-]+/all', \$strMode,
                         'output/[\w\-]+/text', \$strOutputFormat
        )) {
        PrintUsage($strProgName);
        die "\n";
    }

    # Check for collection names only after the options have been parsed.
    # The loop below treats every entry left in @ARGV as a collection name,
    # so counting the raw arguments beforehand would let an options-only
    # command line through and the script would silently test nothing.
    if (scalar(@ARGV) < 1) {
        PrintUsage($strProgName);
        die "\n";
    }

    $gv_blnErrorStop = $strErrorStop;
    $gv_blnErrorShow = $strErrorShow;
    $gv_intVerbosity = $intVerbosity;
    $gv_strMode = SetMode($strMode);

    #----##

    #--Collection(s) Testing
    # NOTE(review): $strColName is a package variable here, not a lexical;
    # left as is in case other subs read it - confirm before adding "my".
    foreach $strColName (@ARGV)
    {
        my @ErrorsInEachCol;
        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);

        #--Output(Start)
        OutputStart($strColName);
        #----##

        if(-e $strModelCol && -e $strTestCol )
        {

            #--Individual Testing
            if ($gv_strMode eq "Individual")
            {
                @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
            }
            #----##

            #--Initial Testing
            elsif ($gv_strMode eq "Initial")
            {
                @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
            }
            #----##

            #--Full Testing
            elsif ($gv_strMode eq "Full")
            {
                @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
            }
            #----##

            #--Error Checking
            # Reached only when SetMode returned a mode none of the
            # branches above handles.
            else
            {
                if ( $strOutputFormat eq "xml" ) {
                    die "<error>Error occoured in main function</error>\n";
                } else {
                    die "Error occoured in main function.\n";
                }
            }
            #----##

        }
        else
        {
            # A missing model or test collection aborts the whole run,
            # including any collections still to be tested.
            if( $strOutputFormat eq "xml" ) {
                die "<error>Cannot find collection: $strColName</error>\n";
            } else {
                die "Error: cannot find collection: $strColName\n";
            }
        }
        #----##

        #--Output(Results and Errors)
        OutputEnd($strColName,\@ErrorsInEachCol);
        #----##

    }
}
1036#----##
1037
# Script entry point: run the comparison for the command line arguments.
main();
Note: See TracBrowser for help on using the repository browser.