source: other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl@ 28078

Last change on this file since 28078 was 28078, checked in by ak19, 11 years ago

diffcol modifications for getting the METS tutorial collection to work. The docmets.xml files also used (oai)lastmodified timestamps, so these needed to be ignored as well.

File size: 33.3 KB
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50$gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
51$gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
52
53$gv_blnErrorStop = "false";
54$gv_blnErrorShow = "false";
55$gv_intVerbosity = 0;
56$gv_strMode = "Full";
57$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
58
59%gv_IndivList = ("archives" => 0,
60 "etc" => 0,
61 "images" => 0,
62 "building" => 0,
63 "import" => 0,
64 "index" => 0,
65 "log" => 0,
66 "metadata" => 0,
67 "perllib" => 0,
68 "temp" => 0);
69#----##
70
71#--System Setup---------------------------
72sub SetMode
73{
74 my ($strModeList) = @_;
75 $strModeList =~ s/\|/ /g;
76 my @Modes = split(" ",$strModeList);
77
78 my $blnIndividual = "true";
79 my $blnInitial = "false";
80 my $blnFull = "false";
81
82
83 foreach $strEachMode (@Modes)
84 {
85 if($strEachMode eq "all")
86 {
87 $blnFull = "true";
88 $blnIndividual = "false";
89 }
90 elsif($strEachMode eq "init")
91 {
92 $blnInitial = "true";
93 $blnIndividual = "false";
94 }
95 else
96 {
97 if(defined $gv_IndivList{$strEachMode})
98 {
99 $gv_IndivList{$strEachMode} = 1;
100 }
101 else
102 {
103 die Help("Error: used undefined mode");
104 }
105 }
106 }
107
108 if($blnFull eq "true") {return "Full";}
109 elsif($blnInitial eq "true") {return "Initial";}
110 elsif($blnIndividual eq "true") {return "Individual";}
111 else {die "Error occured in function SetMode!!\n";}
112}
113#----##
114
115#--System Process-------------------------
116sub IndivTest
117{
118 my ($strModelCol,$strTestCol,$strColName) = @_;
119 my @Errors = ();
120 my $intNumberOfErrors = 0;
121 foreach $strEachFolder (keys %gv_IndivList)
122 {
123 if($gv_IndivList{$strEachFolder} == 1)
124 {
125
126 VobPrint("Start Comparing \"$strEachFolder\"\n",0);
127 my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
128 my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
129 $intNumberOfErrors = scalar(@Errors);
130 push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
131 $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
132 VobPrint("End Comparing \"$strEachFolder\"\n",0);
133 VobPrint("Difference Found: $intNumberOfErrors\n",0);
134 VobPrint ("\n",0);
135 }
136 }
137 return @Errors;
138}
139
140sub InitTest
141{
142
143 my ($strModelCol,$strTestCol,$strColName) = @_;
144 my $intLevel = 1;
145 my @Errors;
146
147 # Testing Log files
148# my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
149# my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
150#
151# if(-e $strModelLog && -e $strTestLog)
152# {
153# my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
154# if($strLogError ne "")
155# {
156# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
157# VobPrint ("$strLogError\n",$intLevel);
158#
159# $strLogError = "$strLogError";
160# $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
161# push(@Errors,$strLogError);
162# }
163# else
164# {
165# AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
166# }
167# }
168# else
169# {
170# my $strErrorColName;
171# my $strLogError;
172#
173# if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
174# if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
175#
176# AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
177# $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
178# VobPrint ("$strLogError\n",$intLevel);
179# $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
180#
181# push(@Errors,$strLogError);
182# }
183# VobPrint ("\n",$intLevel);
184
185 # Testing the build.cfg
186 my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
187 my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
188
189 if(-e $strModelBcfg && -e $strTestBcfg)
190 {
191 my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
192 if($strBcfgError ne "")
193 {
194 if( $strOutputFormat eq "xml" ) {
195 print "<build-cfg succeeded=\"no\">\n<message>";
196 } else {
197 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
198 }
199
200 VobPrint ("$strBcfgError",$intLevel);
201 if( $strOutputFormat eq "xml" ) {
202 print "</message></build-cfg>";
203 }
204
205 $strBcfgError = "$strBcfgError";
206 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
207 push(@Errors,$strBcfgError);
208 }
209 else
210 {
211 if( $strOutputFormat eq "xml" ) {
212 print "<build-cfg succeeded=\"yes\"/>";
213 } else {
214 AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
215 }
216 }
217 }
218 else
219 {
220 my $strErrorColName;
221 my $strBcfgError;
222
223 if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
224 if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
225
226 AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
227 $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
228 VobPrint ("$strBcfgError\n",$intLevel);
229 $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
230
231 push(@Errors,$strBcfgError);
232 }
233 VobPrint ("\n",$intLevel);
234
235 # Testing the collect.cfg
236
237 my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
238 my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
239
240 if(-e $strModelCcfg && -e $strTestCcfg)
241 {
242 my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
243 if($strCcfgError ne "")
244 {
245 if( $strOutputFormat eq "xml" ) {
246 print "<collect-cfg succeeded=\"no\"><message>";
247 } else {
248 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
249 }
250
251 VobPrint ("$strCcfgError",$intLevel);
252
253 if( $strOutputFormat eq "xml" ) {
254 print "</message></collect-cfg>";
255 }
256
257 $strCcfgError = "$strCcfgError";
258 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
259 push(@Errors,$strCcfgError);
260 }
261 else
262 {
263 if( $strOutputFormat eq "xml" ) {
264 print "<collect-cfg succeeded=\"yes\"/>";
265 } else {
266 AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
267 }
268 }
269 }
270 else
271 {
272 my $strErrorColName;
273 my $strCcfgError;
274
275 if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
276 if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
277
278 AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
279 $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
280 VobPrint ("$strCcfgError\n",$intLevel);
281 $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
282
283 push(@Errors,$strCcfgError);
284 }
285
286 VobPrint ("\n",$intLevel);
287
288 # Testing databases
289
290 # index
291 my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
292 my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
293 my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); # returns 0 if no error
294 if($strGdbError) {
295 push(@Errors,$strGdbError);
296 }
297
298 # archives
299 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
300 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
301 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
302 if($strGdbError) {
303 push(@Errors,$strGdbError);
304 }
305
306 $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
307 $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
308 $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
309 if($strGdbError) {
310 push(@Errors,$strGdbError);
311 }
312
313 VobPrint ("\n",$intLevel);
314
315 return @Errors;
316}
317
318
319# At present handles gdbm - need to expand to allow for jdbm and other db types
320sub GdbDiff
321{
322 my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName) = @_;
323
324 my $strGdbError = 0;
325
326 if(-e $strModelGdb && -e $strTestGdb)
327 {
328 #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
329 $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb,$strColName);
330 if($strGdbError ne "")
331 {
332 if( $strOutputFormat eq "xml" ) {
333 print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
334 } else {
335 AlignPrint("Database Comparsion Result","Failed",$intLevel);
336 }
337 VobPrint ("$strGdbError\n",$intLevel);
338
339 if( $strOutputFormat eq "xml" ) {
340 print "</message></database>";
341 }
342
343 $strGdbError = "$strGdbError";
344 $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
345 #push(@Errors,$strGdbError);
346 }
347 else
348 {
349 if( $strOutputFormat eq "xml" ) {
350 print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
351 } else {
352 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
353 }
354 }
355 }
356 else
357 {
358 my $strErrorColName;
359
360 if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
361 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
362
363 AlignPrint("Database Comparsion Result","Failed",$intLevel);
364
365 $strGdbError = "Difference Report: No Database files found in $strErrorColName";
366 VobPrint ("$strGdbError\n",$intLevel);
367
368 $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
369
370 }
371
372 return $strGdbError;
373}
374
375sub FullTest
376{
377 my ($strModelCol,$strTestCol,$strColName) = @_;
378 my @Errors = ();
379 my $intLevel = 0;
380 my $intNumberDiffs = 0;
381
382 # <Initial Test>
383 if( $strOutputFormat eq "xml" ) {
384 #print "<initial-test>";
385 } else {
386 VobPrint("Initial Testing Start\n",$intLevel);
387 }
388
389 @Errors = InitTest($strModelCol,$strTestCol,$strColName);
390 $intNumberDiffs = scalar(@Errors);
391
392 if( $strOutputFormat eq "xml" ) {
393 #print "</initial-test>";
394 } else {
395 VobPrint("Initial Testing End\n",$intLevel);
396 VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
397 VobPrint("\n",$intLevel);
398 }
399 # </Initial Test>
400
401 # <Detailed Test>
402 if( $strOutputFormat eq "xml" ) {
403 #print "<detailed-test>";
404 } else {
405 VobPrint("Detail Testing Start\n",$intLevel);
406 }
407 push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
408 $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
409
410 if( $strOutputFormat eq "xml" ) {
411 #print "</detailed-test>";
412 } else {
413 VobPrint("Detail Testing End\n",$intLevel);
414 VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
415 }
416 # </Detailed Test>
417
418 return @Errors;
419}
420#----##
421
422
423#--Other System Utilities
424sub PrintUsage
425{
426 my ($strProgName) = @_;
427 if ( $strOutputFormat eq "xml" ) {
428 print "<error>usage incorrect</error>\n";
429 } else {
430 print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
431 }
432 Help("Error: used incorrect parameters");
433}
434
435sub Help
436{
437 my ($strError) = @_;
438 my $aryptHelps =
439 [ { 'name' => "verbosity",
440 'type' => "scale",
441 'argu' => "a integer" ,
442 'descrip' => "this parameter setup the verbosity of the testing result"},
443 { 'name' => "mode",
444 'type' => "option",
445 'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
446 'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
447 { 'name' => "estop",
448 'type' => "flag",
449 'argu' => "NULL" ,
450 'descrip' => "Set then system will stop once it meet an error"},
451 { 'name' => "eshow",
452 'type' => "flag",
453 'argu' => "NULL" ,
454 'descrip' => "Set then system will show the error summary"}
455 ];
456
457
458 if ( $strOutputFormat ne "xml" ) {
459 print "$strError\n";
460
461 foreach my $hashOneArg (@{$aryptHelps})
462 {
463 print "\n----------------------------\n";
464 print "Parameters: -".$hashOneArg->{"name"}."\n";
465 print "Type: ".$hashOneArg->{"type"}."\n";
466 print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
467 print "Description: ".$hashOneArg->{"descrip"}."\n";
468 print "----------------------------\n";
469 }
470 }
471}
472
473sub OutputStart
474{
475 my ($strColName) = @_;
476 my $intPadding = 17 - length($strColName);
477
478 if ( $strOutputFormat eq "xml" ) {
479 print "<diffcol>\n";
480 } else {
481 print "+---------------------------------------------------------+\n";
482 print "| |\n";
483 print "| Start Testing Collection: $strColName"," " x $intPadding,"|\n";
484 print "| |\n";
485 print "+---------------------------------------------------------+\n\n";
486 }
487}
488
489sub OutputEnd
490{
491 my ($strColName,$aryptErrors) = @_;
492 my $intPadding = 12 - length($strColName);
493 if ( $strOutputFormat eq "xml" ) {
494 print "</diffcol>\n";
495 } else {
496 print "\n";
497 print "+---------------------------------------------------------+\n";
498 print "| |\n";
499 print "| Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
500 print "| |\n";
501 print "+---------------------------------------------------------+\n\n";
502 }
503
504 my $intTotalErrors = scalar(@{$aryptErrors});
505 if ( $strOutputFormat ne "xml" ) {
506 print "Checking completed, there is $intTotalErrors error(s) found.\n";
507 }
508
509 if($gv_blnErrorShow ne "off")
510 {
511 foreach my $strEachError (@{$aryptErrors})
512 {
513 if ( $strOutputFormat eq "xml" ) {
514 print "<error>";
515 print $strEachError;
516 print "</error>\n";
517 } else {
518 print "+---------------------------------------------------------+\n";
519 print "| Error |\n";
520 print "+---------------------------------------------------------+\n\n";
521 print "$strEachError\n\n";
522 }
523 }
524 }
525 else
526 {
527 if ( $strOutputFormat ne "xml" ) {
528 print "Use -eshow to show the error detail\n\n";
529 }
530 }
531}
532
533sub AlignPrint
534{
535 my ($strMainString,$strStatus,$intLevel) = @_;
536 my $intDot = 100 - length($strMainString) - length($strStatus);
537 VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
538}
539
540
541# this function is only called on DocXMLFiles.
542# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
543# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
544sub isDocXMLFileWindows
545{
546 my ($file_contents) = @_;
547
548 #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
549
550 # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
551 # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
552
553 # for doc.xml:
554 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
555 if($file_contents =~ m@<Metadata name="gsdlsourcefilename">([^>]*)</Metadata>@m) {
556 $gsdlsourcefilename = $1;
557 if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
558 return 1;
559 }
560 }
561
562 return 0;
563}
564
565sub TestEach
566{
567 my ($strModel,$strTest,$intLevel,$strColName) = @_;
568 my @Errors = ();
569
570 $intLevel++;
571 if (-d $strModel && -d $strTest)
572 {
573 my @aryInModel = &diffutil::files_in_dir($strModel);
574 my @aryInTest = &diffutil::files_in_dir($strTest);
575
576 # Files to be skipped because they get generated on one OS but not the other
577 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
578 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
579 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
580 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
581
582 # Now check all remaining files in the folder exist in both model and test collections
583 my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
584 my @aryCorrectFiles = @{$aryTwoPointers[1]};
585 @Errors = @{$aryTwoPointers[0]};
586
587 if(scalar(@Errors) == 0)
588 {
589 foreach my $strEachFile (@aryInModel)
590 {
591 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
592 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
593 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
594 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file # || $strEachFile =~ m/\~$/g to get rid of ~ files
595 {
596 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
597 }
598 else
599 {
600 if ( $strOutputFormat eq "xml" ) {
601 print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
602 } else {
603 VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
604 }
605 }
606 }
607 }
608 else
609 {
610 foreach my $strEachFile (@aryCorrectFiles)
611 {
612 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
613 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
614 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # || $strEachFile =~ m/\~$/g to get rid of ~ files
615 {
616 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
617 }
618 }
619 }
620 if($intLevel == $gv_intVerbosity)
621 {
622 if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
623 else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
624 }
625 }
626 else
627 {
628 # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
629 my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize)\">.*</(.*?:)?Metadata>\\s*\\n*";
630
631 my $strResult;
632
633 # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
634 # in case these don't appear in the same order between collections, since
635 # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
636 # when they can show up as unnecessary diff 'errors'
637
638 my ($model_contents, $test_contents);
639
640 if($strModel =~ m/doc(mets)?\.xml$/) {
641
642 open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
643 sysread(FIN, $model_contents, -s FIN);
644 close(FIN);
645 open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
646 sysread(FIN, $test_contents, -s FIN);
647 close(FIN);
648
649 $model_contents =~ s/$ignore_line_re//g;
650 $test_contents =~ s/$ignore_line_re//g;
651
652
653 # doc.xml needs to additionally be normalised, before comparing a windows test with a linux model or vice-versa
654 if($strModel =~ m/doc\.xml$/) {
655 # equalise/normalise the two doc.xml files for OS differences, if there are any
656 my $testIsWin = &isDocXMLFileWindows($test_contents);
657 my $modelIsWin = &isDocXMLFileWindows($model_contents);
658
659 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
660
661 my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
662 my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
663
664 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
665 $$win_contents =~ s@[\r]@@g;
666
667 # make all single windows slashes into single unix slashes
668 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
669 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
670 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
671
672 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
673 # $$win_contents =~ s@\r@\n@mg;
674
675 # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
676 # containing solitary carriage returns instead of linefeed
677 $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
678
679
680 # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
681 # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
682 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
683 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
684
685 # Advanced Beatles collection,
686 # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
687 # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
688 # Normalising to windows version for doing a diff
689 $$lin_contents =~ s@href=http:///@href=http://@g;
690 }
691
692
693 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
694 # these tmpdirs are located inside the collection directory
695 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
696 $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
697
698 # remove all absolute paths upto collect folder from <Metadata /> elements
699 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
700 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
701
702 # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
703 # These tmpdirs are located inside the toplevel *greenstone* directory
704 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
705 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
706 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
707
708 if($test_contents =~ m@$tmpfile_regex@) {
709 # found a match, replace the tmp file name with "random", keeping the original file extension
710 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
711
712 my ($old_tmp_filename, $ext) = ($1, $2);
713 my $new_tmp_filename = "random";
714
715 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
716 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
717 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
718 if($5) {
719 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
720 } else { # OrigSource contains only the filename
721 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
722 }
723
724 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
725 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
726 if($5) {
727 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
728 } else { # OrigSource contains only the filename
729 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
730 }
731 }
732
733# my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
734# if($strModel =~ m/(HASH010d.dir)/) { # list the HASH dirs for which you want the doc.xml file generated
735# &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
736# &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
737# }
738
739 } # finished special processing of doc.xml files
740
741 # now can diff the normalised versions of the doc.xml/docmets.xml files:
742 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
743
744 } else {
745 $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
746 }
747
748 # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
749 # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
750 $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
751
752 #$strResult = GeneralOutput($strResult);
753 if ( $strOutputFormat eq "xml" ) {
754 #
755 } else {
756 VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
757 }
758 if ($strResult eq "")
759 {
760 if ( $strOutputFormat eq "xml" ) {
761 print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
762 } else {
763 AlignPrint("Comparing File","Succeed",$intLevel);
764 }
765 }
766 else
767 {
768# print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
769
770 my $strOutput = "Difference Report:\n$strResult\n";
771 if ( $strOutputFormat eq "xml" ) {
772 print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
773 } else {
774 AlignPrint("Comparing File","Failed",$intLevel);
775 }
776
777 #$result=`file -b $strModel`; # linux specific test for binary file
778 $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
779 if ( "$result" =~ "data" ) {
780 VobPrint( "These binary files differ", $intLevel );
781 } else {
782 VobPrint ( "$strOutput" , $intLevel);
783 }
784
785
786 if ( $strOutputFormat eq "xml" ) {
787 print "</message></file-comparison>";
788 }
789
790 if($gv_blnErrorStop ne "off") { exit; }
791 push(@Errors,"File content comparison failed($strModel):\n$strOutput");
792 }
793 }
794
795 return @Errors;
796}
797
798
799sub FolderTesting
800{
801 my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
802 my %hashCount = ();
803 my @Errors = ();
804 my @CorrectFiles = ();
805 my @TwoPointers = (\@Errors,\@CorrectFiles);
806
807 if ( $strOutputFormat eq "xml" ) {
808 #print "<folder-comparison location=\"$strModelFolder\">\n";
809 } else {
810 VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
811 }
812
813 foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
814 foreach my $strEachItem (@$aryptTest)
815 {
816 if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
817 else {$hashCount{$strEachItem} = 'T';}
818 }
819
820 if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
821 {
822 if ( $strOutputFormat eq "xml" ) {
823 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
824 } else {
825 AlignPrint("Folder Comparsion","Succeed",$intLevel);
826 }
827 return @TwoPointers;
828 }
829 else
830 {
831 if ( $strOutputFormat eq "xml" ) {
832 print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
833 } else {
834 AlignPrint("Folder Comparsion","Failed",$intLevel);
835 }
836
837 foreach my $strEachItem (keys %hashCount)
838 {
839 if($hashCount{$strEachItem} ne 'B')
840 {
841 my $strOutput = "";
842 my $strReport = "";
843
844 if($hashCount{$strEachItem} eq 'M')
845 {
846 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
847 $strReport = "Difference Report: difference found at $strTestFolder";
848 }
849 elsif($hashCount{$strEachItem} eq 'T')
850 {
851 $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
852 $strReport = "Difference Report: difference found at $strModelFolder";
853 }
854 else {die "Error occours in diffcol_mk2::TestingFolder\n"}
855
856 VobPrint ("$strOutput\n",$intLevel);
857 $strOutput = $strOutput."\n\t".$strReport."\n";
858 push(@Errors,$strOutput);
859 }
860 else {push(@CorrectFiles,$strEachItem);}
861 }
862 if( $strOutputFormat eq "xml" ) {
863 print "</message></folder-comparison>";
864 }
865
866 return @TwoPointers;
867 }
868}
869
870sub VobPrint
871{
872 my ($strOutput, $intLevel) = @_;
873 my $strTab = "";
874 my $intTab = int($intLevel/2);
875 if($intLevel <= $gv_intVerbosity)
876 {
877 if($intLevel >= 1)
878 {
879 $strTab = "\t"x($intTab+1);
880 $strOutput =~ s/\n$//;
881 $strOutput =~ s/\n/\n$strTab/g;
882 #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
883 }
884
885 if( $strOutputFormat eq "xml" ) {
886 $strOutput =~ s/&/&amp;/g;
887 $strOutput =~ s/</&lt;/g;
888 $strOutput =~ s/>/&gt;/g;
889 }
890
891 if ( length( $strOutput ) > 1000 ) {
892 $strOutput = substr( $strOutput, 0, 978);
893
894 # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt;
895 if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&amp$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
896 if(defined $1 && $1) {
897 my $rest = $1;
898 if($rest =~ m/^a/) {
899 $strOutput =~ s@am?p?$@amp;@;
900 }
901 elsif($rest eq "g" || $rest eq "l") {
902 $strOutput .= "t;"; # close the known tag
903 }
904 elsif($rest eq "gt" || $rest eq "lt") {
905 $strOutput .= ";";
906 }
907 } else { # & on its own
908 #$strOutput = substr( $strOutput, 0, 977); # lop off the &
909 $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
910 # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
911 }
912 }
913 $strOutput .= "... (output truncated)";
914 }
915
916
917 print $strTab.$strOutput."\n";
918 }
919}
920#----##
921
922
923#--Main System----------------------------
924#-----------------------------------------
925# Name: main
926# Perameters: arguments from command line
927# Pre-condition: testing will start by calling this main function.
928# Post-condition: output the test results for one or more collections.
929#-----------------------------------------
930sub main
931{
932 my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode);
933 my $strProgName = $0;
934 my $intArgc = scalar(@ARGV);
935
936 #--System Arguments Setup
937 if (!parsargv::parse(\@ARGV,
938 'estop//off', \$strErrorStop,
939 'eshow//off', \$strErrorShow,
940 'verbosity/\d+/1', \$intVerbosity,
941 'mode/[\w\-]+/all', \$strMode,
942 'output/[\w\-]+/text', \$strOutputFormat
943 )) {
944 PrintUsage($strProgName);
945 die "\n";
946 }
947
948 if ($intArgc<1) {
949 PrintUsage($strProgName);
950 die "\n";
951 }
952
953 $gv_blnErrorStop = $strErrorStop;
954 $gv_blnErrorShow = $strErrorShow;
955 $gv_intVerbosity = $intVerbosity;
956 $gv_strMode = SetMode($strMode);
957
958 #----##
959
960 #--Collection(s) Testing
961 foreach $strColName (@ARGV)
962 {
963 my @ErrorsInEachCol;
964 my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
965 my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
966
967 #--Output(Start)
968 OutputStart($strColName);
969 #----##
970
971 if(-e $strModelCol && -e $strTestCol )
972 {
973
974 #--Individual Testing
975 if ($gv_strMode eq "Individual")
976 {
977 @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
978 }
979 #----##
980
981 #--Initial Testing
982 elsif ($gv_strMode eq "Initial")
983 {
984 @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
985 }
986 #----##
987
988 #--Full Testing
989 elsif ($gv_strMode eq "Full")
990 {
991 @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
992 }
993 #----##
994
995 #--Error Checking
996 else
997 {
998 if ( $strOutputFormat eq "xml" ) {
999 die "<error>Error occoured in main function</error>\n";
1000 } else {
1001 die "Error occoured in main function.\n";
1002 }
1003 }
1004 #----##
1005
1006 }
1007 else
1008 {
1009 if( $strOutputFormat eq "xml" ) {
1010 die "<error>Cannot find collection: $strColName</error>\n";
1011 } else {
1012 die "Error: cannot find collection: $strColName\n";
1013 }
1014 }
1015 #----##
1016
1017 #--Output(Results and Errors)
1018 OutputEnd($strColName,\@ErrorsInEachCol);
1019 #----##
1020
1021 }
1022}
1023#----##
1024
1025&main();
Note: See TracBrowser for help on using the repository browser.