#!/usr/bin/perl -w
#TODO: Individual Testing
###########################################################################
#
# test.pl -- tests whether a built collection is consistent with its model collection
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
package diffcol_mk2;
BEGIN {
die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
}
use parsargv;
use util;
use FileUtils;
use logdiff;
use cfgdiff;
use gdbdiff;
use diffutil;
use Text::Diff;
use Cwd;
#--Global Variables Declaration-----------
# OS labels of the test and model collections. These two are lexical to the
# file (the remaining gv_* settings below are package globals).
my ($gv_test_os, $gv_model_os);

# Collection roots: when GSDL3SRCHOME is set the collections are looked up
# under web/sites/localsite of that directory, otherwise directly under
# GSDLHOME.
{
    my @aryRootParts = $ENV{'GSDL3SRCHOME'}
        ? ($ENV{'GSDL3SRCHOME'}, "web", "sites", "localsite")
        : ($ENV{'GSDLHOME'});
    $gv_strModelColRoot = &FileUtils::filenameConcatenate(@aryRootParts, "/model-collect");
    $gv_strTestColRoot  = &FileUtils::filenameConcatenate(@aryRootParts, "/collect");
}

# Run-time settings and their defaults.
$gv_blnDebugging = 0;
($gv_blnErrorStop, $gv_blnErrorShow) = ("false", "false");
$gv_intVerbosity = 0;
$gv_strMode = "Full";
$strOutputFormat = "xml" if !defined $strOutputFormat; # global var with default

# Folders that can be selected for individual testing; a value of 1 marks
# the folder as selected.
%gv_IndivList = map { $_ => 0 }
    qw(archives etc images building import index log metadata perllib temp);
#----##
#--System Setup---------------------------
#-----------------------------------------
# Name: SetMode
# Parameters: $strModeList - one or more mode names separated by "|" or
#             whitespace: "all", "init", or a key of %gv_IndivList.
# Returns: "Full" if "all" was given, otherwise "Initial" if "init" was
#          given, otherwise "Individual".
# Side effects: every folder mode named in the list is flagged with 1 in
#               %gv_IndivList. An unknown mode prints the help text and dies.
#-----------------------------------------
sub SetMode
{
    my ($strModeList) = @_;
    $strModeList = "" unless defined $strModeList;
    $strModeList =~ s/\|/ /g;
    my @Modes = split(" ", $strModeList);

    my $blnFull    = 0;
    my $blnInitial = 0;

    foreach my $strEachMode (@Modes)
    {
        if ($strEachMode eq "all")
        {
            $blnFull = 1;
        }
        elsif ($strEachMode eq "init")
        {
            $blnInitial = 1;
        }
        elsif (exists $gv_IndivList{$strEachMode})
        {
            $gv_IndivList{$strEachMode} = 1;
        }
        else
        {
            # Show the help first, then die with a real message (dying with
            # Help's return value produced a meaningless "Died at ..." line).
            Help("Error: used undefined mode");
            die "Error: used undefined mode \"$strEachMode\"\n";
        }
    }

    # "all" takes precedence over "init", which takes precedence over
    # individual folder selection.
    return "Full"    if $blnFull;
    return "Initial" if $blnInitial;
    return "Individual";
}
#----##
#--System Process-------------------------
#-----------------------------------------
# Name: IndivTest
# Parameters: $strModelCol - path of the model collection
#             $strTestCol  - path of the test collection
#             $strColName  - collection name (passed through to TestEach)
# Returns: list of error strings collected from every folder that is
#          flagged with 1 in %gv_IndivList.
# Folders are visited in sorted order so the report is the same from run
# to run (plain "keys" order varies between runs).
#-----------------------------------------
sub IndivTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my @Errors = ();

    foreach my $strEachFolder (sort keys %gv_IndivList)
    {
        next unless $gv_IndivList{$strEachFolder} == 1;

        VobPrint("Start Comparing \"$strEachFolder\"\n",0);
        my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
        my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);

        my @FolderErrors = TestEach($strModelFolder,$strTestFolder,0,$strColName);
        push(@Errors,@FolderErrors);
        my $intNumberOfErrors = scalar(@FolderErrors);

        VobPrint("End Comparing \"$strEachFolder\"\n",0);
        VobPrint("Difference Found: $intNumberOfErrors\n",0);
        VobPrint ("\n",0);
    }
    return @Errors;
}
#-----------------------------------------
# Name: InitTest
# Parameters: $strModelCol - path of the model collection
#             $strTestCol  - path of the test collection
#             $strColName  - collection name (used for index/text/<name>.gdb)
# Returns: list of error strings, one per failed comparison.
# Compares, in order: index/build.cfg, etc/collect.cfg, the index database
# and the two archiveinf databases in archives/.
# NOTE(review): several xml-mode branches print an empty string; they look
# like placeholders for markup and are kept exactly as found - confirm.
#-----------------------------------------
sub InitTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 1;
    my @Errors;

    # Testing Log files (currently disabled)
    # my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
    # my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
    #
    # if(-e $strModelLog && -e $strTestLog)
    # {
    # my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
    # if($strLogError ne "")
    # {
    # AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
    # VobPrint ("$strLogError\n",$intLevel);
    #
    # $strLogError = "$strLogError";
    # $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
    # push(@Errors,$strLogError);
    # }
    # else
    # {
    # AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
    # }
    # }
    # else
    # {
    # my $strErrorColName;
    # my $strLogError;
    #
    # if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
    # if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
    #
    # AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
    # $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
    # VobPrint ("$strLogError\n",$intLevel);
    # $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
    #
    # push(@Errors,$strLogError);
    # }
    # VobPrint ("\n",$intLevel);

    # Testing the build.cfg
    my $strModelBcfg = &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
    my $strTestBcfg = &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
    if(-e $strModelBcfg && -e $strTestBcfg)
    {
        my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
        if($strBcfgError ne "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "\n";
            } else {
                AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
            }
            VobPrint ("$strBcfgError",$intLevel);
            if( $strOutputFormat eq "xml" ) {
                print "";
            }
            $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
            push(@Errors,$strBcfgError);
        }
        else
        {
            if( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
            }
        }
    }
    else
    {
        # Start from "" so the appends below never concatenate onto undef
        # (which warns under -w).
        my $strErrorColName = "";
        my $strBcfgError;
        if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
        if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
        AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
        $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
        VobPrint ("$strBcfgError\n",$intLevel);
        $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
        push(@Errors,$strBcfgError);
    }
    VobPrint ("\n",$intLevel);

    # Testing the collect.cfg
    my $strModelCcfg = &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
    my $strTestCcfg = &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
    if(-e $strModelCcfg && -e $strTestCcfg)
    {
        my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
        if($strCcfgError ne "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
            }
            VobPrint ("$strCcfgError",$intLevel);
            if( $strOutputFormat eq "xml" ) {
                print "";
            }
            $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
            push(@Errors,$strCcfgError);
        }
        else
        {
            if( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
            }
        }
    }
    else
    {
        my $strErrorColName = "";
        my $strCcfgError;
        if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
        if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
        AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
        $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
        VobPrint ("$strCcfgError\n",$intLevel);
        $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
        push(@Errors,$strCcfgError);
    }
    VobPrint ("\n",$intLevel);

    # Testing databases
    # index
    my $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
    my $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");
    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # false if no error
    if($strGdbError) {
        push(@Errors,$strGdbError);
    }

    # archives
    $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
    $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");
    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
    if($strGdbError) {
        push(@Errors,$strGdbError);
    }

    $strModelGdb = &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
    $strTestGdb = &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
    if($strGdbError) {
        push(@Errors,$strGdbError);
    }

    VobPrint ("\n",$intLevel);
    return @Errors;
}
# At present handles gdbm - need to expand to allow for jdbm and other db types
#-----------------------------------------
# Name: GdbDiff
# Parameters: $strModelGdb, $strTestGdb - paths of the two database files
#             $strOutputFormat - "xml" or anything else for text output
#             $intLevel        - verbosity level used for the report lines
#             $strColName, $strTestCol, $strModelCol - passed to
#             gdbdiff::test_gdb together with the OS and debug settings
# Returns: a false value when the databases compare equal, otherwise an
#          error string describing the difference (or the missing file).
#-----------------------------------------
sub GdbDiff
{
    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
    my $strGdbError = 0;

    if(-e $strModelGdb && -e $strTestGdb)
    {
        #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
        $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
        if($strGdbError ne "")
        {
            if( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Database Comparsion Result","Failed",$intLevel);
            }
            VobPrint ("$strGdbError\n",$intLevel);
            if( $strOutputFormat eq "xml" ) {
                print "";
            }
            $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
        }
        else
        {
            if( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Database Comparsion Result","Succeed",$intLevel);
            }
        }
    }
    else
    {
        # Start from "" so the appends below never concatenate onto undef
        # (which warns under -w).
        my $strErrorColName = "";
        if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
        if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
        AlignPrint("Database Comparsion Result","Failed",$intLevel);
        $strGdbError = "Difference Report: No Database files found in $strErrorColName";
        VobPrint ("$strGdbError\n",$intLevel);
        $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
    }
    return $strGdbError;
}
#-----------------------------------------
# Name: FullTest
# Parameters: $strModelCol, $strTestCol - collection paths
#             $strColName - collection name
# Returns: the errors from InitTest followed by the errors from TestEach
#          run on the whole collection.
# Progress lines are only written when the output format is not "xml".
#-----------------------------------------
sub FullTest
{
    my ($strModelCol,$strTestCol,$strColName) = @_;
    my $intLevel = 0;
    my $blnTextReport = ($strOutputFormat ne "xml");

    # Phase 1: config files and databases
    VobPrint("Initial Testing Start\n",$intLevel) if $blnTextReport;
    my @Errors = InitTest($strModelCol,$strTestCol,$strColName);
    my $intInitialDiffs = scalar(@Errors);
    if ($blnTextReport) {
        VobPrint("Initial Testing End\n",$intLevel);
        VobPrint("Difference Found in Initial Testing: $intInitialDiffs\n",$intLevel);
        VobPrint("\n",$intLevel);
    }

    # Phase 2: recursive folder/file comparison
    VobPrint("Detail Testing Start\n",$intLevel) if $blnTextReport;
    my @DetailErrors = TestEach($strModelCol,$strTestCol,$intLevel,$strColName);
    push(@Errors,@DetailErrors);
    my $intDetailDiffs = scalar(@DetailErrors);
    if ($blnTextReport) {
        VobPrint("Detail Testing End\n",$intLevel);
        VobPrint("Difference Found in Detail Testing: $intDetailDiffs\n",$intLevel);
    }

    return @Errors;
}
#----##
#--Other System Utilities
#-----------------------------------------
# Name: PrintUsage
# Parameters: $strProgName - program name shown in the usage line
# Writes a short notice to STDOUT in xml mode, or the usage line to STDERR
# otherwise, then hands over to Help for the per-option details.
#-----------------------------------------
sub PrintUsage
{
    my ($strProgName) = @_;
    unless ( $strOutputFormat eq "xml" ) {
        my $strUsage = "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop] [-debug]\n";
        print STDERR $strUsage;
    }
    else {
        print "usage incorrect\n";
    }
    Help("Error: used incorrect parameters");
}
#-----------------------------------------
# Name: Help
# Parameters: $strError - message printed before the option descriptions
# Prints the error message followed by one section per command line option.
# Nothing is printed when the output format is "xml".
# Returns: nothing.
#-----------------------------------------
sub Help
{
    my ($strError) = @_;
    # The mode list below mirrors the keys of %gv_IndivList plus "all" and
    # "init"; it previously advertised "tmp" (the accepted key is "temp")
    # and left out "log" and "metadata".
    my $aryptHelps =
        [ { 'name' => "verbosity",
            'type' => "scale",
            'argu' => "a integer" ,
            'descrip' => "this parameter setup the verbosity of the testing result"},
          { 'name' => "mode",
            'type' => "option",
            'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|log|metadata|perllib|temp]\" default to \"all\"" ,
            'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
          { 'name' => "estop",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will stop once it meets an error"},
          { 'name' => "eshow",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "Set then system will show the error summary"},
          { 'name' => "debug",
            'type' => "flag",
            'argu' => "NULL" ,
            'descrip' => "If set, stores intermediate diff files in toplevel diffcol folder"}
        ];

    if ( $strOutputFormat ne "xml" ) {
        print "$strError\n";
        foreach my $hashOneArg (@{$aryptHelps})
        {
            print "\n----------------------------\n";
            print "Parameters: -".$hashOneArg->{"name"}."\n";
            print "Type: ".$hashOneArg->{"type"}."\n";
            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
            print "Description: ".$hashOneArg->{"descrip"}."\n";
            print "----------------------------\n";
        }
    }
    return;
}
#-----------------------------------------
# Name: OutputStart
# Parameters: $strColName - name of the collection about to be tested
# Prints the opening banner for a collection; in xml mode only a newline
# is written.
#-----------------------------------------
sub OutputStart
{
    my ($strColName) = @_;
    my $strBanner;

    if ( $strOutputFormat eq "xml" ) {
        $strBanner = "\n";
    } else {
        my $strRule = "+---------------------------------------------------------+\n";
        # pad the title line so the closing bar lines up for short names
        my $intPadding = 17 - length($strColName);
        $strBanner = join("",
            $strRule,
            "| |\n",
            "| Start Testing Collection: $strColName", " " x $intPadding, "|\n",
            "| |\n",
            $strRule, "\n");
    }
    print $strBanner;
}
#-----------------------------------------
# Name: OutputEnd
# Parameters: $strColName  - name of the collection that was tested
#             $aryptErrors - reference to the list of error strings
# Prints the closing banner and the error count (text mode only), then
# either every error or, when $gv_blnErrorShow is "off", a hint to use
# -eshow (text mode only).
#-----------------------------------------
sub OutputEnd
{
    my ($strColName,$aryptErrors) = @_;
    my $blnXml = ($strOutputFormat eq "xml");
    my $strRule = "+---------------------------------------------------------+\n";

    # closing banner
    if ($blnXml) {
        print "\n";
    } else {
        my $intPadding = 12 - length($strColName);
        print "\n",
              $strRule,
              "| |\n",
              "| Result of Collection Testing: $strColName", " " x $intPadding, "|\n",
              "| |\n",
              $strRule, "\n";
    }

    # summary line
    my $intTotalErrors = scalar(@{$aryptErrors});
    print "Checking completed, there is $intTotalErrors error(s) found.\n" unless $blnXml;

    # error details
    if ($gv_blnErrorShow eq "off")
    {
        print "Use -eshow to show the error detail\n\n" unless $blnXml;
    }
    else
    {
        foreach my $strEachError (@{$aryptErrors})
        {
            if ($blnXml) {
                print "", $strEachError, "\n";
            } else {
                print $strRule,
                      "| Error |\n",
                      $strRule, "\n",
                      "$strEachError\n\n";
            }
        }
    }
}
#-----------------------------------------
# Name: AlignPrint
# Parameters: $strMainString - label on the left
#             $strStatus     - status word on the right
#             $intLevel      - verbosity level handed to VobPrint
# Joins label and status with a run of dots so that the two strings plus
# the dots come to 100 characters, and prints the line through VobPrint.
#-----------------------------------------
sub AlignPrint
{
    my ($strMainString,$strStatus,$intLevel) = @_;
    my $strLeader = "." x (100 - length($strMainString) - length($strStatus));
    my $strLine = join("", $strMainString, $strLeader, $strStatus, "\n");
    VobPrint($strLine, $intLevel);
}
# this function is only called on DocXMLFiles.
# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
# Parameters: $file_contents - the full text of a doc.xml / docmets.xml file
# Returns: 1 if a backslash is found in the captured gsdlsourcefilename
#          metadata value, or if the fallback pattern matches; 0 otherwise.
# Note: $gsdlsourcefilename is not declared with "my", so it is written as a
#       package global.
sub isDocOrMETSXMLFileWindows
{
my ($file_contents) = @_;
#return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
# Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
# what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
# for doc.xml:
# import/html_files/cleves.html
# The optional "(.*?:)?" groups allow a namespace prefix before "Metadata";
# the value captured in $2 is what gets checked for backslashes.
if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)(.*?:)?Metadata>@m) {
$gsdlsourcefilename = $2;
if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
return 1;
}
# NOTE(review): the pattern on the next line looks truncated (it closes a
# group that is never opened), presumably because markup was lost from this
# copy of the file - restore it from the upstream source before relying on
# this branch.
} elsif($file_contents =~ m@]*)?>@) { # windows slashes detected in doc.xml in index/text/HASHxxx.dir
return 1;
}
return 0;
}
#-----------------------------------------
# Name: TestEach
# Parameters: $strModel   - path of a file or folder in the model collection
#             $strTest    - the corresponding path in the test collection
#             $intLevel   - verbosity level of the caller (incremented here)
#             $strColName - collection name, used when stripping absolute
#                           paths from doc.xml contents
# Returns: list of error strings found at or below the given paths.
#
# When both paths are directories their listings are compared with
# FolderTesting and the function recurses into every entry that is not
# blocked. Otherwise the two paths are diffed as files; doc.xml and
# docmets.xml files are read into memory and normalised first.
#
# NOTE(review): several patterns below (the ignore-line pattern and the one
# handed to diffutil::GenerateOutput) look as if markup was lost from this
# copy of the file. They are reproduced exactly as found; compare with the
# upstream source.
#-----------------------------------------
sub TestEach
{
    my ($strModel,$strTest,$intLevel,$strColName) = @_;
    my @Errors = ();
    $intLevel++;

    # Entries that the detail test does not diff: the log folder, the
    # earliestDatestamp file, config files, collect.bak and database, index
    # and image files (wmf = windows meta file).
    my $fnIsBlocked = sub {
        my ($strName) = @_;
        return ($strName eq "log"
             || $strName eq "earliestDatestamp"
             || $strName =~ m/\.cfg$/
             || $strName =~ m/collect\.bak$/
             || $strName =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/) ? 1 : 0;
    };

    if (-d $strModel && -d $strTest)
    {
        my @aryInModel = &diffutil::files_in_dir($strModel);
        my @aryInTest = &diffutil::files_in_dir($strTest);

        # Files to be skipped because they get generated on one OS but not the other
        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;

        # Now check all remaining files in the folder exist in both model and test collections
        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
        my @aryCorrectFiles = @{$aryTwoPointers[1]};
        @Errors = @{$aryTwoPointers[0]};

        if(scalar(@Errors) == 0)
        {
            # Listings agree: walk every entry of the model folder.
            foreach my $strEachFile (@aryInModel)
            {
                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                if(!$fnIsBlocked->($strEachFile))
                {
                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
                }
                else
                {
                    if ( $strOutputFormat eq "xml" ) {
                        print "";
                    } else {
                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
                    }
                }
            }
        }
        else
        {
            # Listings differ: only walk the entries present on both sides.
            foreach my $strEachFile (@aryCorrectFiles)
            {
                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
                if(!$fnIsBlocked->($strEachFile))
                {
                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
                }
            }
        }

        if($intLevel == $gv_intVerbosity)
        {
            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
            else { AlignPrint("Contents Comparsion","Failed",$intLevel);}
        }
    }
    else
    {
        # allow for a namespace prefix on the Metadata element, as happens in GreenstoneMETS docmets.xml files
        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*(.*?:)?Metadata>\\s*\\n*";
        my $strResult;

        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
        # in case these don't appear in the same order between collections, since
        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
        # when they can show up as unnecessary diff 'errors'
        my ($model_contents, $test_contents);

        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {

            # Slurp both files. Three-argument open with lexical handles, so
            # unusual characters in a path cannot be taken for an open mode.
            open(my $fhModel, "<", $strModel) or die "Unable to open $strModel...ERROR: $!\n";
            sysread($fhModel, $model_contents, -s $fhModel);
            close($fhModel);

            open(my $fhTest, "<", $strTest) or die "Unable to open $strTest...ERROR: $!\n";
            sysread($fhTest, $test_contents, -s $fhTest);
            close($fhTest);

            $model_contents =~ s/$ignore_line_re//g;
            $test_contents =~ s/$ignore_line_re//g;

            # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
            # before comparing a windows test with a linux model or vice-versa
            my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
            my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);

            if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
                my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
                my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;

                # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
                $$win_contents =~ s@[\r]@@g;

                # make all single windows slashes into single unix slashes
                # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
                #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
                #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
                # $$win_contents =~ s@\r@\n@mg;

                if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
                    # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
                    # containing solitary carriage returns instead of linefeed
                    $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;

                    # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
                    # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
                    # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
                    $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;

                    # Advanced Beatles collection,
                    # linux version contains: IMG SRC=_httpextlink_&amp;rl=1&amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
                    # while windows contains: IMG SRC=_httpextlink_&amp;rl=1&amp;href=http://\\&quot;http://www.boskowan.com/
                    # Normalising to windows version for doing a diff
                    $$lin_contents =~ s@href=http:///@href=http://@g;
                }
            }

            # processing particular to doc.xml
            if($strModel =~ m/doc\.xml$/) {
                # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
                # these tmpdirs are located inside the collection directory
                $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
                $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;

                # remove all absolute paths upto collect folder from elements
                $model_contents =~ s@((http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
                $test_contents =~ s@((http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;

                # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
                # These tmpdirs are located inside the toplevel *greenstone* directory
                (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
                # NOTE(review): "$$ENV{...}" dereferences a scalar named $ENV
                # rather than reading %ENV, so this test looks like it always
                # falls through to ".*". Left unchanged because switching to
                # the real path would alter which text the patterns below
                # match - confirm the intent before touching it.
                $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
                my $tmpfile_regex = "http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long

                if($test_contents =~ m@$tmpfile_regex@) {
                    # found a match, replace the tmp file name with "random", keeping the original file extension
                    my ($old_tmp_filename, $ext) = ($1, $2);
                    my $new_tmp_filename = "random";

                    # NOTE(review): the $5/$6 tests and replacements below read
                    # capture variables left by whichever match ran last, so
                    # the statement order here matters; keep it as is unless
                    # the whole section is reworked.

                    ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
                    #$tmpfile_regex = "((http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext)";
                    $tmpfile_regex = "((http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext)";
                    if($5) {
                        $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }

                    # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
                    $tmpfile_regex = "((http://)?)(.*)?(/tmp/)?.*?($ext)";
                    if($5) {
                        $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
                    } else { # OrigSource contains only the filename
                        $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
                    }
                }
            } # finished special processing of doc.xml files

            if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
                my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory
                &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
                &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
                # if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
                # &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
                # &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
                # }
            }

            # now can diff the normalised versions of the doc.xml/docmets.xml files:
            $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
        } else {
            $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
        }

        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*.*\\s*\$");
        #$strResult = GeneralOutput($strResult);

        if ( $strOutputFormat eq "xml" ) {
            #
        } else {
            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
        }

        if ($strResult eq "")
        {
            if ( $strOutputFormat eq "xml" ) {
                print "\n";
            } else {
                AlignPrint("Comparing File","Succeed",$intLevel);
            }
        }
        else
        {
            # print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
            my $strOutput = "Difference Report:\n$strResult\n";
            if ( $strOutputFormat eq "xml" ) {
                print "";
            } else {
                AlignPrint("Comparing File","Failed",$intLevel);
            }

            # Perl's -B heuristic replaces the old "file -b" call. The result
            # used to be matched against the word "data", which a 0/1 flag can
            # never contain, so binary files were always dumped as a text diff.
            if ( -B $strModel ) {
                VobPrint( "These binary files differ", $intLevel );
            } else {
                VobPrint ( "$strOutput" , $intLevel);
            }

            if ( $strOutputFormat eq "xml" ) {
                print "";
            }
            if($gv_blnErrorStop ne "off") { exit; }
            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
        }
    }
    return @Errors;
}
#-----------------------------------------
# Name: FolderTesting
# Parameters: $aryptModel, $aryptTest - references to the two file listings
#             $strModelFolder, $strTestFolder - folder paths used in reports
#             $intLevel - verbosity level for the report lines
# Returns: a two element list (\@Errors, \@CorrectFiles). When the listings
#          agree both arrays are empty; otherwise @Errors holds one entry per
#          name found on one side only and @CorrectFiles the names found on
#          both sides.
#-----------------------------------------
sub FolderTesting
{
    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
    my (@Errors, @CorrectFiles);
    my @TwoPointers = (\@Errors,\@CorrectFiles);
    my $blnXml = ($strOutputFormat eq "xml");

    VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel) unless $blnXml;

    # Tag every name: 'M' = model only, 'T' = test only, 'B' = both.
    my %hashCount = map { $_ => 'M' } @$aryptModel;
    foreach my $strEachItem (@$aryptTest)
    {
        my $blnInModel = (defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M');
        $hashCount{$strEachItem} = $blnInModel ? 'B' : 'T';
    }

    # The listings agree when both have the same length and the union of
    # names is no larger than either of them.
    my $intInModel = scalar(@$aryptModel);
    my $blnSame = ($intInModel == scalar(@$aryptTest) && $intInModel == scalar(keys %hashCount));

    if ($blnXml) {
        print "\n";
    } else {
        AlignPrint("Folder Comparsion", ($blnSame ? "Succeed" : "Failed"), $intLevel);
    }
    return @TwoPointers if $blnSame;

    foreach my $strEachItem (keys %hashCount)
    {
        my $strState = $hashCount{$strEachItem};
        if ($strState eq 'B')
        {
            push(@CorrectFiles,$strEachItem);
            next;
        }

        my $strOutput = "";
        my $strReport = "";
        if ($strState eq 'M')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
            $strReport = "Difference Report: difference found at $strTestFolder";
        }
        elsif ($strState eq 'T')
        {
            $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
            $strReport = "Difference Report: difference found at $strModelFolder";
        }
        else {die "Error occours in diffcol_mk2::TestingFolder\n"}

        VobPrint ("$strOutput\n",$intLevel);
        push(@Errors, $strOutput."\n\t".$strReport."\n");
    }

    print "" if $blnXml;
    return @TwoPointers;
}
#-----------------------------------------
# Name: VobPrint
# Parameters: $strOutput - text to print
#             $intLevel  - verbosity level of the message
# Prints the text followed by a newline when $intLevel does not exceed
# $gv_intVerbosity. Messages at level 1 or above are indented with tabs
# (one more tab for every two levels) and lose one trailing newline. In xml
# mode the characters & < > are replaced by their entities. Output longer
# than 1000 characters is cut to 978 characters and marked as truncated.
#-----------------------------------------
sub VobPrint
{
    my ($strOutput, $intLevel) = @_;
    my $strTab = "";
    my $intTab = int($intLevel/2);

    if($intLevel <= $gv_intVerbosity)
    {
        if($intLevel >= 1)
        {
            $strTab = "\t"x($intTab+1);
            $strOutput =~ s/\n$//;
            $strOutput =~ s/\n/\n$strTab/g;
            #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
        }

        if( $strOutputFormat eq "xml" ) {
            # Escape the three XML special characters; the ampersand has to
            # go first so the entities added afterwards are not escaped again.
            $strOutput =~ s/&/&amp;/g;
            $strOutput =~ s/</&lt;/g;
            $strOutput =~ s/>/&gt;/g;
        }

        if ( length( $strOutput ) > 1000 ) {
            $strOutput = substr( $strOutput, 0, 978);
            # make sure there are no stray ampersands/partial entities that need to be completed as &lt; or &gt; or &amp;
            if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &l => &lt; or &g => &gt; or &a(m)(p) => &amp; or & => &gt;
                if(defined $1 && $1) {
                    my $rest = $1;
                    if($rest =~ m/^a/) {
                        $strOutput =~ s@am?p?$@amp;@;
                    }
                    elsif($rest eq "g" || $rest eq "l") {
                        $strOutput .= "t;"; # close the known entity
                    }
                    elsif($rest eq "gt" || $rest eq "lt") {
                        $strOutput .= ";";
                    }
                } else { # & on its own
                    #$strOutput = substr( $strOutput, 0, 977); # lop off the &
                    $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
                    # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
                }
            }
            $strOutput .= "... (output truncated)";
        }
        print $strTab.$strOutput."\n";
    }
}
#----##
#--Main System----------------------------
#-----------------------------------------
# Name: main
# Parameters: arguments from command line
# Pre-condition: testing will start by calling this main function.
# Post-condition: output the test results for one or more collections.
#-----------------------------------------
# Parses the command line, stores the settings in the file-level variables
# and then tests every collection named on the command line, printing a
# start banner, the test output and an end summary for each. Dies when the
# options cannot be parsed, when no collection is named, or when a
# collection folder is missing from the model or test root.
sub main
{
    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os,$debugging);
    my $strProgName = $0;

    #--System Arguments Setup
    if (!parsargv::parse(\@ARGV,
            'estop//off', \$strErrorStop,
            'eshow//off', \$strErrorShow,
            'debug', \$debugging,
            'verbosity/\d+/1', \$intVerbosity,
            'mode/[\w\-]+/all', \$strMode,
            'output/[\w\-]+/text', \$strOutputFormat,
            'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
            'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
        )) {
        PrintUsage($strProgName);
        die "\n";
    }

    # Whatever is left in @ARGV is treated as collection names by the loop
    # below, so the check has to be made on what remains after parsing
    # (assumes parsargv::parse strips the options it recognises - TODO
    # confirm). Counting before parsing let an options-only command line
    # through, and the run then finished without testing anything.
    if (scalar(@ARGV) < 1) {
        PrintUsage($strProgName);
        die "\n";
    }

    $gv_test_os = $test_os; # if not specified, defaults to "compute"
    $gv_model_os = $model_os; # tends to be linux
    $gv_blnDebugging = $debugging;
    $gv_blnErrorStop = $strErrorStop;
    $gv_blnErrorShow = $strErrorShow;
    $gv_intVerbosity = $intVerbosity;
    $gv_strMode = SetMode($strMode);
    #----##

    # To find out what version of perl we're using
    if( $strOutputFormat eq "xml" ) {
        my $perloutput = `perl -v`;
        $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
        $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}" if $ENV{'PERLPATH'};
        print "Perl version: $perloutput\n"; # die "Perl version: $perloutput\n";
    }

    #--Collection(s) Testing
    foreach my $strColName (@ARGV)
    {
        my @ErrorsInEachCol;
        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);

        #--Output(Start)
        OutputStart($strColName);
        #----##

        if(-e $strModelCol && -e $strTestCol )
        {
            #--Individual Testing
            if ($gv_strMode eq "Individual")
            {
                @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
            }
            #----##
            #--Initial Testing
            elsif ($gv_strMode eq "Initial")
            {
                @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
            }
            #----##
            #--Full Testing
            elsif ($gv_strMode eq "Full")
            {
                @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
            }
            #----##
            #--Error Checking
            else
            {
                if ( $strOutputFormat eq "xml" ) {
                    die "Error occoured in main function\n";
                } else {
                    die "Error occoured in main function.\n";
                }
            }
            #----##
        }
        else
        {
            if( $strOutputFormat eq "xml" ) {
                die "Cannot find collection: $strColName\n";
            } else {
                die "Error: cannot find collection: $strColName\n";
            }
        }
        #----##

        #--Output(Results and Errors)
        OutputEnd($strColName,\@ErrorsInEachCol);
        #----##
    }
}
#----##
&main();