root/other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl @ 29432

Revision 29432, 35.5 KB (checked in by sjs49, 5 years ago)

Need double escaping on html entities for less than and greater than symbols for the errors in the multimedia collection to be converted to html properly.

Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnErrorStop = "false";
61$gv_blnErrorShow = "false";
62$gv_intVerbosity = 0;
63$gv_strMode = "Full";
64$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
65
66%gv_IndivList = ("archives" => 0,
67        "etc" => 0,
68        "images" => 0,
69        "building" => 0,
70        "import" => 0,
71        "index" => 0,
72        "log" => 0,
73        "metadata" => 0,
74        "perllib" => 0,
75        "temp" => 0);
76#----##
77
78#--System Setup---------------------------
79sub SetMode
80{
81    my ($strModeList) = @_;
82    $strModeList =~ s/\|/ /g;
83    my @Modes = split(" ",$strModeList);
84
85    my $blnIndividual = "true";
86    my $blnInitial = "false";
87    my $blnFull = "false";
88
89
90    foreach $strEachMode (@Modes)
91    {
92        if($strEachMode eq "all")
93        {
94            $blnFull = "true";
95            $blnIndividual = "false";
96        }
97        elsif($strEachMode eq "init")
98        {
99            $blnInitial = "true";
100            $blnIndividual = "false";
101        }
102        else
103        {
104            if(defined $gv_IndivList{$strEachMode})
105            {
106                $gv_IndivList{$strEachMode} = 1;
107            }
108            else
109            {
110                die Help("Error: used undefined mode");
111            }
112        }
113    }
114
115    if($blnFull eq "true") {return "Full";}
116    elsif($blnInitial eq "true") {return "Initial";}
117    elsif($blnIndividual eq "true") {return "Individual";}
118    else {die "Error occured in function SetMode!!\n";}
119}
120#----##
121
122#--System Process-------------------------
123sub IndivTest
124{
125    my ($strModelCol,$strTestCol,$strColName) = @_;
126    my @Errors = ();
127    my $intNumberOfErrors = 0;
128    foreach $strEachFolder (keys %gv_IndivList)
129    {
130        if($gv_IndivList{$strEachFolder} == 1)
131        {
132           
133            VobPrint("Start Comparing \"$strEachFolder\"\n",0);
134            my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
135            my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
136            $intNumberOfErrors = scalar(@Errors);
137            push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
138            $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
139            VobPrint("End Comparing \"$strEachFolder\"\n",0);
140            VobPrint("Difference Found: $intNumberOfErrors\n",0);
141            VobPrint ("\n",0);
142        }
143    }
144    return @Errors;
145}
146
147sub InitTest
148{
149
150    my ($strModelCol,$strTestCol,$strColName) = @_;
151    my $intLevel = 1;
152    my @Errors;
153   
154    # Testing Log files
155#   my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
156#   my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
157#   
158#   if(-e $strModelLog && -e $strTestLog)
159#   {
160#   my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
161#   if($strLogError ne "")
162#   {
163#       AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
164#       VobPrint ("$strLogError\n",$intLevel);
165#
166#       $strLogError = "$strLogError";
167#       $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
168#       push(@Errors,$strLogError);
169#   }
170#   else
171#   {
172#       AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
173#   }
174#   }
175#   else
176#   {
177#   my $strErrorColName;
178#   my $strLogError;
179#   
180#   if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
181#   if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
182#
183#   AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
184#   $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
185#   VobPrint ("$strLogError\n",$intLevel);
186#   $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
187#
188#   push(@Errors,$strLogError);
189#   }
190#   VobPrint ("\n",$intLevel);
191   
192    # Testing the build.cfg
193    my $strModelBcfg =  &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
194    my $strTestBcfg =  &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
195
196    if(-e $strModelBcfg && -e $strTestBcfg)
197    {
198        my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
199        if($strBcfgError ne "")
200        {
201            if( $strOutputFormat eq "xml" ) {
202                print "<build-cfg succeeded=\"no\">\n<message>";
203            } else {
204                AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
205            }
206
207            VobPrint ("$strBcfgError",$intLevel);
208            if( $strOutputFormat eq "xml" ) {
209                print "</message></build-cfg>";
210            }
211
212            $strBcfgError = "$strBcfgError";
213            $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
214            push(@Errors,$strBcfgError);
215        }
216        else
217        {
218            if( $strOutputFormat eq "xml" ) {
219                print "<build-cfg succeeded=\"yes\"/>";
220            } else {
221                AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
222            }
223        }
224    }
225    else
226    {
227        my $strErrorColName;
228        my $strBcfgError;
229       
230        if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
231        if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
232
233        AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
234        $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
235        VobPrint ("$strBcfgError\n",$intLevel);
236        $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
237
238        push(@Errors,$strBcfgError);
239    }
240    VobPrint ("\n",$intLevel);
241
242    # Testing the collect.cfg
243
244    my $strModelCcfg =  &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
245    my $strTestCcfg =  &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
246   
247    if(-e $strModelCcfg && -e $strTestCcfg)
248    {
249        my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
250        if($strCcfgError ne "")
251        {
252            if( $strOutputFormat eq "xml" ) {
253                print "<collect-cfg succeeded=\"no\"><message>";
254            } else {
255                AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
256            }
257
258            VobPrint ("$strCcfgError",$intLevel);
259
260            if( $strOutputFormat eq "xml" ) {
261                print "</message></collect-cfg>";
262            }
263
264            $strCcfgError = "$strCcfgError";
265            $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
266            push(@Errors,$strCcfgError);
267        }
268        else
269        {
270            if( $strOutputFormat eq "xml" ) {
271                print "<collect-cfg succeeded=\"yes\"/>";
272            } else {
273                AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
274            }
275        }
276    }
277    else
278    {
279        my $strErrorColName;
280        my $strCcfgError;
281
282        if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
283        if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
284
285        AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
286        $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
287        VobPrint ("$strCcfgError\n",$intLevel);
288        $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
289   
290        push(@Errors,$strCcfgError);
291    }
292
293    VobPrint ("\n",$intLevel);
294   
295    # Testing databases
296
297    # index
298    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
299    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");   
300    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
301    if($strGdbError) {
302        push(@Errors,$strGdbError);
303    }
304   
305    # archives
306    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
307    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");   
308    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
309    if($strGdbError) {
310        push(@Errors,$strGdbError);
311    }
312   
313    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
314        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
315        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
316        if($strGdbError) {
317            push(@Errors,$strGdbError);
318        }
319
320    VobPrint ("\n",$intLevel);
321
322    return @Errors;
323}
324
325
326# At present handles gdbm - need to expand to allow for jdbm and other db types
327sub GdbDiff
328{
329    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
330
331    my $strGdbError = 0;
332
333    if(-e $strModelGdb && -e $strTestGdb)
334    {
335    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
336    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
337    if($strGdbError ne "")
338    {
339        if( $strOutputFormat eq "xml" ) {
340        print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
341        } else {
342        AlignPrint("Database Comparsion Result","Failed",$intLevel);
343        }
344        VobPrint ("$strGdbError\n",$intLevel);
345       
346        if( $strOutputFormat eq "xml" ) {
347        print "</message></database>";
348        }
349       
350        $strGdbError = "$strGdbError";
351        $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
352        #push(@Errors,$strGdbError);
353    }
354    else
355    {
356        if( $strOutputFormat eq "xml" ) {
357        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
358        } else {
359        AlignPrint("Database Comparsion Result","Succeed",$intLevel);
360        }
361    }
362    }
363    else
364    {
365    my $strErrorColName;
366   
367    if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
368    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
369   
370    AlignPrint("Database Comparsion Result","Failed",$intLevel);
371   
372    $strGdbError = "Difference Report: No Database files found in $strErrorColName";
373    VobPrint ("$strGdbError\n",$intLevel);
374   
375    $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
376
377    }
378   
379    return $strGdbError;
380}
381
382sub FullTest
383{
384    my ($strModelCol,$strTestCol,$strColName) = @_;
385    my @Errors = ();
386    my $intLevel = 0;
387    my $intNumberDiffs = 0;
388   
389    # <Initial Test>
390    if( $strOutputFormat eq "xml" ) {
391        #print "<initial-test>";
392    } else {
393        VobPrint("Initial Testing Start\n",$intLevel);
394    }
395
396    @Errors = InitTest($strModelCol,$strTestCol,$strColName);
397    $intNumberDiffs = scalar(@Errors);
398
399    if( $strOutputFormat eq "xml" ) {
400        #print "</initial-test>";
401    } else {
402        VobPrint("Initial Testing End\n",$intLevel);
403        VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
404        VobPrint("\n",$intLevel);
405    }
406    # </Initial Test>
407
408    # <Detailed Test>
409    if( $strOutputFormat eq "xml" ) {
410        #print "<detailed-test>";
411    } else {
412        VobPrint("Detail Testing Start\n",$intLevel);
413    }
414    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
415    $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
416
417    if( $strOutputFormat eq "xml" ) {
418        #print "</detailed-test>";
419    } else {
420        VobPrint("Detail Testing End\n",$intLevel);
421        VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
422    }
423    # </Detailed Test>
424
425    return @Errors;
426}
427#----##
428
429
430#--Other System Utilities
431sub PrintUsage
432{
433    my ($strProgName) = @_;
434    if ( $strOutputFormat eq "xml" ) {
435        print "<error>usage incorrect</error>\n";
436    } else {
437        print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
438    }
439    Help("Error: used incorrect parameters");
440}
441
442sub Help
443{
444    my ($strError) = @_;
445    my $aryptHelps =
446    [ { 'name' => "verbosity",
447        'type' => "scale",
448        'argu' => "a integer" ,
449        'descrip' => "this parameter setup the verbosity of the testing result"},
450      { 'name' => "mode",
451        'type' => "option",
452        'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
453        'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
454      { 'name' => "estop",
455        'type' => "flag",
456        'argu' => "NULL" ,
457        'descrip' => "Set then system will stop once it meet an error"},
458      { 'name' => "eshow",
459        'type' => "flag",
460        'argu' => "NULL" ,
461        'descrip' => "Set then system will show the error summary"}
462      ];
463   
464
465    if ( $strOutputFormat ne "xml" ) {
466        print "$strError\n";
467
468        foreach my $hashOneArg (@{$aryptHelps})
469        {
470            print "\n----------------------------\n";
471            print "Parameters: -".$hashOneArg->{"name"}."\n";
472            print "Type: ".$hashOneArg->{"type"}."\n";
473            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
474            print "Description: ".$hashOneArg->{"descrip"}."\n";
475            print "----------------------------\n";
476        }
477    }
478}
479
480sub OutputStart
481{
482    my ($strColName) = @_;
483    my $intPadding = 17 - length($strColName);
484
485    if ( $strOutputFormat eq "xml" ) {
486        print "<diffcol>\n";
487    } else {
488        print "+---------------------------------------------------------+\n";
489        print "|                                                         |\n";
490        print "|              Start Testing Collection: $strColName"," " x $intPadding,"|\n";
491        print "|                                                         |\n";
492        print "+---------------------------------------------------------+\n\n";
493    }
494}
495
496sub OutputEnd
497{
498    my ($strColName,$aryptErrors) = @_;
499    my $intPadding = 12 - length($strColName);
500    if ( $strOutputFormat eq "xml" ) {
501        print "</diffcol>\n";
502    } else {
503        print "\n";
504        print "+---------------------------------------------------------+\n";
505        print "|                                                         |\n";
506        print "|            Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
507        print "|                                                         |\n";
508        print "+---------------------------------------------------------+\n\n";
509    }
510
511    my $intTotalErrors = scalar(@{$aryptErrors});
512    if ( $strOutputFormat ne "xml" ) {
513        print "Checking completed, there is $intTotalErrors error(s) found.\n";
514    }
515
516    if($gv_blnErrorShow ne "off")
517    {
518        foreach my $strEachError (@{$aryptErrors})
519        {
520            if ( $strOutputFormat eq "xml" ) {
521                print "<error>";
522                print $strEachError;
523                print "</error>\n";
524            } else {
525                print "+---------------------------------------------------------+\n";
526                print "|                        Error                            |\n";
527                print "+---------------------------------------------------------+\n\n";
528                print "$strEachError\n\n";
529            }
530        }
531    }
532    else
533    {
534        if ( $strOutputFormat ne "xml" ) {
535            print "Use -eshow to show the error detail\n\n";
536        }
537    }
538}
539
540sub AlignPrint
541{
542    my ($strMainString,$strStatus,$intLevel) = @_;
543    my $intDot = 100 - length($strMainString) - length($strStatus);
544    VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
545}
546
547
548# this function is only called on DocXMLFiles.
549# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
550# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
551sub isDocOrMETSXMLFileWindows
552{
553    my ($file_contents) = @_;
554   
555    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
556   
557    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
558    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
559   
560    # for doc.xml:
561    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
562    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
563        $gsdlsourcefilename = $2;
564        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
565            return 1;
566        }
567    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) {  # windows slashes detected in doc.xml in index/text/HASHxxx.dir
568        return 1;
569    }
570   
571    return 0;   
572}
573
574sub TestEach
575{
576    my ($strModel,$strTest,$intLevel,$strColName) = @_;
577    my @Errors = ();
578
579    $intLevel++;
580    if (-d $strModel && -d $strTest)
581    {   
582        my @aryInModel = &diffutil::files_in_dir($strModel);
583        my @aryInTest = &diffutil::files_in_dir($strTest);
584       
585        # Files to be skipped because they get generated on one OS but not the other
586        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
587        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
588        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
589        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
590       
591        # Now check all remaining files in the folder exist in both model and test collections
592        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
593        my @aryCorrectFiles = @{$aryTwoPointers[1]};
594        @Errors = @{$aryTwoPointers[0]};
595       
596        if(scalar(@Errors) == 0)
597        {
598            foreach my $strEachFile (@aryInModel)
599            {
600                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
601                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
602                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
603                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file #  || $strEachFile =~ m/\~$/g to get rid of ~ files
604                {
605                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
606                }
607                else
608                {
609                    if ( $strOutputFormat eq "xml" ) {
610                        print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
611                    } else {
612                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
613                    }
614                }
615            }
616        }
617        else
618        {
619            foreach my $strEachFile (@aryCorrectFiles)
620            {
621                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
622                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
623                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) #  || $strEachFile =~ m/\~$/g to get rid of ~ files
624                {
625                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
626                }
627            }
628        }
629        if($intLevel == $gv_intVerbosity)
630        {
631            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
632            else {  AlignPrint("Contents Comparsion","Failed",$intLevel);}
633        }
634    }
635    else
636    {
637        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
638        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
639       
640        my $strResult;
641
642        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
643        # in case these don't appear in the same order between collections, since
644        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
645        # when they can show up as unnecessary diff 'errors'
646
647        my ($model_contents, $test_contents);
648
649        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
650        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
651
652        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
653        sysread(FIN, $model_contents, -s FIN);
654        close(FIN);
655        open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
656        sysread(FIN, $test_contents, -s FIN);
657        close(FIN);
658
659        $model_contents =~ s/$ignore_line_re//g;
660        $test_contents =~ s/$ignore_line_re//g;
661
662
663        # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
664        # before comparing a windows test with a linux model or vice-versa
665        my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
666        my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
667       
668        if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
669       
670            my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
671            my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
672           
673            # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
674            $$win_contents =~ s@[\r]@@g;           
675       
676            # make all single windows slashes into single unix slashes
677            # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
678            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
679            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;             
680           
681            # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
682            #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
683           
684            #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
685            # $$win_contents =~ s@\r@\n@mg;
686           
687            if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
688                # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
689                # containing solitary carriage returns instead of linefeed
690                $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
691               
692               
693                # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
694                # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
695                # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
696                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
697                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
698               
699                # Advanced Beatles collection,
700                # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
701                # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
702                # Normalising to windows version for doing a diff
703                $$lin_contents =~ s@href=http:///@href=http://@g;               
704            }   
705        }
706       
707        # processing particular to doc.xml 
708        if($strModel =~ m/doc\.xml$/) {
709            # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
710            # these tmpdirs are located inside the collection directory
711            $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
712            $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
713           
714            # remove all absolute paths upto collect folder from <Metadata /> elements
715            $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
716            $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;       
717           
718            # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
719            # These tmpdirs are located inside the toplevel *greenstone* directory
720            (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
721            $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
722            my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
723           
724            if($test_contents =~ m@$tmpfile_regex@) {           
725                # found a match, replace the tmp file name with "random", keeping the original file extension
726                # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
727           
728                my ($old_tmp_filename, $ext) = ($1, $2);           
729                my $new_tmp_filename = "random";           
730               
731                ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
732                #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
733                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
734                if($5) {
735                    $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
736                } else { # OrigSource contains only the filename
737                    $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
738                }
739               
740                # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename           
741                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
742                if($5) {
743                    $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
744                } else { # OrigSource contains only the filename
745                    $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
746                }
747            }
748
749        } # finished special processing of doc.xml files
750       
751        my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory       
752#       &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
753#       &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
754#       if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
755#       &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
756#       &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
757#       }
758       
759
760       
761        # now can diff the normalised versions of the doc.xml/docmets.xml files:
762        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
763
764        } else {
765        $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
766        }
767
768        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
769        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
770        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
771
772        #$strResult = GeneralOutput($strResult);
773        if ( $strOutputFormat eq "xml" ) {
774            #
775        } else {
776            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
777        }
778        if ($strResult eq "")
779        {
780            if ( $strOutputFormat eq "xml" ) {
781                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
782            } else {
783                AlignPrint("Comparing File","Succeed",$intLevel);
784            }
785        }
786        else
787        {
788#           print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
789
790            my $strOutput = "Difference Report:\n$strResult\n";
791            if ( $strOutputFormat eq "xml" ) {
792                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
793            } else {
794                AlignPrint("Comparing File","Failed",$intLevel);
795            }
796
797            #$result=`file -b $strModel`; # linux specific test for binary file
798            $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
799            if ( "$result" =~  "data" ) {
800                     VobPrint( "These binary files differ", $intLevel );
801            } else {
802                     VobPrint ( "$strOutput" , $intLevel);
803            }
804
805
806            if ( $strOutputFormat eq "xml" ) {
807                print "</message></file-comparison>";
808            }
809
810            if($gv_blnErrorStop ne "off") { exit; }
811            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
812        }
813    }
814
815    return @Errors;
816}
817
818
819sub FolderTesting
820{
821    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
822    my %hashCount = ();
823    my @Errors = ();
824    my @CorrectFiles = ();
825    my @TwoPointers = (\@Errors,\@CorrectFiles);
826
827    if ( $strOutputFormat eq "xml" ) {
828        #print "<folder-comparison location=\"$strModelFolder\">\n";       
829    } else {
830        VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
831    }
832
833    foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
834    foreach my $strEachItem (@$aryptTest)
835    {
836        if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
837        else {$hashCount{$strEachItem} = 'T';}
838    }
839
840    if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
841    {
842        if ( $strOutputFormat eq "xml" ) {
843            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
844        } else {
845            AlignPrint("Folder Comparsion","Succeed",$intLevel);
846        }
847        return @TwoPointers;
848    }
849    else
850    {
851        if ( $strOutputFormat eq "xml" ) {
852            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
853        } else {
854            AlignPrint("Folder Comparsion","Failed",$intLevel);
855        }
856
857        foreach my $strEachItem (keys %hashCount)
858        {
859            if($hashCount{$strEachItem} ne 'B')
860            {
861                my $strOutput = "";
862                my $strReport = "";
863
864                if($hashCount{$strEachItem} eq 'M')
865                {
866                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
867                    $strReport = "Difference Report: difference found at $strTestFolder";
868                }
869                elsif($hashCount{$strEachItem} eq 'T')
870                {
871                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
872                    $strReport = "Difference Report: difference found at $strModelFolder";
873                }
874                else {die "Error occours in diffcol_mk2::TestingFolder\n"}
875
876                VobPrint ("$strOutput\n",$intLevel);
877                $strOutput = $strOutput."\n\t".$strReport."\n";
878                push(@Errors,$strOutput);
879            }
880            else {push(@CorrectFiles,$strEachItem);}
881        }
882        if( $strOutputFormat eq "xml" ) {
883            print "</message></folder-comparison>";
884        }
885
886        return @TwoPointers;
887    }
888}
889
890sub VobPrint
891{
892    my ($strOutput, $intLevel) = @_;
893    my $strTab = "";
894    my $intTab = int($intLevel/2);
895    if($intLevel <= $gv_intVerbosity)
896    {
897        if($intLevel >= 1)
898        {
899            $strTab = "\t"x($intTab+1);
900            $strOutput =~ s/\n$//;
901            $strOutput =~ s/\n/\n$strTab/g;
902            #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
903        }
904
905        if( $strOutputFormat eq "xml" ) {
906            $strOutput =~ s/&/&amp;/g;
907            $strOutput =~ s/</&amp;lt;/g;
908            $strOutput =~ s/>/&amp;gt;/g;
909        }
910
911      if ( length( $strOutput ) > 1000 ) {
912            $strOutput = substr( $strOutput, 0, 978);
913
914        # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
915        if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
916        if(defined $1 && $1) {
917            my $rest = $1;
918            if($rest =~ m/^a/) {
919                $strOutput =~ s@am?p?$@amp;@;
920            }
921            elsif($rest eq "g" || $rest eq "l") {
922            $strOutput .= "t;"; # close the known tag
923            }
924            elsif($rest eq "gt" || $rest eq "lt") {
925            $strOutput .= ";";
926            }           
927        } else { # & on its own
928            #$strOutput = substr( $strOutput, 0, 977); # lop off the &
929            $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
930            # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
931        }
932        }
933        $strOutput .= "... (output truncated)";
934      }
935
936
937        print $strTab.$strOutput."\n";
938    }
939}
940#----##
941
942
943#--Main System----------------------------
944#-----------------------------------------
945# Name: main
946# Perameters: arguments from command line
947# Pre-condition: testing will start by calling this main function.
948# Post-condition: output the test results for one or more collections.
949#-----------------------------------------
950sub main
951{
952    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
953    my $strProgName = $0;
954    my $intArgc = scalar(@ARGV);
955   
956    #--System Arguments Setup
957    if (!parsargv::parse(\@ARGV,
958        'estop//off', \$strErrorStop,
959        'eshow//off', \$strErrorShow,
960        'verbosity/\d+/1', \$intVerbosity,
961        'mode/[\w\-]+/all', \$strMode,
962        'output/[\w\-]+/text', \$strOutputFormat,
963        'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
964        'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
965        )) {
966        PrintUsage($strProgName);
967        die "\n";
968    }
969
970    if ($intArgc<1) {
971        PrintUsage($strProgName);
972        die "\n";
973    }
974
975    $gv_test_os = $test_os; # if not specified, defaults to "compute"
976    $gv_model_os = $model_os; # tends to be linux
977
978    $gv_blnErrorStop = $strErrorStop;
979    $gv_blnErrorShow = $strErrorShow;
980    $gv_intVerbosity = $intVerbosity;
981    $gv_strMode = SetMode($strMode);
982
983    #----##
984
985# To find out what version of perl we're using
986    if( $strOutputFormat eq "xml" ) {
987        my $perloutput = `perl -v`;
988        $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
989        $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}";       
990        print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
991    }
992
993    #--Collection(s) Testing
994    foreach $strColName (@ARGV)
995    {
996        my @ErrorsInEachCol;
997        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
998        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
999
1000        #--Output(Start)
1001        OutputStart($strColName);
1002        #----##
1003       
1004        if(-e $strModelCol && -e $strTestCol )
1005        {
1006           
1007            #--Individual Testing
1008            if ($gv_strMode eq "Individual")
1009            {
1010                @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1011            }
1012            #----##
1013           
1014            #--Initial Testing
1015            elsif   ($gv_strMode eq "Initial")
1016            {
1017                @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1018            }
1019            #----##
1020           
1021            #--Full Testing
1022            elsif   ($gv_strMode eq "Full")
1023            {
1024                @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1025            }
1026            #----##
1027           
1028            #--Error Checking
1029            else
1030            {
1031                if ( $strOutputFormat eq "xml" ) {
1032                    die "<error>Error occoured in main function</error>\n";
1033                } else {
1034                    die "Error occoured in main function.\n";
1035                }
1036            }
1037            #----##
1038           
1039        }
1040        else
1041        {
1042            if( $strOutputFormat eq "xml" ) {
1043                die "<error>Cannot find collection: $strColName</error>\n";
1044            } else {
1045                die "Error: cannot find collection: $strColName\n";
1046            }
1047        }
1048        #----##
1049
1050        #--Output(Results and Errors)
1051        OutputEnd($strColName,\@ErrorsInEachCol);
1052        #----##
1053
1054    }
1055}
1056#----##
1057
1058&main();
Note: See TracBrowser for help on using the browser.