root/other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl @ 29398

Revision 29398, 35.5 KB (checked in by ak19, 5 years ago)

1. There's always a deprecation warning on the switch statement used in task.pl. Replaced with plain if-else block. 2. The perl version found by diffcol is written out to the xml version of the report (not the html version). This may help in detecting new changes.

Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnErrorStop = "false";
61$gv_blnErrorShow = "false";
62$gv_intVerbosity = 0;
63$gv_strMode = "Full";
64$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
65
66%gv_IndivList = ("archives" => 0,
67        "etc" => 0,
68        "images" => 0,
69        "building" => 0,
70        "import" => 0,
71        "index" => 0,
72        "log" => 0,
73        "metadata" => 0,
74        "perllib" => 0,
75        "temp" => 0);
76#----##
77
78#--System Setup---------------------------
79sub SetMode
80{
81    my ($strModeList) = @_;
82    $strModeList =~ s/\|/ /g;
83    my @Modes = split(" ",$strModeList);
84
85    my $blnIndividual = "true";
86    my $blnInitial = "false";
87    my $blnFull = "false";
88
89
90    foreach $strEachMode (@Modes)
91    {
92        if($strEachMode eq "all")
93        {
94            $blnFull = "true";
95            $blnIndividual = "false";
96        }
97        elsif($strEachMode eq "init")
98        {
99            $blnInitial = "true";
100            $blnIndividual = "false";
101        }
102        else
103        {
104            if(defined $gv_IndivList{$strEachMode})
105            {
106                $gv_IndivList{$strEachMode} = 1;
107            }
108            else
109            {
110                die Help("Error: used undefined mode");
111            }
112        }
113    }
114
115    if($blnFull eq "true") {return "Full";}
116    elsif($blnInitial eq "true") {return "Initial";}
117    elsif($blnIndividual eq "true") {return "Individual";}
118    else {die "Error occured in function SetMode!!\n";}
119}
120#----##
121
122#--System Process-------------------------
123sub IndivTest
124{
125    my ($strModelCol,$strTestCol,$strColName) = @_;
126    my @Errors = ();
127    my $intNumberOfErrors = 0;
128    foreach $strEachFolder (keys %gv_IndivList)
129    {
130        if($gv_IndivList{$strEachFolder} == 1)
131        {
132           
133            VobPrint("Start Comparing \"$strEachFolder\"\n",0);
134            my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
135            my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
136            $intNumberOfErrors = scalar(@Errors);
137            push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
138            $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
139            VobPrint("End Comparing \"$strEachFolder\"\n",0);
140            VobPrint("Difference Found: $intNumberOfErrors\n",0);
141            VobPrint ("\n",0);
142        }
143    }
144    return @Errors;
145}
146
147sub InitTest
148{
149
150    my ($strModelCol,$strTestCol,$strColName) = @_;
151    my $intLevel = 1;
152    my @Errors;
153   
154    # Testing Log files
155#   my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
156#   my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
157#   
158#   if(-e $strModelLog && -e $strTestLog)
159#   {
160#   my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
161#   if($strLogError ne "")
162#   {
163#       AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
164#       VobPrint ("$strLogError\n",$intLevel);
165#
166#       $strLogError = "$strLogError";
167#       $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
168#       push(@Errors,$strLogError);
169#   }
170#   else
171#   {
172#       AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
173#   }
174#   }
175#   else
176#   {
177#   my $strErrorColName;
178#   my $strLogError;
179#   
180#   if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
181#   if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
182#
183#   AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
184#   $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
185#   VobPrint ("$strLogError\n",$intLevel);
186#   $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
187#
188#   push(@Errors,$strLogError);
189#   }
190#   VobPrint ("\n",$intLevel);
191   
192    # Testing the build.cfg
193    my $strModelBcfg =  &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
194    my $strTestBcfg =  &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
195
196    if(-e $strModelBcfg && -e $strTestBcfg)
197    {
198        my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
199        if($strBcfgError ne "")
200        {
201            if( $strOutputFormat eq "xml" ) {
202                print "<build-cfg succeeded=\"no\">\n<message>";
203            } else {
204                AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
205            }
206
207            VobPrint ("$strBcfgError",$intLevel);
208            if( $strOutputFormat eq "xml" ) {
209                print "</message></build-cfg>";
210            }
211
212            $strBcfgError = "$strBcfgError";
213            $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
214            push(@Errors,$strBcfgError);
215        }
216        else
217        {
218            if( $strOutputFormat eq "xml" ) {
219                print "<build-cfg succeeded=\"yes\"/>";
220            } else {
221                AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
222            }
223        }
224    }
225    else
226    {
227        my $strErrorColName;
228        my $strBcfgError;
229       
230        if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
231        if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
232
233        AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
234        $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
235        VobPrint ("$strBcfgError\n",$intLevel);
236        $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
237
238        push(@Errors,$strBcfgError);
239    }
240    VobPrint ("\n",$intLevel);
241
242    # Testing the collect.cfg
243
244    my $strModelCcfg =  &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
245    my $strTestCcfg =  &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
246   
247    if(-e $strModelCcfg && -e $strTestCcfg)
248    {
249        my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
250        if($strCcfgError ne "")
251        {
252            if( $strOutputFormat eq "xml" ) {
253                print "<collect-cfg succeeded=\"no\"><message>";
254            } else {
255                AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
256            }
257
258            VobPrint ("$strCcfgError",$intLevel);
259
260            if( $strOutputFormat eq "xml" ) {
261                print "</message></collect-cfg>";
262            }
263
264            $strCcfgError = "$strCcfgError";
265            $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
266            push(@Errors,$strCcfgError);
267        }
268        else
269        {
270            if( $strOutputFormat eq "xml" ) {
271                print "<collect-cfg succeeded=\"yes\"/>";
272            } else {
273                AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
274            }
275        }
276    }
277    else
278    {
279        my $strErrorColName;
280        my $strCcfgError;
281
282        if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
283        if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
284
285        AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
286        $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
287        VobPrint ("$strCcfgError\n",$intLevel);
288        $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
289   
290        push(@Errors,$strCcfgError);
291    }
292
293    VobPrint ("\n",$intLevel);
294   
295    # Testing databases
296
297    # index
298    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
299    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");   
300    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
301    if($strGdbError) {
302        push(@Errors,$strGdbError);
303    }
304   
305    # archives
306    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
307    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");   
308    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
309    if($strGdbError) {
310        push(@Errors,$strGdbError);
311    }
312   
313    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
314        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
315        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
316        if($strGdbError) {
317            push(@Errors,$strGdbError);
318        }
319
320    VobPrint ("\n",$intLevel);
321
322    return @Errors;
323}
324
325
326# At present handles gdbm - need to expand to allow for jdbm and other db types
327sub GdbDiff
328{
329    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
330
331    my $strGdbError = 0;
332
333    if(-e $strModelGdb && -e $strTestGdb)
334    {
335    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
336    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
337    if($strGdbError ne "")
338    {
339        if( $strOutputFormat eq "xml" ) {
340        print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
341        } else {
342        AlignPrint("Database Comparsion Result","Failed",$intLevel);
343        }
344        VobPrint ("$strGdbError\n",$intLevel);
345       
346        if( $strOutputFormat eq "xml" ) {
347        print "</message></database>";
348        }
349       
350        $strGdbError = "$strGdbError";
351        $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
352        #push(@Errors,$strGdbError);
353    }
354    else
355    {
356        if( $strOutputFormat eq "xml" ) {
357        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
358        } else {
359        AlignPrint("Database Comparsion Result","Succeed",$intLevel);
360        }
361    }
362    }
363    else
364    {
365    my $strErrorColName;
366   
367    if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
368    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
369   
370    AlignPrint("Database Comparsion Result","Failed",$intLevel);
371   
372    $strGdbError = "Difference Report: No Database files found in $strErrorColName";
373    VobPrint ("$strGdbError\n",$intLevel);
374   
375    $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
376
377    }
378   
379    return $strGdbError;
380}
381
382sub FullTest
383{
384    my ($strModelCol,$strTestCol,$strColName) = @_;
385    my @Errors = ();
386    my $intLevel = 0;
387    my $intNumberDiffs = 0;
388   
389    # <Initial Test>
390    if( $strOutputFormat eq "xml" ) {
391        #print "<initial-test>";
392    } else {
393        VobPrint("Initial Testing Start\n",$intLevel);
394    }
395
396    @Errors = InitTest($strModelCol,$strTestCol,$strColName);
397    $intNumberDiffs = scalar(@Errors);
398
399    if( $strOutputFormat eq "xml" ) {
400        #print "</initial-test>";
401    } else {
402        VobPrint("Initial Testing End\n",$intLevel);
403        VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
404        VobPrint("\n",$intLevel);
405    }
406    # </Initial Test>
407
408    # <Detailed Test>
409    if( $strOutputFormat eq "xml" ) {
410        #print "<detailed-test>";
411    } else {
412        VobPrint("Detail Testing Start\n",$intLevel);
413    }
414    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
415    $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
416
417    if( $strOutputFormat eq "xml" ) {
418        #print "</detailed-test>";
419    } else {
420        VobPrint("Detail Testing End\n",$intLevel);
421        VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
422    }
423    # </Detailed Test>
424
425    return @Errors;
426}
427#----##
428
429
430#--Other System Utilities
431sub PrintUsage
432{
433    my ($strProgName) = @_;
434    if ( $strOutputFormat eq "xml" ) {
435        print "<error>usage incorrect</error>\n";
436    } else {
437        print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
438    }
439    Help("Error: used incorrect parameters");
440}
441
442sub Help
443{
444    my ($strError) = @_;
445    my $aryptHelps =
446    [ { 'name' => "verbosity",
447        'type' => "scale",
448        'argu' => "a integer" ,
449        'descrip' => "this parameter setup the verbosity of the testing result"},
450      { 'name' => "mode",
451        'type' => "option",
452        'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
453        'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
454      { 'name' => "estop",
455        'type' => "flag",
456        'argu' => "NULL" ,
457        'descrip' => "Set then system will stop once it meet an error"},
458      { 'name' => "eshow",
459        'type' => "flag",
460        'argu' => "NULL" ,
461        'descrip' => "Set then system will show the error summary"}
462      ];
463   
464
465    if ( $strOutputFormat ne "xml" ) {
466        print "$strError\n";
467
468        foreach my $hashOneArg (@{$aryptHelps})
469        {
470            print "\n----------------------------\n";
471            print "Parameters: -".$hashOneArg->{"name"}."\n";
472            print "Type: ".$hashOneArg->{"type"}."\n";
473            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
474            print "Description: ".$hashOneArg->{"descrip"}."\n";
475            print "----------------------------\n";
476        }
477    }
478}
479
480sub OutputStart
481{
482    my ($strColName) = @_;
483    my $intPadding = 17 - length($strColName);
484
485    if ( $strOutputFormat eq "xml" ) {
486        print "<diffcol>\n";
487    } else {
488        print "+---------------------------------------------------------+\n";
489        print "|                                                         |\n";
490        print "|              Start Testing Collection: $strColName"," " x $intPadding,"|\n";
491        print "|                                                         |\n";
492        print "+---------------------------------------------------------+\n\n";
493    }
494}
495
496sub OutputEnd
497{
498    my ($strColName,$aryptErrors) = @_;
499    my $intPadding = 12 - length($strColName);
500    if ( $strOutputFormat eq "xml" ) {
501        print "</diffcol>\n";
502    } else {
503        print "\n";
504        print "+---------------------------------------------------------+\n";
505        print "|                                                         |\n";
506        print "|            Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
507        print "|                                                         |\n";
508        print "+---------------------------------------------------------+\n\n";
509    }
510
511    my $intTotalErrors = scalar(@{$aryptErrors});
512    if ( $strOutputFormat ne "xml" ) {
513        print "Checking completed, there is $intTotalErrors error(s) found.\n";
514    }
515
516    if($gv_blnErrorShow ne "off")
517    {
518        foreach my $strEachError (@{$aryptErrors})
519        {
520            if ( $strOutputFormat eq "xml" ) {
521                print "<error>";
522                print $strEachError;
523                print "</error>\n";
524            } else {
525                print "+---------------------------------------------------------+\n";
526                print "|                        Error                            |\n";
527                print "+---------------------------------------------------------+\n\n";
528                print "$strEachError\n\n";
529            }
530        }
531    }
532    else
533    {
534        if ( $strOutputFormat ne "xml" ) {
535            print "Use -eshow to show the error detail\n\n";
536        }
537    }
538}
539
540sub AlignPrint
541{
542    my ($strMainString,$strStatus,$intLevel) = @_;
543    my $intDot = 100 - length($strMainString) - length($strStatus);
544    VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
545}
546
547
548# this function is only called on DocXMLFiles.
549# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
550# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
551sub isDocOrMETSXMLFileWindows
552{
553    my ($file_contents) = @_;
554   
555    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
556   
557    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
558    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
559   
560    # for doc.xml:
561    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
562    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
563        $gsdlsourcefilename = $2;
564        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
565            return 1;
566        }
567    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) {  # windows slashes detected in doc.xml in index/text/HASHxxx.dir
568        return 1;
569    }
570   
571    return 0;   
572}
573
574sub TestEach
575{
576    my ($strModel,$strTest,$intLevel,$strColName) = @_;
577    my @Errors = ();
578
579    $intLevel++;
580    if (-d $strModel && -d $strTest)
581    {   
582        my @aryInModel = &diffutil::files_in_dir($strModel);
583        my @aryInTest = &diffutil::files_in_dir($strTest);
584       
585        # Files to be skipped because they get generated on one OS but not the other
586        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
587        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
588        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
589        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
590       
591        # Now check all remaining files in the folder exist in both model and test collections
592        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
593        my @aryCorrectFiles = @{$aryTwoPointers[1]};
594        @Errors = @{$aryTwoPointers[0]};
595       
596        if(scalar(@Errors) == 0)
597        {
598            foreach my $strEachFile (@aryInModel)
599            {
600                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
601                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
602                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
603                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file #  || $strEachFile =~ m/\~$/g to get rid of ~ files
604                {
605                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
606                }
607                else
608                {
609                    if ( $strOutputFormat eq "xml" ) {
610                        print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
611                    } else {
612                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
613                    }
614                }
615            }
616        }
617        else
618        {
619            foreach my $strEachFile (@aryCorrectFiles)
620            {
621                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
622                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
623                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) #  || $strEachFile =~ m/\~$/g to get rid of ~ files
624                {
625                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
626                }
627            }
628        }
629        if($intLevel == $gv_intVerbosity)
630        {
631            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
632            else {  AlignPrint("Contents Comparsion","Failed",$intLevel);}
633        }
634    }
635    else
636    {
637        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
638        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
639       
640        my $strResult;
641
642        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
643        # in case these don't appear in the same order between collections, since
644        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
645        # when they can show up as unnecessary diff 'errors'
646
647        my ($model_contents, $test_contents);
648
649        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
650        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
651
652        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
653        sysread(FIN, $model_contents, -s FIN);
654        close(FIN);
655        open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
656        sysread(FIN, $test_contents, -s FIN);
657        close(FIN);
658
659        $model_contents =~ s/$ignore_line_re//g;
660        $test_contents =~ s/$ignore_line_re//g;
661
662
663        # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
664        # before comparing a windows test with a linux model or vice-versa
665        my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
666        my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
667       
668        if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
669       
670            my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
671            my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
672           
673            # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
674            $$win_contents =~ s@[\r]@@g;           
675       
676            # make all single windows slashes into single unix slashes
677            # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
678            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
679            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;             
680           
681            # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
682            #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
683           
684            #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
685            # $$win_contents =~ s@\r@\n@mg;
686           
687            if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
688                # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
689                # containing solitary carriage returns instead of linefeed
690                $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
691               
692               
693                # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
694                # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
695                # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
696                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
697                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
698               
699                # Advanced Beatles collection,
700                # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
701                # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
702                # Normalising to windows version for doing a diff
703                $$lin_contents =~ s@href=http:///@href=http://@g;               
704            }   
705        }
706       
707        # processing particular to doc.xml 
708        if($strModel =~ m/doc\.xml$/) {
709            # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
710            # these tmpdirs are located inside the collection directory
711            $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
712            $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
713           
714            # remove all absolute paths upto collect folder from <Metadata /> elements
715            $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
716            $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;       
717           
718            # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
719            # These tmpdirs are located inside the toplevel *greenstone* directory
720            (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
721            $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
722            my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
723           
724            if($test_contents =~ m@$tmpfile_regex@) {           
725                # found a match, replace the tmp file name with "random", keeping the original file extension
726                # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
727           
728                my ($old_tmp_filename, $ext) = ($1, $2);           
729                my $new_tmp_filename = "random";           
730               
731                ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
732                #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
733                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
734                if($5) {
735                    $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
736                } else { # OrigSource contains only the filename
737                    $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
738                }
739               
740                # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename           
741                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
742                if($5) {
743                    $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
744                } else { # OrigSource contains only the filename
745                    $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
746                }
747            }
748
749        } # finished special processing of doc.xml files
750       
751        my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory       
752#       &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
753#       &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
754#       if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
755#       &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
756#       &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
757#       }
758       
759
760       
761        # now can diff the normalised versions of the doc.xml/docmets.xml files:
762        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
763
764        } else {
765        $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
766        }
767
768        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
769        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
770        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
771
772        #$strResult = GeneralOutput($strResult);
773        if ( $strOutputFormat eq "xml" ) {
774            #
775        } else {
776            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
777        }
778        if ($strResult eq "")
779        {
780            if ( $strOutputFormat eq "xml" ) {
781                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
782            } else {
783                AlignPrint("Comparing File","Succeed",$intLevel);
784            }
785        }
786        else
787        {
788#           print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
789
790            my $strOutput = "Difference Report:\n$strResult\n";
791            if ( $strOutputFormat eq "xml" ) {
792                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
793            } else {
794                AlignPrint("Comparing File","Failed",$intLevel);
795            }
796
797            #$result=`file -b $strModel`; # linux specific test for binary file
798            $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
799            if ( "$result" =~  "data" ) {
800                     VobPrint( "These binary files differ", $intLevel );
801            } else {
802                     VobPrint ( "$strOutput" , $intLevel);
803            }
804
805
806            if ( $strOutputFormat eq "xml" ) {
807                print "</message></file-comparison>";
808            }
809
810            if($gv_blnErrorStop ne "off") { exit; }
811            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
812        }
813    }
814
815    return @Errors;
816}
817
818
819sub FolderTesting
820{
821    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
822    my %hashCount = ();
823    my @Errors = ();
824    my @CorrectFiles = ();
825    my @TwoPointers = (\@Errors,\@CorrectFiles);
826
827    if ( $strOutputFormat eq "xml" ) {
828        #print "<folder-comparison location=\"$strModelFolder\">\n";       
829    } else {
830        VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
831    }
832
833    foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
834    foreach my $strEachItem (@$aryptTest)
835    {
836        if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
837        else {$hashCount{$strEachItem} = 'T';}
838    }
839
840    if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
841    {
842        if ( $strOutputFormat eq "xml" ) {
843            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
844        } else {
845            AlignPrint("Folder Comparsion","Succeed",$intLevel);
846        }
847        return @TwoPointers;
848    }
849    else
850    {
851        if ( $strOutputFormat eq "xml" ) {
852            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
853        } else {
854            AlignPrint("Folder Comparsion","Failed",$intLevel);
855        }
856
857        foreach my $strEachItem (keys %hashCount)
858        {
859            if($hashCount{$strEachItem} ne 'B')
860            {
861                my $strOutput = "";
862                my $strReport = "";
863
864                if($hashCount{$strEachItem} eq 'M')
865                {
866                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
867                    $strReport = "Difference Report: difference found at $strTestFolder";
868                }
869                elsif($hashCount{$strEachItem} eq 'T')
870                {
871                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
872                    $strReport = "Difference Report: difference found at $strModelFolder";
873                }
874                else {die "Error occours in diffcol_mk2::TestingFolder\n"}
875
876                VobPrint ("$strOutput\n",$intLevel);
877                $strOutput = $strOutput."\n\t".$strReport."\n";
878                push(@Errors,$strOutput);
879            }
880            else {push(@CorrectFiles,$strEachItem);}
881        }
882        if( $strOutputFormat eq "xml" ) {
883            print "</message></folder-comparison>";
884        }
885
886        return @TwoPointers;
887    }
888}
889
890sub VobPrint
891{
892    my ($strOutput, $intLevel) = @_;
893    my $strTab = "";
894    my $intTab = int($intLevel/2);
895    if($intLevel <= $gv_intVerbosity)
896    {
897        if($intLevel >= 1)
898        {
899            $strTab = "\t"x($intTab+1);
900            $strOutput =~ s/\n$//;
901            $strOutput =~ s/\n/\n$strTab/g;
902            #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
903        }
904
905        if( $strOutputFormat eq "xml" ) {
906            $strOutput =~ s/&/&amp;/g;
907            $strOutput =~ s/</&lt;/g;
908            $strOutput =~ s/>/&gt;/g;
909        }
910
911      if ( length( $strOutput ) > 1000 ) {
912            $strOutput = substr( $strOutput, 0, 978);
913
914        # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
915        if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
916        if(defined $1 && $1) {
917            my $rest = $1;
918            if($rest =~ m/^a/) {
919                $strOutput =~ s@am?p?$@amp;@;
920            }
921            elsif($rest eq "g" || $rest eq "l") {
922            $strOutput .= "t;"; # close the known tag
923            }
924            elsif($rest eq "gt" || $rest eq "lt") {
925            $strOutput .= ";";
926            }           
927        } else { # & on its own
928            #$strOutput = substr( $strOutput, 0, 977); # lop off the &
929            $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
930            # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
931        }
932        }
933        $strOutput .= "... (output truncated)";
934      }
935
936
937        print $strTab.$strOutput."\n";
938    }
939}
940#----##
941
942
943#--Main System----------------------------
944#-----------------------------------------
945# Name: main
946# Perameters: arguments from command line
947# Pre-condition: testing will start by calling this main function.
948# Post-condition: output the test results for one or more collections.
949#-----------------------------------------
950sub main
951{
952    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
953    my $strProgName = $0;
954    my $intArgc = scalar(@ARGV);
955   
956    #--System Arguments Setup
957    if (!parsargv::parse(\@ARGV,
958        'estop//off', \$strErrorStop,
959        'eshow//off', \$strErrorShow,
960        'verbosity/\d+/1', \$intVerbosity,
961        'mode/[\w\-]+/all', \$strMode,
962        'output/[\w\-]+/text', \$strOutputFormat,
963        'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
964        'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
965        )) {
966        PrintUsage($strProgName);
967        die "\n";
968    }
969
970    if ($intArgc<1) {
971        PrintUsage($strProgName);
972        die "\n";
973    }
974
975    $gv_test_os = $test_os; # if not specified, defaults to "compute"
976    $gv_model_os = $model_os; # tends to be linux
977
978    $gv_blnErrorStop = $strErrorStop;
979    $gv_blnErrorShow = $strErrorShow;
980    $gv_intVerbosity = $intVerbosity;
981    $gv_strMode = SetMode($strMode);
982
983    #----##
984
985# To find out what version of perl we're using
986    if( $strOutputFormat eq "xml" ) {
987        my $perloutput = `perl -v`;
988        $perloutput =~ s@.*\((v\d+(\.\d+)*)\).*@$1@s;
989        $ENV{'PATH'}="$ENV{'PERLPATH'}:$ENV{'PATH'}";       
990        print "<perl-version>Perl version: $perloutput</perl-version>\n"; # die "<error>Perl version: $perloutput</error>\n";
991    }
992
993    #--Collection(s) Testing
994    foreach $strColName (@ARGV)
995    {
996        my @ErrorsInEachCol;
997        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
998        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
999
1000        #--Output(Start)
1001        OutputStart($strColName);
1002        #----##
1003       
1004        if(-e $strModelCol && -e $strTestCol )
1005        {
1006           
1007            #--Individual Testing
1008            if ($gv_strMode eq "Individual")
1009            {
1010                @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1011            }
1012            #----##
1013           
1014            #--Initial Testing
1015            elsif   ($gv_strMode eq "Initial")
1016            {
1017                @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1018            }
1019            #----##
1020           
1021            #--Full Testing
1022            elsif   ($gv_strMode eq "Full")
1023            {
1024                @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1025            }
1026            #----##
1027           
1028            #--Error Checking
1029            else
1030            {
1031                if ( $strOutputFormat eq "xml" ) {
1032                    die "<error>Error occoured in main function</error>\n";
1033                } else {
1034                    die "Error occoured in main function.\n";
1035                }
1036            }
1037            #----##
1038           
1039        }
1040        else
1041        {
1042            if( $strOutputFormat eq "xml" ) {
1043                die "<error>Cannot find collection: $strColName</error>\n";
1044            } else {
1045                die "Error: cannot find collection: $strColName\n";
1046            }
1047        }
1048        #----##
1049
1050        #--Output(Results and Errors)
1051        OutputEnd($strColName,\@ErrorsInEachCol);
1052        #----##
1053
1054    }
1055}
1056#----##
1057
1058&main();
Note: See TracBrowser for help on using the browser.