root/other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl2 @ 30652

Revision 30652, 35.4 KB (checked in by ak19, 4 years ago)

Committing outstanding files for diffcol supporting jdb for GS3 diffing. Not yet in use, but I want it on SVN and to not go missing.

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl -w
2
3#TODO: Individual Testing
4
5###########################################################################
6#
7# test.pl -- for testing is built collection is consistent with model collection
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 1999 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29
30package diffcol_mk2;
31
32BEGIN {
33    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
34    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
35    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
36    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
37}
38
39use parsargv;
40use util;
41use FileUtils;
42use logdiff;
43use cfgdiff;
44use gdbdiff;
45use diffutil;
46use Text::Diff;
47use Cwd;
48
49#--Global Variables Declaration-----------
50my ($gv_test_os, $gv_model_os); # still just file globals
51
52if($ENV{'GSDL3SRCHOME'}) {
53    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/model-collect");
54    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites","localsite","/collect");
55} else {
56    $gv_strModelColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/model-collect");
57    $gv_strTestColRoot = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"/collect");
58}
59
60$gv_blnErrorStop = "false";
61$gv_blnErrorShow = "false";
62$gv_intVerbosity = 0;
63$gv_strMode = "Full";
64$strOutputFormat = "xml" unless defined $strOutputFormat; # global var with default
65
66%gv_IndivList = ("archives" => 0,
67        "etc" => 0,
68        "images" => 0,
69        "building" => 0,
70        "import" => 0,
71        "index" => 0,
72        "log" => 0,
73        "metadata" => 0,
74        "perllib" => 0,
75        "temp" => 0);
76#----##
77
78#--System Setup---------------------------
79sub SetMode
80{
81    my ($strModeList) = @_;
82    $strModeList =~ s/\|/ /g;
83    my @Modes = split(" ",$strModeList);
84
85    my $blnIndividual = "true";
86    my $blnInitial = "false";
87    my $blnFull = "false";
88
89
90    foreach $strEachMode (@Modes)
91    {
92        if($strEachMode eq "all")
93        {
94            $blnFull = "true";
95            $blnIndividual = "false";
96        }
97        elsif($strEachMode eq "init")
98        {
99            $blnInitial = "true";
100            $blnIndividual = "false";
101        }
102        else
103        {
104            if(defined $gv_IndivList{$strEachMode})
105            {
106                $gv_IndivList{$strEachMode} = 1;
107            }
108            else
109            {
110                die Help("Error: used undefined mode");
111            }
112        }
113    }
114
115    if($blnFull eq "true") {return "Full";}
116    elsif($blnInitial eq "true") {return "Initial";}
117    elsif($blnIndividual eq "true") {return "Individual";}
118    else {die "Error occured in function SetMode!!\n";}
119}
120#----##
121
122#--System Process-------------------------
123sub IndivTest
124{
125    my ($strModelCol,$strTestCol,$strColName) = @_;
126    my @Errors = ();
127    my $intNumberOfErrors = 0;
128    foreach $strEachFolder (keys %gv_IndivList)
129    {
130        if($gv_IndivList{$strEachFolder} == 1)
131        {
132           
133            VobPrint("Start Comparing \"$strEachFolder\"\n",0);
134            my $strModelFolder = &FileUtils::filenameConcatenate($strModelCol,$strEachFolder);
135            my $strTestFolder = &FileUtils::filenameConcatenate($strTestCol,$strEachFolder);
136            $intNumberOfErrors = scalar(@Errors);
137            push(@Errors,TestEach($strModelFolder,$strTestFolder,0,$strColName));
138            $intNumberOfErrors = scalar(@Errors) - $intNumberOfErrors;
139            VobPrint("End Comparing \"$strEachFolder\"\n",0);
140            VobPrint("Difference Found: $intNumberOfErrors\n",0);
141            VobPrint ("\n",0);
142        }
143    }
144    return @Errors;
145}
146
147sub InitTest
148{
149
150    my ($strModelCol,$strTestCol,$strColName) = @_;
151    my $intLevel = 1;
152    my @Errors;
153   
154    # Testing Log files
155#   my $strModelLog = &FileUtils::filenameConcatenate($strModelCol,"log");
156#   my $strTestLog = &FileUtils::filenameConcatenate($strTestCol,"log");
157#   
158#   if(-e $strModelLog && -e $strTestLog)
159#   {
160#   my $strLogError = logdiff::test_log($strModelLog,$strTestLog);
161#   if($strLogError ne "")
162#   {
163#       AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
164#       VobPrint ("$strLogError\n",$intLevel);
165#
166#       $strLogError = "$strLogError";
167#       $strLogError = "Difference Found at Log Folder Testing\n".$strLogError."\n";
168#       push(@Errors,$strLogError);
169#   }
170#   else
171#   {
172#       AlignPrint("Log Folder Comparison Result","Succeed",$intLevel);
173#   }
174#   }
175#   else
176#   {
177#   my $strErrorColName;
178#   my $strLogError;
179#   
180#   if(!(-e $strModelLog)){ $strErrorColName = $strErrorColName."(Model Collection)";}
181#   if(!(-e $strTestLog)){ $strErrorColName = $strErrorColName."(Test Collection)";}
182#
183#   AlignPrint("Log Folder Comparison Result","Failed",$intLevel);
184#   $strLogError = "Difference Report: No Log Folder found in $strErrorColName";
185#   VobPrint ("$strLogError\n",$intLevel);
186#   $strLogError = "Difference Found at Log Folder Testing (Log folders are only created using GLI)\n".$strLogError."\n";
187#
188#   push(@Errors,$strLogError);
189#   }
190#   VobPrint ("\n",$intLevel);
191   
192    # Testing the build.cfg
193    my $strModelBcfg =  &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
194    my $strTestBcfg =  &FileUtils::filenameConcatenate($strTestCol,"index","build.cfg");
195
196    if(-e $strModelBcfg && -e $strTestBcfg)
197    {
198        my $strBcfgError = cfgdiff::test_cfg($strModelBcfg,$strTestBcfg,"build.cfg");
199        if($strBcfgError ne "")
200        {
201            if( $strOutputFormat eq "xml" ) {
202                print "<build-cfg succeeded=\"no\">\n<message>";
203            } else {
204                AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
205            }
206
207            VobPrint ("$strBcfgError",$intLevel);
208            if( $strOutputFormat eq "xml" ) {
209                print "</message></build-cfg>";
210            }
211
212            $strBcfgError = "$strBcfgError";
213            $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
214            push(@Errors,$strBcfgError);
215        }
216        else
217        {
218            if( $strOutputFormat eq "xml" ) {
219                print "<build-cfg succeeded=\"yes\"/>";
220            } else {
221                AlignPrint("Config File(build.cfg) Comparison Result","Succeed",$intLevel);
222            }
223        }
224    }
225    else
226    {
227        my $strErrorColName;
228        my $strBcfgError;
229       
230        if(!(-e $strModelBcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
231        if(!(-e $strTestBcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
232
233        AlignPrint("Config File(build.cfg) Comparison Result","Failed",$intLevel);
234        $strBcfgError = "Difference Report: No Config files found in $strErrorColName";
235        VobPrint ("$strBcfgError\n",$intLevel);
236        $strBcfgError = "Difference Found at Config File(build.cfg) Comparison\n".$strBcfgError."\n";
237
238        push(@Errors,$strBcfgError);
239    }
240    VobPrint ("\n",$intLevel);
241
242    # Testing the collect.cfg
243
244    my $strModelCcfg =  &FileUtils::filenameConcatenate($strModelCol,"etc","collect.cfg");
245    my $strTestCcfg =  &FileUtils::filenameConcatenate($strTestCol,"etc","collect.cfg");
246   
247    if(-e $strModelCcfg && -e $strTestCcfg)
248    {
249        my $strCcfgError = cfgdiff::test_cfg($strModelCcfg,$strTestCcfg,"collect.cfg");
250        if($strCcfgError ne "")
251        {
252            if( $strOutputFormat eq "xml" ) {
253                print "<collect-cfg succeeded=\"no\"><message>";
254            } else {
255                AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
256            }
257
258            VobPrint ("$strCcfgError",$intLevel);
259
260            if( $strOutputFormat eq "xml" ) {
261                print "</message></collect-cfg>";
262            }
263
264            $strCcfgError = "$strCcfgError";
265            $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
266            push(@Errors,$strCcfgError);
267        }
268        else
269        {
270            if( $strOutputFormat eq "xml" ) {
271                print "<collect-cfg succeeded=\"yes\"/>";
272            } else {
273                AlignPrint("Config File(collect.cfg) Comparison Result","Succeed",$intLevel);
274            }
275        }
276    }
277    else
278    {
279        my $strErrorColName;
280        my $strCcfgError;
281
282        if(!(-e $strModelCcfg)){ $strErrorColName = $strErrorColName."(Model Collection)";}
283        if(!(-e $strTestCcfg)){ $strErrorColName = $strErrorColName."(Test Collection)";}
284
285        AlignPrint("Config File(collect.cfg) Comparison Result","Failed",$intLevel);
286        $strCcfgError = "Difference Report: No Config files found in $strErrorColName";
287        VobPrint ("$strCcfgError\n",$intLevel);
288        $strCcfgError = "Difference Found at Config File(collect.cfg) Comparison\n".$strCcfgError."\n";
289   
290        push(@Errors,$strCcfgError);
291    }
292
293    VobPrint ("\n",$intLevel);
294   
295    # Testing databases
296
297    # index
298    my $dbext = ".gdb"; # assume we're working with gdbm
299    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName");
300    if(-f "$strModelGdb$dbext") {
301        $strModelGdb .= $dbext;
302    } else { # gdbm file does not exist, try for jdbm file
303        $gdbext = ".jdb";
304        $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName".$dbext);
305    }
306    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName".$dbext);
307    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
308    if($strGdbError) {
309        push(@Errors,$strGdbError);
310    }
311   
312    # archives
313    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc".$dbext);
314    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc".$dbext);
315    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
316    if($strGdbError) {
317        push(@Errors,$strGdbError);
318    }
319   
320    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src".$dbext);
321        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src".$dbext);
322        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
323        if($strGdbError) {
324            push(@Errors,$strGdbError);
325        }
326
327    VobPrint ("\n",$intLevel);
328
329    return @Errors;
330}
331
332
333# At present handles gdbm - need to expand to allow for jdbm and other db types
334sub GdbDiff
335{
336    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol) = @_;
337
338    my $strGdbError = 0;
339
340    if(-e $strModelGdb && -e $strTestGdb)
341    {
342    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
343    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol);
344    if($strGdbError ne "")
345    {
346        if( $strOutputFormat eq "xml" ) {
347        print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
348        } else {
349        AlignPrint("Database Comparsion Result","Failed",$intLevel);
350        }
351        VobPrint ("$strGdbError\n",$intLevel);
352       
353        if( $strOutputFormat eq "xml" ) {
354        print "</message></database>";
355        }
356       
357        $strGdbError = "$strGdbError";
358        $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
359        #push(@Errors,$strGdbError);
360    }
361    else
362    {
363        if( $strOutputFormat eq "xml" ) {
364        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
365        } else {
366        AlignPrint("Database Comparsion Result","Succeed",$intLevel);
367        }
368    }
369    }
370    else
371    {
372    my $strErrorColName;
373   
374    if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
375    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
376   
377    AlignPrint("Database Comparsion Result","Failed",$intLevel);
378   
379    $strGdbError = "Difference Report: No Database files found in $strErrorColName";
380    VobPrint ("$strGdbError\n",$intLevel);
381   
382    $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
383
384    }
385   
386    return $strGdbError;
387}
388
389sub FullTest
390{
391    my ($strModelCol,$strTestCol,$strColName) = @_;
392    my @Errors = ();
393    my $intLevel = 0;
394    my $intNumberDiffs = 0;
395   
396    # <Initial Test>
397    if( $strOutputFormat eq "xml" ) {
398        #print "<initial-test>";
399    } else {
400        VobPrint("Initial Testing Start\n",$intLevel);
401    }
402
403    @Errors = InitTest($strModelCol,$strTestCol,$strColName);
404    $intNumberDiffs = scalar(@Errors);
405
406    if( $strOutputFormat eq "xml" ) {
407        #print "</initial-test>";
408    } else {
409        VobPrint("Initial Testing End\n",$intLevel);
410        VobPrint("Difference Found in Initial Testing: $intNumberDiffs\n",$intLevel);
411        VobPrint("\n",$intLevel);
412    }
413    # </Initial Test>
414
415    # <Detailed Test>
416    if( $strOutputFormat eq "xml" ) {
417        #print "<detailed-test>";
418    } else {
419        VobPrint("Detail Testing Start\n",$intLevel);
420    }
421    push(@Errors,TestEach($strModelCol,$strTestCol,$intLevel,$strColName));
422    $intNumberDiffs = scalar(@Errors) - $intNumberDiffs;
423
424    if( $strOutputFormat eq "xml" ) {
425        #print "</detailed-test>";
426    } else {
427        VobPrint("Detail Testing End\n",$intLevel);
428        VobPrint("Difference Found in Detail Testing: $intNumberDiffs\n",$intLevel);
429    }
430    # </Detailed Test>
431
432    return @Errors;
433}
434#----##
435
436
437#--Other System Utilities
438sub PrintUsage
439{
440    my ($strProgName) = @_;
441    if ( $strOutputFormat eq "xml" ) {
442        print "<error>usage incorrect</error>\n";
443    } else {
444        print STDERR "Usage: $strProgName test-col [more-col] [-verbosity d] [-mode modes] [-eshow] [-estop]\n";
445    }
446    Help("Error: used incorrect parameters");
447}
448
449sub Help
450{
451    my ($strError) = @_;
452    my $aryptHelps =
453    [ { 'name' => "verbosity",
454        'type' => "scale",
455        'argu' => "a integer" ,
456        'descrip' => "this parameter setup the verbosity of the testing result"},
457      { 'name' => "mode",
458        'type' => "option",
459        'argu' => "mode type \"[all|init|archives|building|etc|images|import|index|perllib|tmp]\" default to \"all\"" ,
460        'descrip' => "setup testing mode: all-full testing, init-initial testing (include configuration file test,database testing and log testing), others-for individual folder testing"},
461      { 'name' => "estop",
462        'type' => "flag",
463        'argu' => "NULL" ,
464        'descrip' => "Set then system will stop once it meet an error"},
465      { 'name' => "eshow",
466        'type' => "flag",
467        'argu' => "NULL" ,
468        'descrip' => "Set then system will show the error summary"}
469      ];
470   
471
472    if ( $strOutputFormat ne "xml" ) {
473        print "$strError\n";
474
475        foreach my $hashOneArg (@{$aryptHelps})
476        {
477            print "\n----------------------------\n";
478            print "Parameters: -".$hashOneArg->{"name"}."\n";
479            print "Type: ".$hashOneArg->{"type"}."\n";
480            print "Supply Argument: ".$hashOneArg->{"argu"}."\n";
481            print "Description: ".$hashOneArg->{"descrip"}."\n";
482            print "----------------------------\n";
483        }
484    }
485}
486
487sub OutputStart
488{
489    my ($strColName) = @_;
490    my $intPadding = 17 - length($strColName);
491
492    if ( $strOutputFormat eq "xml" ) {
493        print "<diffcol>\n";
494    } else {
495        print "+---------------------------------------------------------+\n";
496        print "|                                                         |\n";
497        print "|              Start Testing Collection: $strColName"," " x $intPadding,"|\n";
498        print "|                                                         |\n";
499        print "+---------------------------------------------------------+\n\n";
500    }
501}
502
503sub OutputEnd
504{
505    my ($strColName,$aryptErrors) = @_;
506    my $intPadding = 12 - length($strColName);
507    if ( $strOutputFormat eq "xml" ) {
508        print "</diffcol>\n";
509    } else {
510        print "\n";
511        print "+---------------------------------------------------------+\n";
512        print "|                                                         |\n";
513        print "|            Result of Collection Testing: $strColName"," " x $intPadding,"|\n";
514        print "|                                                         |\n";
515        print "+---------------------------------------------------------+\n\n";
516    }
517
518    my $intTotalErrors = scalar(@{$aryptErrors});
519    if ( $strOutputFormat ne "xml" ) {
520        print "Checking completed, there is $intTotalErrors error(s) found.\n";
521    }
522
523    if($gv_blnErrorShow ne "off")
524    {
525        foreach my $strEachError (@{$aryptErrors})
526        {
527            if ( $strOutputFormat eq "xml" ) {
528                print "<error>";
529                print $strEachError;
530                print "</error>\n";
531            } else {
532                print "+---------------------------------------------------------+\n";
533                print "|                        Error                            |\n";
534                print "+---------------------------------------------------------+\n\n";
535                print "$strEachError\n\n";
536            }
537        }
538    }
539    else
540    {
541        if ( $strOutputFormat ne "xml" ) {
542            print "Use -eshow to show the error detail\n\n";
543        }
544    }
545}
546
547sub AlignPrint
548{
549    my ($strMainString,$strStatus,$intLevel) = @_;
550    my $intDot = 100 - length($strMainString) - length($strStatus);
551    VobPrint ($strMainString."."x$intDot."$strStatus\n",$intLevel);
552}
553
554
555# this function is only called on DocXMLFiles.
556# so far, only doc.xml files need special Windows processing (db files' OS-sensitivity are handled in gdbdiff.pm)
557# Returns true if the doc.xml contains windows style slashes in the gsdlsourcefilename meta field
558sub isDocOrMETSXMLFileWindows
559{
560    my ($file_contents) = @_;
561   
562    #return ($file_contents =~ m/\\/) ? 1 : 0; # windows slashes detected.
563   
564    # Is this a better test? look for gsdlsourcefilename, see if it contains windows slashes.
565    # what if $gsdlsourcefilename is not guaranteed to exist in all doc.xml files?
566   
567    # for doc.xml:
568    #     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
569    if($file_contents =~ m@<(.*?:)?Metadata name="gsdlsourcefilename">([^>]*)</(.*?:)?Metadata>@m) {
570        $gsdlsourcefilename = $2;
571        if($gsdlsourcefilename =~ m/\\/) { # windows slashes detected.
572            return 1;
573        }
574    } elsif($file_contents =~ m@<Doc (.*)? file="(.*)?\\doc.xml" ([^>]*)?>@) {  # windows slashes detected in doc.xml in index/text/HASHxxx.dir
575        return 1;
576    }
577   
578    return 0;   
579}
580
581sub TestEach
582{
583    my ($strModel,$strTest,$intLevel,$strColName) = @_;
584    my @Errors = ();
585
586    $intLevel++;
587    if (-d $strModel && -d $strTest)
588    {   
589        my @aryInModel = &diffutil::files_in_dir($strModel);
590        my @aryInTest = &diffutil::files_in_dir($strTest);
591       
592        # Files to be skipped because they get generated on one OS but not the other
593        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
594        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail.log$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
595        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
596        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
597       
598        # Now check all remaining files in the folder exist in both model and test collections
599        my @aryTwoPointers = FolderTesting(\@aryInModel,\@aryInTest,$strModel,$strTest,$intLevel);
600        my @aryCorrectFiles = @{$aryTwoPointers[1]};
601        @Errors = @{$aryTwoPointers[0]};
602       
603        if(scalar(@Errors) == 0)
604        {
605            foreach my $strEachFile (@aryInModel)
606            {
607                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
608                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
609                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
610                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file #  || $strEachFile =~ m/\~$/g to get rid of ~ files
611                {
612                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
613                }
614                else
615                {
616                    if ( $strOutputFormat eq "xml" ) {
617                        print "<file-comparision location=\"$strEachFile\" blocked=\"yes\" succeeded=\"yes\"/>";
618                    } else {
619                        VobPrint ("Blocked File Report: Test \"$strEachFile\" by using -mode \"init\"\n",$intLevel);
620                    }
621                }
622            }
623        }
624        else
625        {
626            foreach my $strEachFile (@aryCorrectFiles)
627            {
628                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
629                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
630                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/collect\.bak$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) #  || $strEachFile =~ m/\~$/g to get rid of ~ files
631                {
632                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName));
633                }
634            }
635        }
636        if($intLevel == $gv_intVerbosity)
637        {
638            if(scalar(@Errors) == 0){ AlignPrint("Contents Comparsion","Succeed",$intLevel);}
639            else {  AlignPrint("Contents Comparsion","Failed",$intLevel);}
640        }
641    }
642    else
643    {
644        # allow for a namespace prefix to <Metadata> as happens in GreenstoneMETS docmets.xml files, e.g. <gsdl3:Metadata></gsdl3:Metadata>
645        my $ignore_line_re = "<(.*?:)?Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate|ex.File.FileModifyDate|ex.File.FilePermissions|ImageSize|FileSize|ex.Composite.LightValue)\">.*</(.*?:)?Metadata>\\s*\\n*";
646       
647        my $strResult;
648
649        # for doc.xml and docmets.xml files, need to ignore many date fields. Filter these out before diffing,
650        # in case these don't appear in the same order between collections, since
651        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
652        # when they can show up as unnecessary diff 'errors'
653
654        my ($model_contents, $test_contents);
655
656        # archives/doc.xml files, archives/docmets.xml files and index/text/doc.xml files
657        if($strModel =~ m/doc(mets)?\.xml$/ || ($strModel =~ m@index[\\/]text@ && $strModel =~ m/doc\.xml$/)) {
658
659        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
660        sysread(FIN, $model_contents, -s FIN);
661        close(FIN);
662        open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
663        sysread(FIN, $test_contents, -s FIN);
664        close(FIN);
665
666        $model_contents =~ s/$ignore_line_re//g;
667        $test_contents =~ s/$ignore_line_re//g;
668
669
670        # equalise/normalise the two doc.xml/docmets.xml files for OS differences, if there are any
671        # before comparing a windows test with a linux model or vice-versa
672        my $testIsWin = ($gv_test_os ne "compute") ? ($gv_test_os eq "windows") : &isDocOrMETSXMLFileWindows($test_contents);
673        my $modelIsWin = ($gv_model_os ne "compute") ? ($gv_model_os eq "windows") : &isDocOrMETSXMLFileWindows($model_contents);
674       
675        if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant
676       
677            my $win_contents = $testIsWin ? \$test_contents : \$model_contents;
678            my $lin_contents = $testIsWin ? \$model_contents : \$test_contents;
679           
680            # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
681            $$win_contents =~ s@[\r]@@g;           
682       
683            # make all single windows slashes into single unix slashes
684            # the 1 char look-ahead requires a double pass, otherwise import\3\3.pdf will get replaced with import/3\3.pdf
685            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
686            $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;             
687           
688            # make windows \r newlines into constant \n newlines. Already handled when \r got replaced
689            #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file
690           
691            #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?)
692            # $$win_contents =~ s@\r@\n@mg;
693           
694            if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
695                # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
696                # containing solitary carriage returns instead of linefeed
697                $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
698               
699               
700                # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
701                # since we've converted *all* single backslashes in the windows doc.xml to / (whether it was meant as a windows path slash or not).
702                # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison
703                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
704                $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g;
705               
706                # Advanced Beatles collection,
707                # linux version contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http:///\\&quot;http://www.boskowan.com/ (extra / slash)
708                # while windows contains: IMG SRC=_httpextlink_&amp;amp;rl=1&amp;amp;href=http://\\&quot;http://www.boskowan.com/
709                # Normalising to windows version for doing a diff
710                $$lin_contents =~ s@href=http:///@href=http://@g;               
711            }   
712        }
713       
714        # processing particular to doc.xml 
715        if($strModel =~ m/doc\.xml$/) {
716            # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
717            # these tmpdirs are located inside the collection directory
718            $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
719            $test_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
720           
721            # remove all absolute paths upto collect folder from <Metadata /> elements
722            $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;
723            $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g;       
724           
725            # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
726            # These tmpdirs are located inside the toplevel *greenstone* directory
727            (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
728            $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
729            my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
730           
731            if($test_contents =~ m@$tmpfile_regex@) {           
732                # found a match, replace the tmp file name with "random", keeping the original file extension
733                # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
734           
735                my ($old_tmp_filename, $ext) = ($1, $2);           
736                my $new_tmp_filename = "random";           
737               
738                ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
739                #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
740                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
741                if($5) {
742                    $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
743                } else { # OrigSource contains only the filename
744                    $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
745                }
746               
747                # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename           
748                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
749                if($5) {
750                    $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
751                } else { # OrigSource contains only the filename
752                    $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
753                }
754            }
755
756        } # finished special processing of doc.xml files
757       
758        my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory       
759#       &gdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
760#       &gdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
761#       if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
762#       &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
763#       &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
764#       }
765       
766
767       
768        # now can diff the normalised versions of the doc.xml/docmets.xml files:
769        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
770
771        } else {
772        $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
773        }
774
775        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
776        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
777        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
778
779        #$strResult = GeneralOutput($strResult);
780        if ( $strOutputFormat eq "xml" ) {
781            #
782        } else {
783            VobPrint ("Comparing Files:\n\"$strModel\"\n\"$strTest\"\n",$intLevel);
784        }
785        if ($strResult eq "")
786        {
787            if ( $strOutputFormat eq "xml" ) {
788                print "<file-comparison location=\"$strModel\" succeeded=\"yes\"/>\n";
789            } else {
790                AlignPrint("Comparing File","Succeed",$intLevel);
791            }
792        }
793        else
794        {
795#           print STDERR "**** Diff is: $strResult\n"; # print any differences to the screen
796
797            my $strOutput = "Difference Report:\n$strResult\n";
798            if ( $strOutputFormat eq "xml" ) {
799                print "<file-comparison location=\"$strModel\" succeeded=\"no\"><message>";
800            } else {
801                AlignPrint("Comparing File","Failed",$intLevel);
802            }
803
804            #$result=`file -b $strModel`; # linux specific test for binary file
805            $result = (-B $strModel) ? 1 : 0; # perl test for binary file, see http://perldoc.perl.org/functions/-X.html
806            if ( "$result" =~  "data" ) {
807                     VobPrint( "These binary files differ", $intLevel );
808            } else {
809                     VobPrint ( "$strOutput" , $intLevel);
810            }
811
812
813            if ( $strOutputFormat eq "xml" ) {
814                print "</message></file-comparison>";
815            }
816
817            if($gv_blnErrorStop ne "off") { exit; }
818            push(@Errors,"File content comparison failed($strModel):\n$strOutput");
819        }
820    }
821
822    return @Errors;
823}
824
825
826sub FolderTesting
827{
828    my ($aryptModel,$aryptTest,$strModelFolder,$strTestFolder,$intLevel) = @_;
829    my %hashCount = ();
830    my @Errors = ();
831    my @CorrectFiles = ();
832    my @TwoPointers = (\@Errors,\@CorrectFiles);
833
834    if ( $strOutputFormat eq "xml" ) {
835        #print "<folder-comparison location=\"$strModelFolder\">\n";       
836    } else {
837        VobPrint ("Comparing Folder contents at \"$strModelFolder\"\n",$intLevel);
838    }
839
840    foreach my $strEachItem (@$aryptModel) {$hashCount{$strEachItem} = 'M'}
841    foreach my $strEachItem (@$aryptTest)
842    {
843        if(defined $hashCount{$strEachItem} && $hashCount{$strEachItem} eq 'M') {$hashCount{$strEachItem} = 'B';}
844        else {$hashCount{$strEachItem} = 'T';}
845    }
846
847    if( scalar(@$aryptModel)==scalar(@$aryptTest) && scalar(@$aryptModel)==scalar(keys %hashCount) )
848    {
849        if ( $strOutputFormat eq "xml" ) {
850            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"yes\"/>\n";
851        } else {
852            AlignPrint("Folder Comparsion","Succeed",$intLevel);
853        }
854        return @TwoPointers;
855    }
856    else
857    {
858        if ( $strOutputFormat eq "xml" ) {
859            print "<folder-comparison location=\"$strModelFolder\" succeeded=\"no\"><message>\n";
860        } else {
861            AlignPrint("Folder Comparsion","Failed",$intLevel);
862        }
863
864        foreach my $strEachItem (keys %hashCount)
865        {
866            if($hashCount{$strEachItem} ne 'B')
867            {
868                my $strOutput = "";
869                my $strReport = "";
870
871                if($hashCount{$strEachItem} eq 'M')
872                {
873                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Test Collection";
874                    $strReport = "Difference Report: difference found at $strTestFolder";
875                }
876                elsif($hashCount{$strEachItem} eq 'T')
877                {
878                    $strOutput = "Difference Found at FolderTesting: \"$strEachItem\" is not found in the Model Collection";
879                    $strReport = "Difference Report: difference found at $strModelFolder";
880                }
881                else {die "Error occours in diffcol_mk2::TestingFolder\n"}
882
883                VobPrint ("$strOutput\n",$intLevel);
884                $strOutput = $strOutput."\n\t".$strReport."\n";
885                push(@Errors,$strOutput);
886            }
887            else {push(@CorrectFiles,$strEachItem);}
888        }
889        if( $strOutputFormat eq "xml" ) {
890            print "</message></folder-comparison>";
891        }
892
893        return @TwoPointers;
894    }
895}
896
897sub VobPrint
898{
899    my ($strOutput, $intLevel) = @_;
900    my $strTab = "";
901    my $intTab = int($intLevel/2);
902    if($intLevel <= $gv_intVerbosity)
903    {
904        if($intLevel >= 1)
905        {
906            $strTab = "\t"x($intTab+1);
907            $strOutput =~ s/\n$//;
908            $strOutput =~ s/\n/\n$strTab/g;
909            #$strTab =~ s/"\n"/"\n"."\t"x($intTab+1)/g;
910        }
911
912        if( $strOutputFormat eq "xml" ) {
913            $strOutput =~ s/&/&amp;/g;
914            $strOutput =~ s/</&lt;/g;
915            $strOutput =~ s/>/&gt;/g;
916        }
917
918      if ( length( $strOutput ) > 1000 ) {
919            $strOutput = substr( $strOutput, 0, 978);
920
921        # make sure there are no stray ampersands/partial ampersands that need to be completed as &lt; or &gt; or &amp;
922        if($strOutput =~ m/&(.{1,2})?$/ || $strOutput =~ m/&(am?p?)$/) { # &lt => &lt; or &g => &gt; or &a(m)=> &amp; or &amp => &amp;
923        if(defined $1 && $1) {
924            my $rest = $1;
925            if($rest =~ m/^a/) {
926                $strOutput =~ s@am?p?$@amp;@;
927            }
928            elsif($rest eq "g" || $rest eq "l") {
929            $strOutput .= "t;"; # close the known tag
930            }
931            elsif($rest eq "gt" || $rest eq "lt") {
932            $strOutput .= ";";
933            }           
934        } else { # & on its own
935            #$strOutput = substr( $strOutput, 0, 977); # lop off the &
936            $strOutput .= "gt;"; # 50% chance the closing tag is what was missing (else can make this &amp;)
937            # but even so, when the xslt is applied to report it doesn't break as long as & is not left dangling
938        }
939        }
940        $strOutput .= "... (output truncated)";
941      }
942
943
944        print $strTab.$strOutput."\n";
945    }
946}
947#----##
948
949
950#--Main System----------------------------
951#-----------------------------------------
952# Name: main
953# Perameters: arguments from command line
954# Pre-condition: testing will start by calling this main function.
955# Post-condition: output the test results for one or more collections.
956#-----------------------------------------
957sub main
958{
959    my ($intVerbosity,$strErrorStop,$strErrorShow,$strMode,$test_os,$model_os);
960    my $strProgName = $0;
961    my $intArgc = scalar(@ARGV);
962   
963    #--System Arguments Setup
964    if (!parsargv::parse(\@ARGV,
965        'estop//off', \$strErrorStop,
966        'eshow//off', \$strErrorShow,
967        'verbosity/\d+/1', \$intVerbosity,
968        'mode/[\w\-]+/all', \$strMode,
969        'output/[\w\-]+/text', \$strOutputFormat,
970        'testos/(windows|linux|darwin|compute)/compute', \$test_os, # param-name,regex,default
971        'modelos/(windows|linux|darwin|compute)/compute', \$model_os # actually defaults to linux in task.pl
972        )) {
973        PrintUsage($strProgName);
974        die "\n";
975    }
976
977    if ($intArgc<1) {
978        PrintUsage($strProgName);
979        die "\n";
980    }
981
982    $gv_test_os = $test_os; # if not specified, defaults to "compute"
983    $gv_model_os = $model_os; # tends to be linux
984
985    $gv_blnErrorStop = $strErrorStop;
986    $gv_blnErrorShow = $strErrorShow;
987    $gv_intVerbosity = $intVerbosity;
988    $gv_strMode = SetMode($strMode);
989
990    #----##
991
992    #--Collection(s) Testing
993    foreach $strColName (@ARGV)
994    {
995        my @ErrorsInEachCol;
996        my $strModelCol = &FileUtils::filenameConcatenate($gv_strModelColRoot,$strColName);
997        my $strTestCol = &FileUtils::filenameConcatenate($gv_strTestColRoot,$strColName);
998
999        #--Output(Start)
1000        OutputStart($strColName);
1001        #----##
1002       
1003        if(-e $strModelCol && -e $strTestCol )
1004        {
1005           
1006            #--Individual Testing
1007            if ($gv_strMode eq "Individual")
1008            {
1009                @ErrorsInEachCol = IndivTest($strModelCol,$strTestCol,$strColName);
1010            }
1011            #----##
1012           
1013            #--Initial Testing
1014            elsif   ($gv_strMode eq "Initial")
1015            {
1016                @ErrorsInEachCol = InitTest($strModelCol,$strTestCol,$strColName);
1017            }
1018            #----##
1019           
1020            #--Full Testing
1021            elsif   ($gv_strMode eq "Full")
1022            {
1023                @ErrorsInEachCol = FullTest($strModelCol,$strTestCol,$strColName);
1024            }
1025            #----##
1026           
1027            #--Error Checking
1028            else
1029            {
1030                if ( $strOutputFormat eq "xml" ) {
1031                    die "<error>Error occoured in main function</error>\n";
1032                } else {
1033                    die "Error occoured in main function.\n";
1034                }
1035            }
1036            #----##
1037           
1038        }
1039        else
1040        {
1041            if( $strOutputFormat eq "xml" ) {
1042                die "<error>Cannot find collection: $strColName</error>\n";
1043            } else {
1044                die "Error: cannot find collection: $strColName\n";
1045            }
1046        }
1047        #----##
1048
1049        #--Output(Results and Errors)
1050        OutputEnd($strColName,\@ErrorsInEachCol);
1051        #----##
1052
1053    }
1054}
1055#----##
1056
1057&main();
Note: See TracBrowser for help on using the browser.