Ignore:
Timestamp:
2013-06-12T18:43:04+12:00 (11 years ago)
Author:
ak19
Message:

Fixing up the diffcol process so it works better. In its current state it finds no errors in the Small-HTML model collection. 1. Better handling of gdb databases (.idh files are now ignored): fields that are expected to differ, such as dates, are filtered out before doing the diff. The archiveinf-doc.gdb and archiveinf-src.gdb files are now handled too, and with the -sort flag Dr Bainbridge added to db2txt and the sorting of keys in perllib/dbutil/gdbmtxtgz, the ordering of keys in the database no longer affects the outcome. 2. Better handling of doc.xml files. Once again, date fields that will differ are filtered out before performing the diff. The earliestDatestamp file is ignored. 3. The task script now ensures that model-collect is up to date with the svn version before performing the diffcol testing.

Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27579 r27604  
    285285    VobPrint ("\n",$intLevel);
    286286   
    287     # Testing database
    288 
    289     my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.ldb");
    290     my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.ldb");
    291    
    292     if(-e $strModelGdb && -e $strTestGdb)
    293     {
    294         my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
    295         if($strGdbError ne "")
    296         {
    297             if( $strOutputFormat eq "xml" ) {
    298                 print "<database succeeded=\"no\"><message>";
    299             } else {
    300                 AlignPrint("Database Comparsion Result","Failed",$intLevel);
    301             }
    302             VobPrint ("$strGdbError\n",$intLevel);
    303    
    304             if( $strOutputFormat eq "xml" ) {
    305                 print "</message></database>";
    306             }
    307 
    308             $strGdbError = "$strGdbError";
    309             $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
    310             push(@Errors,$strGdbError);
    311         }
    312         else
    313         {
    314             if( $strOutputFormat eq "xml" ) {
    315                 print "<database succeeded=\"yes\"/>";
    316             } else {
    317                 AlignPrint("Database Comparsion Result","Succeed",$intLevel);
    318             }
    319         }
    320     }
    321     else
    322     {   
    323         my $strErrorColName;
    324         my $strGdbError;
    325        
    326         if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
    327         if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
    328    
     287    # Testing databases
     288
     289    # index
     290    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
     291    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");   
     292    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); # returns 0 if no error
     293    if($strGdbError) {
     294        push(@Errors,$strGdbError);
     295    }
     296   
     297    # archives
     298    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
     299    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");   
     300    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
     301    if($strGdbError) {
     302        push(@Errors,$strGdbError);
     303    }
     304   
     305    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
     306        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
     307        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName);
     308        if($strGdbError) {
     309            push(@Errors,$strGdbError);
     310        }
     311
     312    VobPrint ("\n",$intLevel);
     313
     314    return @Errors;
     315}
     316
     317
     318# At present handles gdbm - need to expand to allow for jdbm and other db types
     319sub GdbDiff
     320{
     321    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName) = @_;
     322
     323    my $strGdbError = 0;
     324
     325    if(-e $strModelGdb && -e $strTestGdb)
     326    {
     327    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
     328    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb,$strColName);
     329    if($strGdbError ne "")
     330    {
     331        if( $strOutputFormat eq "xml" ) {
     332        print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>";
     333        } else {
    329334        AlignPrint("Database Comparsion Result","Failed",$intLevel);
    330        
    331         $strGdbError = "Difference Report: No Database files found in $strErrorColName";
    332         VobPrint ("$strGdbError\n",$intLevel);
    333    
    334         $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
    335    
    336         push(@Errors,$strGdbError);
    337     }
    338 
    339     VobPrint ("\n",$intLevel);
    340 
    341     return @Errors;
     335        }
     336        VobPrint ("$strGdbError\n",$intLevel);
     337       
     338        if( $strOutputFormat eq "xml" ) {
     339        print "</message></database>";
     340        }
     341       
     342        $strGdbError = "$strGdbError";
     343        $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n";
     344        #push(@Errors,$strGdbError);
     345    }
     346    else
     347    {
     348        if( $strOutputFormat eq "xml" ) {
     349        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>";
     350        } else {
     351        AlignPrint("Database Comparsion Result","Succeed",$intLevel);
     352        }
     353    }
     354    }
     355    else
     356    {
     357    my $strErrorColName;
     358   
     359    if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";}
     360    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
     361   
     362    AlignPrint("Database Comparsion Result","Failed",$intLevel);
     363   
     364    $strGdbError = "Difference Report: No Database files found in $strErrorColName";
     365    VobPrint ("$strGdbError\n",$intLevel);
     366   
     367    $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n";
     368
     369    }
     370   
     371    return $strGdbError;
    342372}
    343373
     
    527557                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
    528558                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
    529                 if(!($strEachFile eq "log" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.(l|b)db$/g))
     559                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file
     560                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g))
    530561                {
    531562                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel));
     
    547578                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile);
    548579                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile);
    549                 if(!($strEachFile eq "log" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.(l|b)db$/g))
     580                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g))
    550581                {
    551582                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel));
     
    561592    else
    562593    {
    563         my $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
    564         $strResult = &diffutil::GenerateOutput($strResult,"^<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\$");
     594        my $ignore_line_re = "<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\\n*";
     595        my $strResult;
     596
     597        # for doc.xml files, need to ignore many date fields. Filter these out before diffing,
     598        # in case these don't appear in the same order between collections, since
     599        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done
     600        # when they can show up as unnecessary diff 'errors'
     601        if($strModel =~ m/doc\.xml$/) {
     602        my ($model_contents, $test_contents);
     603        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n";
     604        sysread(FIN, $model_contents, -s FIN);
     605        close(FIN);
     606        open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n";
     607        sysread(FIN, $test_contents, -s FIN);
     608        close(FIN);
     609
     610        $model_contents =~ s/$ignore_line_re//g;
     611        $test_contents =~ s/$ignore_line_re//g;
     612       
     613        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
     614
     615        } else {
     616        $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" };
     617        }
     618
     619        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter
     620        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs
     621        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$");
     622
    565623        #$strResult = GeneralOutput($strResult);
    566624        if ( $strOutputFormat eq "xml" ) {
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffutil.pm

    r21711 r27604  
    4747    else
    4848    {
    49     if($strModel =~ m/$strIgnoreExp/ && $strTest =~ m/$strIgnoreExp/)
     49    if($strModel =~ m/$strIgnoreExp/ && $strTest =~ m/$strIgnoreExp/) #if($strModel =~ m/$strIgnoreExp/mg && $strTest =~ m/$strIgnoreExp/mg)
    5050    {
    5151        return 1;
     
    6464    my $intCounter = 0;
    6565    my $hashptErrorLines;
    66    
     66
    6767    while(my $strFirstLine = shift(@aryErrors))
    6868    {
     
    106106        if($intModelLineCount == 1 && $intTestLineCount == 1 &&
    107107           &IgnoreExp($aryModelErrors[0],$aryModelErrors[0],$strIgnoreExp) == 1)
     108           #&IgnoreExp($aryModelErrors[0],$aryTestErrors[0],$strIgnoreExp) == 1) # 2nd param should be aryTestErrors?
    108109        {
    109110            $strAError = "";
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27579 r27604  
    3232sub test_gdb
    3333{
    34     my ($full_modeldb, $full_testdb) = @_;
     34    my ($full_modeldb, $full_testdb,$strColName) = @_;
    3535
    3636
    3737   # print "Now is testing database\n";
    3838   
    39     my $model_cmd = "db2txt $full_modeldb 2>&1";
    40     my $test_cmd  = "db2txt $full_testdb 2>&1";
     39    # need to sort text output of both test and model col database files, to normalise them for the comparison
     40    # the -sort option to db2txt was added specifically to support diffcol
     41    my $model_cmd = "db2txt -sort $full_modeldb 2>&1";
     42    my $test_cmd  = "db2txt -sort $full_testdb 2>&1";
    4143
    4244    my $model_text = readin_gdb($model_cmd);
    4345    my $test_text = readin_gdb($test_cmd);
     46
     47
     48    # filter out the fields that can be ignored in the two database files
     49    my $ignore_line_re = "\n<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>([^\n])*";
     50    $model_text =~ s/$ignore_line_re//g;
     51    $test_text =~ s/$ignore_line_re//g;
     52
     53
     54    # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files)
     55
     56    # Remember the original model col on SVN could have been built anywhere,
     57    # and in the gdb files, absolute paths are stored to the collection location.
     58    # Crop these paths to the collect/<colname> point.
     59   
     60    # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff,
     61    # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry
     62    # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext]
     63    # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm
     64
     65    $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
     66    $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
     67
     68
    4469    my $report_type = "OldStyle"; # Can not change this type.
    4570    my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type };
    4671   
     72    # leaving the ignore regex as it used to be in the following, in case it helps with single line comparisons
    4773    $diff_gdb = &diffutil::GenerateOutput($diff_gdb,"^<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>.*");
    4874
Note: See TracChangeset for help on using the changeset viewer.