Changeset 27604

Show
Ignore:
Timestamp:
12.06.2013 18:43:04 (6 years ago)
Author:
ak19
Message:

Fixing up the diffcol process so it works better. The current state finds no errors in the Small-HTML model-collection. 1. Better handling of gdb databases (.idh files are now ignored): fields that are expected to differ, such as dates, are filtered out before doing the diff. Handles the archiveinf-doc.gdb and archiveinf-src.gdb files. With the sort flag Dr Bainbridge added to db2txt and the sorting of keys in perllib/dbutil/gdbmtxtgz, the ordering of keys in the database no longer affects the outcome. 2. Better handling of doc.xml files. Once more, date fields that will differ are filtered out before performing the diff. The earliestDatestamp file is ignored. 3. The task script now ensures that model-collect is up to date with the svn version when about to perform the diffcol testing.

Location:
other-projects/nightly-tasks/diffcol/trunk
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27579 r27604  
    285285    VobPrint ("\n",$intLevel); 
    286286     
    287     # Testing database 
    288  
    289     my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.ldb"); 
    290     my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.ldb"); 
    291      
    292     if(-e $strModelGdb && -e $strTestGdb) 
    293     { 
    294         my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb); 
    295         if($strGdbError ne "") 
    296         { 
    297             if( $strOutputFormat eq "xml" ) { 
    298                 print "<database succeeded=\"no\"><message>"; 
    299             } else { 
    300                 AlignPrint("Database Comparsion Result","Failed",$intLevel); 
    301             } 
    302             VobPrint ("$strGdbError\n",$intLevel); 
    303      
    304             if( $strOutputFormat eq "xml" ) { 
    305                 print "</message></database>"; 
    306             } 
    307  
    308             $strGdbError = "$strGdbError";  
    309             $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n"; 
    310             push(@Errors,$strGdbError); 
    311         } 
    312         else 
    313         { 
    314             if( $strOutputFormat eq "xml" ) { 
    315                 print "<database succeeded=\"yes\"/>"; 
    316             } else { 
    317                 AlignPrint("Database Comparsion Result","Succeed",$intLevel); 
    318             } 
    319         } 
    320     } 
    321     else  
    322     {    
    323         my $strErrorColName; 
    324         my $strGdbError; 
    325          
    326         if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";} 
    327         if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";} 
    328      
     287    # Testing databases 
     288 
     289    # index 
     290    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb"); 
     291    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");     
     292    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); # returns 0 if no error 
     293    if($strGdbError) { 
     294        push(@Errors,$strGdbError); 
     295    } 
     296     
     297    # archives 
     298    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb"); 
     299    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");     
     300    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); 
     301    if($strGdbError) { 
     302        push(@Errors,$strGdbError); 
     303    } 
     304     
     305    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb"); 
     306        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb"); 
     307        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName); 
     308        if($strGdbError) { 
     309            push(@Errors,$strGdbError); 
     310        } 
     311 
     312    VobPrint ("\n",$intLevel); 
     313 
     314    return @Errors; 
     315} 
     316 
     317 
     318# At present handles gdbm - need to expand to allow for jdbm and other db types 
     319sub GdbDiff 
     320{ 
     321    my ($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName) = @_; 
     322 
     323    my $strGdbError = 0; 
     324 
     325    if(-e $strModelGdb && -e $strTestGdb) 
     326    { 
     327    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb); 
     328    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb,$strColName); 
     329    if($strGdbError ne "") 
     330    { 
     331        if( $strOutputFormat eq "xml" ) { 
     332        print "<database succeeded=\"no\" location=\"$strModelGdb\"><message>"; 
     333        } else { 
    329334        AlignPrint("Database Comparsion Result","Failed",$intLevel); 
    330          
    331         $strGdbError = "Difference Report: No Database files found in $strErrorColName"; 
    332         VobPrint ("$strGdbError\n",$intLevel); 
    333      
    334         $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n"; 
    335      
    336         push(@Errors,$strGdbError); 
    337     } 
    338  
    339     VobPrint ("\n",$intLevel); 
    340  
    341     return @Errors; 
     335        } 
     336        VobPrint ("$strGdbError\n",$intLevel); 
     337         
     338        if( $strOutputFormat eq "xml" ) { 
     339        print "</message></database>"; 
     340        } 
     341         
     342        $strGdbError = "$strGdbError"; 
     343        $strGdbError = "Difference Found at Database Comparsion\n".$strGdbError."\n"; 
     344        #push(@Errors,$strGdbError); 
     345    } 
     346    else 
     347    { 
     348        if( $strOutputFormat eq "xml" ) { 
     349        print "<database succeeded=\"yes\" location=\"$strModelGdb\"/>"; 
     350        } else { 
     351        AlignPrint("Database Comparsion Result","Succeed",$intLevel); 
     352        } 
     353    } 
     354    } 
     355    else 
     356    { 
     357    my $strErrorColName; 
     358     
     359    if(!(-e $strModelGdb)){ $strErrorColName = $strErrorColName."(Model Collection)";} 
     360    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";} 
     361     
     362    AlignPrint("Database Comparsion Result","Failed",$intLevel); 
     363     
     364    $strGdbError = "Difference Report: No Database files found in $strErrorColName"; 
     365    VobPrint ("$strGdbError\n",$intLevel); 
     366     
     367    $strGdbError = "Difference Found at Database Comparison\n".$strGdbError."\n"; 
     368 
     369    } 
     370     
     371    return $strGdbError; 
    342372} 
    343373 
     
    527557                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 
    528558                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 
    529                 if(!($strEachFile eq "log" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.(l|b)db$/g)) 
     559                # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 
     560                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g)) 
    530561                { 
    531562                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); 
     
    547578                my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 
    548579                my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 
    549                 if(!($strEachFile eq "log" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.(l|b)db$/g)) 
     580                if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh)$/g)) 
    550581                { 
    551582                    push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel)); 
     
    561592    else 
    562593    { 
    563         my $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" }; 
    564         $strResult = &diffutil::GenerateOutput($strResult,"^<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\$"); 
     594        my $ignore_line_re = "<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\\n*"; 
     595        my $strResult; 
     596 
     597        # for doc.xml files, need to ignore many date fields. Filter these out before diffing, 
     598        # in case these don't appear in the same order between collections, since 
     599        # diffutil::GenerateOutput only handles the ignore_regex after a diff has been done 
     600        # when they can show up as unnecessary diff 'errors' 
     601        if($strModel =~ m/doc\.xml$/) { 
     602        my ($model_contents, $test_contents); 
     603        open(FIN,"<$strModel") or die "Unable to open $strModel...ERROR: $!\n"; 
     604        sysread(FIN, $model_contents, -s FIN); 
     605        close(FIN); 
     606        open(FIN,"<$strTest") or die "Unable to open $strTest...ERROR: $!\n"; 
     607        sysread(FIN, $test_contents, -s FIN); 
     608        close(FIN); 
     609 
     610        $model_contents =~ s/$ignore_line_re//g; 
     611        $test_contents =~ s/$ignore_line_re//g; 
     612         
     613        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; 
     614 
     615        } else { 
     616        $strResult = diff $strModel, $strTest, { STYLE => "OldStyle" }; 
     617        } 
     618 
     619        # The following tries to apply a regex to exclude fields after diffing. This is now no longer necessary since we filter 
     620        # these fields out now before the diff, but leaving it in in case different regexes at this point helps with single line diffs 
     621        $strResult = &diffutil::GenerateOutput($strResult,"^\\s*<Metadata name=\"(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)\">.*</Metadata>\\s*\$"); 
     622 
    565623        #$strResult = GeneralOutput($strResult); 
    566624        if ( $strOutputFormat eq "xml" ) { 
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffutil.pm

    r21711 r27604  
    4747    else 
    4848    { 
    49     if($strModel =~ m/$strIgnoreExp/ && $strTest =~ m/$strIgnoreExp/) 
     49    if($strModel =~ m/$strIgnoreExp/ && $strTest =~ m/$strIgnoreExp/) #if($strModel =~ m/$strIgnoreExp/mg && $strTest =~ m/$strIgnoreExp/mg) 
    5050    { 
    5151        return 1; 
     
    6464    my $intCounter = 0; 
    6565    my $hashptErrorLines; 
    66      
     66 
    6767    while(my $strFirstLine = shift(@aryErrors)) 
    6868    { 
     
    106106        if($intModelLineCount == 1 && $intTestLineCount == 1 &&  
    107107           &IgnoreExp($aryModelErrors[0],$aryModelErrors[0],$strIgnoreExp) == 1) 
     108           #&IgnoreExp($aryModelErrors[0],$aryTestErrors[0],$strIgnoreExp) == 1) # 2nd param should be aryTestErrors? 
    108109        { 
    109110            $strAError = ""; 
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27579 r27604  
    3232sub test_gdb 
    3333{ 
    34     my ($full_modeldb, $full_testdb) = @_; 
     34    my ($full_modeldb, $full_testdb,$strColName) = @_; 
    3535 
    3636 
    3737   # print "Now is testing database\n"; 
    3838     
    39     my $model_cmd = "db2txt $full_modeldb 2>&1"; 
    40     my $test_cmd  = "db2txt $full_testdb 2>&1"; 
     39    # need to sort text output of both test and model col database files, to normalise them for the comparison 
     40    # the -sort option to db2txt was added specifically to support diffcol 
     41    my $model_cmd = "db2txt -sort $full_modeldb 2>&1"; 
     42    my $test_cmd  = "db2txt -sort $full_testdb 2>&1"; 
    4143 
    4244    my $model_text = readin_gdb($model_cmd); 
    4345    my $test_text = readin_gdb($test_cmd); 
     46 
     47 
     48    # filter out the fields that can be ignored in the two database files 
     49    my $ignore_line_re = "\n<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>([^\n])*"; 
     50    $model_text =~ s/$ignore_line_re//g; 
     51    $test_text =~ s/$ignore_line_re//g; 
     52 
     53 
     54    # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files) 
     55 
     56    # Remember the original model col on SVN could have been built anywhere,  
     57    # and in the gdb files, absolute paths are stored to the collection location.  
     58    # Crop these paths to the collect/<colname> point. 
     59     
     60    # Entries are of the form [Entry] or <Entry>. In order to do a sensible diff, 
     61    # need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry 
     62    # E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext] 
     63    # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 
     64 
     65    $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
     66    $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 
     67 
     68 
    4469    my $report_type = "OldStyle"; # Can not change this type. 
    4570    my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type }; 
    4671     
     72    # leaving the ignore regex as it used to be in the following, in case it helps with single line comparisons 
    4773    $diff_gdb = &diffutil::GenerateOutput($diff_gdb,"^<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>.*"); 
    4874 
  • other-projects/nightly-tasks/diffcol/trunk/task

    r27557 r27604  
    1111# control if an existing compiled greenstone is used  
    1212# or, if one should be checked out, which revision to checkout from svn 
    13 SVN_OPT_R="-r head" 
     13SVN_OPT_REV="-r head" 
    1414#export GSDLHOME= 
    1515#export GSDL3SRCHOME= 
     
    168168    echo '<test time="'$dateid'" id="'$dateid'">' > $xmlout 
    169169 
     170    # make sure that diffcol/model-collect is up to date before copying it over to greenstone-home 
     171    echo "Updating $TASK_HOME/model-collect:" 
     172    #cd $TASK_HOME/model-collect 
     173    svn up $TASK_HOME/model-collect 
     174 
    170175    # go to whichever greenstone_home we're using 
    171176    cd $greenstone_home 
     
    271276function mail_simple_message() { 
    272277    # email out brief failure message if failed                                                        
    273     echo "Checking if failed... " 
     278    echo "Checking if successful... " 
    274279    result=`java org.apache.xalan.xslt.Process -IN "$xmlout" -XSL "$TASK_HOME/xsl/passed-or-not.xsl"` 
    275280    echo "result: "$result 
     
    287292 
    288293    # email out with report attached, if the tests failed                                                       
    289     echo "Checking if failed... " 
     294    echo "Checking if successful... " 
    290295    result=`java org.apache.xalan.xslt.Process -IN "$xmlout" -XSL "$TASK_HOME/xsl/passed-or-not.xsl"` 
    291296    echo "result: "$result 
    292297    if [ "$result" != "yes" ]; then 
    293298    echo $gsdl' regression test for '$dateid' failed' | mutt -a $DATA_DIR'/report-'$dateid'.html' -s 'Regression Test Failed' -- $MONITOR_EMAIL 
     299    echo "Sent mail with report attached." 
    294300    fi 
    295     echo "Sent mail with report attached." 
    296301} 
    297302 
     
    341346elif [ "$action" == "upload" ]; then 
    342347    upload 
    343     mail_simple_message 
     348#   mail_simple_message 
    344349    mail_with_report_attached 
    345350elif [ "$action" == "all" ]; then