Changeset 36807


Ignore:
Timestamp:
2022-10-16T18:51:49+13:00 (19 months ago)
Author:
anupama
Message:

We used to run diffcol as a nightly task only for GS2. Commit 36655 was the first stage of getting diffcol to work for GS3, but it skipped a lot of important code branches (like comparing the index\text\j/gdb files) in order to fix up the easier parts of the code first. I think the remainder of the diffcol scripts now work for GS3: the index\text\flatdb files are compared and diffcol works for them. So I can now commit the important changes, along with commented-out debugging statements, made to the diffcol scripts to get the full diffcol code working for GS3. I will commit again after removing the debugging statements. I still need to do a full local diffcol run again, and to test whether diffcol still works after locally undoing my sort field changes to some GS3 model cols (the recent commits to the Tudor, Word-PDF, Images-GPS and Multimedia collections), to see if Dr Bainbridge's PERL_HASH_SEED env var addition fixes all of those collections' diffcol failures, making the extra sorting redundant. In that case, I will recommit those model collections after updating their col configurations to not do the extra sorting.

Location:
other-projects/nightly-tasks/diffcol/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r36655 r36807  
    196196    ## where the model-col needs to be built & committed from a Win machine),
    197197    ## need to comment out the following 3 newly added lines and fix any remaining issues
    198     if($ENV{'GSVERSION'} eq "3") {
    199         return @Errors;
    200     }
    201    
     198#   if($ENV{'GSVERSION'} eq "3") {
     199#       return @Errors;
     200#   }
     201  if($ENV{'GSVERSION'} eq "3") {
    202202    # Testing the build.cfg
    203203    my $strModelBcfg =  &FileUtils::filenameConcatenate($strModelCol,"index","build.cfg");
     
    310310
    311311    VobPrint ("\n",$intLevel);
    312    
     312  }
    313313    # Testing databases
    314314
    315315    # index
    316     my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName.gdb");
    317     my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName.gdb");   
     316    my $flat_db_ext = ".gdb";
     317   
     318    my $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
     319    if(!(-e $strModelGdb)){ # try jdb
     320        $flat_db_ext = ".jdb";
     321        $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"index","text","$strColName$flat_db_ext");
     322    }
     323   
     324    #print STDERR "*********** USING DB EXTENSION: $flat_db_ext\n";
     325    #print STDERR "@@@ PERL_PERTURB_KEYS: ".$ENV{'PERL_PERTURB_KEYS'}."\n";
     326   
     327    my $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"index","text","$strColName$flat_db_ext");   
    318328    my $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol); # returns 0 if no error
    319329    if($strGdbError) {
     
    322332   
    323333    # archives
    324     $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc.gdb");
    325     $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc.gdb");   
     334    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-doc$flat_db_ext");
     335    $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-doc$flat_db_ext");   
    326336    $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
    327337    if($strGdbError) {
     
    329339    }
    330340   
    331     $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src.gdb");
    332         $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src.gdb");
     341    $strModelGdb =  &FileUtils::filenameConcatenate($strModelCol,"archives","archiveinf-src$flat_db_ext");
     342        $strTestGdb =  &FileUtils::filenameConcatenate($strTestCol,"archives","archiveinf-src$flat_db_ext");
    333343        $strGdbError = &GdbDiff($strModelGdb,$strTestGdb,$strOutputFormat,$intLevel,$strColName,$strTestCol,$strModelCol);
    334344        if($strGdbError) {
     
    352362    {
    353363    #my $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb);
     364   
     365#   $gv_blnDebugging = 1;
     366   
    354367    $strGdbError = gdbdiff::test_gdb($strModelGdb, $strTestGdb, $strColName,$gv_test_os, $gv_model_os,$strTestCol,$strModelCol, $gv_blnDebugging);
     368#   $gv_blnDebugging = 0;
     369   
    355370    if($strGdbError ne "")
    356371    {
     
    607622        # Files to be skipped because they get generated on one OS but not the other
    608623        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
    609         my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
     624        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
    610625        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
    611626        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
     
    10061021    $gv_test_os = $test_os; # if not specified, defaults to "compute"
    10071022    $gv_model_os = $model_os; # tends to be linux
     1023   
     1024    #print STDERR "@@@@@ test_os: $test_os\n";
     1025    #print STDERR "@@@@@ model_os: $model_os\n";
    10081026
    10091027    $gv_blnDebugging = $debugging;
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r35231 r36807  
    106106    $test_text =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g;
    107107
     108
     109#print STDERR "@@@@ DEBUGGING: $debugging\n";
     110#print STDERR "******** full_modeldb: $full_modeldb\n$model_text\n\n";
     111#print STDERR "******** full_testdb: $full_testdb\n$test_text\n\n";
     112   
    108113    # if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files
    109114    # in test and model collection to an even base for comparison
     
    231236    } # end of equalising differences between a windows collection's db file and linux coll's db file
    232237   
     238   
     239    # Windows or linux: if index is a flat db file, then ensure the docIDs listed in <contains> field of
     240    # both test and model flat db file are alphabetically sorted. So too the numbers in <mdoffset> field.
     241    # Despite PERL_PERTURB_KEYS envvar being set to 0 on both machines when generating model collections
     242    # and when test collections were generated on test machine, still collections like Images-GPS and some
     243    # other colls list items in <contains> and <mdoffset> in different orders. So reordering alphabetically.   
     244    #if($dbname =~ m/$strColName/) {   
     245        # regex modifiers mge: multi-line, global (replace as many as match), e allows function call in substitution
     246        ##$model_text =~ s@^<contains>(.*)@sort_contains_field($1, "MODEL", $debugging)@mge;
     247        ##$test_text =~ s@^<contains>(.*)@sort_contains_field($1, "TEST", $debugging)@mge;
     248    #   $model_text =~ s@^<(contains|mdoffset)>(.*)@sort_field_value($1, $2, "MODEL", $debugging)@mge;
     249    #   $test_text =~ s@^<(contains|mdoffset)>(.*)@sort_field_value($1, $2, "TEST", $debugging)@mge;       
     250    #}
     251   
    233252    # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
    234253    # These tmpdirs are located inside the toplevel *greenstone* directory
     
    302321    # Call diff?
    303322}
     323
     324# Unused, but may come in handy when debugging again: regex substitution helper function
     325sub sort_field_value {
     326    my($fieldname, $fieldvalue, $displayStr, $debugging) = @_;
     327   
     328    print STDERR "\n$displayStr BEFORE sort: <$fieldname>$fieldvalue\n" if($debugging);
     329   
     330    $fieldvalue =~ s@(\r|\n|\\n)*$@@; # get rid of trailing newlines/carriage returns
     331    my @values_list = split(';', $fieldvalue);
     332    @values_list = sort @values_list;
     333    $fieldvalue = "<$fieldname>".join(';', @values_list). "\n";
     334   
     335    print STDERR "$displayStr AFTER  sort: $fieldvalue\n" if($debugging);
     336   
     337    return $fieldvalue;
     338}
     339
    304340
    305341# returns true if the contents are windows AND it matters for the diffing on the db that it's windows
  • other-projects/nightly-tasks/diffcol/trunk/task.pl

    r36655 r36807  
    517517
    518518    my $imagickzip = &getImageMagickBins();
     519
     520    # To successfully compile up GS3 that gets checked out by diffcol (with --gs3 flag),
     521    # Need at least unzip and wget on path. So adding winbin to path.
     522    # Better to have dedicated bin folder with necessities for diffcol and add that to PATH?
     523   
     524#   if($isWin) { # steps from http://wiki.greenstone.org/doku.php?id=en:developer:windows_source_install
     525#       my $iconv_dir = filename_concat($greenstone3_home, "gs2build", "common-src", "indexers", "packages", "windows", "iconv");
     526#       chdir ("$iconv_dir");       
     527#       move("iconv-winVS14-VS2015-plus.zip", "iconv.zip"); #move("iconv.zip", "iconv_preVS14.zip");
     528        # https://www.oreilly.com/library/view/perl-cookbook/1565922433/ch09s09.html
     529#       rmtree("iconv");
     530       
     531        # need unzip, and maybe more from winbin   
     532#       my $winbin_dir = filename_concat($greenstone3_home, "gs2build", "bin", "windows");
     533#       $ENV{'PATH'} = $winbin_dir . ";" . $ENV{'PATH'};
     534#   }
    519535    chdir ("$greenstone3_home");
    520536
     
    721737    my $openofficeext = &filename_concat($greenstone_home, "ext", "open-office");
    722738    if(!-d $openofficeext && $install_version eq "2") { # We don't checkout openoffice with GS3
    723         # (and not all Win machines where me may want to run Win diffcol have openoffice installed)
     739        # (and not all Win machines where we may want to run Win diffcol have openoffice installed)
    724740        &getOpenOfficeExt();
    725741    }
Note: See TracChangeset for help on using the changeset viewer.