Ignore:
Timestamp:
2023-03-08T20:01:55+13:00 (14 months ago)
Author:
anupama
Message:

These changes seem to have fixed the broken parts of diffcol for GS2 on Linux, based on running the diff again locally on a checked-out GS2 and all the pre-built GS2 test collections.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r36876 r37446  
    386386    if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";}
    387387   
    388     AlignPrint("Database Comparsion Result","Failed",$intLevel);
     388    AlignPrint("Database Comparison Result","Failed",$intLevel);
    389389   
    390390    $strGdbError = "Difference Report: No Database files found in $strErrorColName";
     
    607607        # Files to be skipped because they get generated on one OS but not the other
    608608        # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux
    609         my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
     609        my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|archiveinf-timestamp.out|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html
    610610        @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl
    611611        @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest;
     
    748748            (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
    749749            $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
    750             my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
    751            
    752             if($test_contents =~ m@$tmpfile_regex@) {           
    753                 # found a match, replace the tmp file name with "random", keeping the original file extension
    754                 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
    755            
    756                 my ($old_tmp_filename, $ext) = ($1, $2);           
    757                 my $new_tmp_filename = "random";           
     750            #my $tmpfile_regex = "<Metadata name=\"URL\">https?://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long   
     751            my $tmpfile_regex = "<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long
     752           
     753            if($test_contents =~ m@$tmpfile_regex@) {
     754                # found a match, replace the tmp file name with "random", keeping the original file extension
     755                # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename">
     756               
     757                my ($old_tmp_filename, $ext) = ($3, $4);               
     758                my $new_tmp_filename = "random";
     759                $old_tmp_filename =~ s@.*?([^/]*)$@$1@; # remove any intermediate timestamp folders under tmp dir               
     760               
     761                ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
     762                #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
     763                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?.*?($old_tmp_filename)($ext</Metadata>)";
     764                if($5) { # there is a tmp folder
     765                $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$7@mg;             
     766                } else { # OrigSource contains only the filename
     767                $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$7@mg;
     768                }
    758769               
    759                 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
    760                 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
    761                 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
    762                 if($5) {
    763                     $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
    764                 } else { # OrigSource contains only the filename
    765                     $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
    766                 }
    767                
    768                 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename           
    769                 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
    770                 if($5) {
    771                     $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
    772                 } else { # OrigSource contains only the filename
    773                     $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
    774                 }
     770                # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename
     771                $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
     772                if($5) {
     773                    $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
     774                } else { # OrigSource contains only the filename
     775                    $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
     776                }
    775777            }
    776778
    777779        } # finished special processing of doc.xml files
    778780       
    779 
     781        #$gv_blnDebugging=1;
    780782        if($gv_blnDebugging) {# && $gv_intVerbosity > 0) {
    781783            my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory       
    782784            &flatdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
    783785            &flatdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
    784 #           if($strModel =~ m/(HASH0164.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
     786#           if($strModel =~ m/(HASH019c.dir|HASH2bdf.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
    785787#           &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
    786788#           &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
    787789#           }
    788790        }       
    789 
     791        #$gv_blnDebugging=0;
    790792       
    791793        # now can diff the normalised versions of the doc.xml/docmets.xml files:
Note: See TracChangeset for help on using the changeset viewer.