Changeset 27767

Show
Ignore:
Timestamp:
05.07.2013 22:55:09 (6 years ago)
Author:
ak19
Message:

Fixes to the previous commit: the names of the intermediate html files created by PDFBox are generated from random numbers. As a result, the HASH OIDs of these intermediate source files, although constant between the model and test collections, can appear in a different order in the two collections when both are ordered alphabetically in the index/col.gdb file.

Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27766 r27767  
    679679        # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox 
    680680        # These tmpdirs are located inside the toplevel *greenstone* directory 
    681         (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;       
     681        (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g; 
     682        $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'}; 
    682683        my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long   
    683684        if($test_contents =~ m@$tmpfile_regex@) {            
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27766 r27767  
    172172    # These tmpdirs are located inside the toplevel *greenstone* directory 
    173173    (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;       
     174    $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'}; 
    174175    my $tmpfile_regex = "<URL>http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long            
    175176    if($test_text =~ m@$tmpfile_regex@g) {           
     
    200201         
    201202        # index/col.gdb also has entries for the random tmp file names in the form: [http://research/ak19/GS2bin_5July2013/tmp/F639.html] 
    202         # need to equalise these also 
    203         $test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; 
    204         $model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; 
     203        # need to equalise these also. Sadly, when there are multiple intermediate files, their random tmp filenames are not  
     204        # guaranteed to be generated in the same (alphabetical/numerical) order between model and test collection, so the  
     205        # HASH OIDs, although all of them accounted for, appear in a different order. So we have to remove the Hash OIDs. 
     206        #$test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; # HASH OIDs can appear in different order 
     207        #$model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; 
     208        $test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]\n<section>[^\n]*\n@tmp/random$1\n<section>RandomHash\n@sg; 
     209        $model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]\n<section>[^\n]*\n@tmp/random$1\n<section>RandomHash\n@sg; 
    205210    }    
    206211