Ignore:
Timestamp:
2013-07-05T22:55:09+12:00 (11 years ago)
Author:
ak19
Message:

Fixes to previous commit: the file names that PDFBox creates for its intermediate HTML files are generated from random numbers. As a result, the HASH OIDs of these intermediate source files, although identical between the model and test collections, can appear in a different order in the two collections, because the entries in each collection's index/col.gdb file are ordered alphabetically.

Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r27766 r27767  
    679679        # The following block of code is necessary to deal with tmp (html) source files generated when using PDFBox
    680680        # These tmpdirs are located inside the toplevel *greenstone* directory
    681         (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;     
     681        (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;
     682        $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
    682683        my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
    683684        if($test_contents =~ m@$tmpfile_regex@) {           
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r27766 r27767  
    172172    # These tmpdirs are located inside the toplevel *greenstone* directory
    173173    (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g;     
     174    $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
    174175    my $tmpfile_regex = "<URL>http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long           
    175176    if($test_text =~ m@$tmpfile_regex@g) {         
     
    200201       
    201202        # index/col.gdb also has entries for the random tmp file names in the form: [http://research/ak19/GS2bin_5July2013/tmp/F639.html]
    202         # need to equalise these also
    203         $test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg;
    204         $model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg;
     203        # need to equalise these also. Sadly, when there are multiple intermediate files, their random tmp filenames are not
     204        # guaranteed to be generated in the same (alphabetical/numerical) order between model and test collection, so the
     205        # HASH OIDs, although all of them accounted for, appear in a different order. So we have to remove the Hash OIDs.
     206        #$test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; # HASH OIDs can appear in different order
     207        #$model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg;
     208        $test_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]\n<section>[^\n]*\n@tmp/random$1\n<section>RandomHash\n@sg;
     209        $model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]\n<section>[^\n]*\n@tmp/random$1\n<section>RandomHash\n@sg;
    205210    }   
    206211
Note: See TracChangeset for help on using the changeset viewer.