Changeset 28071

Show
Ignore:
Timestamp:
15.08.2013 22:09:07 (6 years ago)
Author:
ak19
Message:

1. If the tutorial collection involves tmp folders (such as timestamped ones), the equalised txt file versions of the test and model GDB files are written out to gdb files and read back into txt, sorted, to get the now-relative paths to the tmp folders in the same order. 2. Added square brackets around the random.html filenames that replace the random paths to GS-generated html files. 3. diffcol.pl's processing of doc.xml also had greedy matching where there should have been none. 4. The tmp folders generated for the Multimedia collection contain further subfolders that contain the actual file to be renamed to random, and this wasn't handled properly before in diffcol.pl's test collection case for the OrigSource field.

Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r28019 r28071  
    699699        $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'}; 
    700700        my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long   
     701         
    701702        if($test_contents =~ m@$tmpfile_regex@) {            
    702703            # found a match, replace the tmp file name with "random", keeping the original file extension  
     
    706707            my $new_tmp_filename = "random";             
    707708             
    708             $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)"; 
     709            ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file. 
     710            #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)"; 
     711            $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)"; 
    709712            if($5) {  
    710                 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@g; 
     713                $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg; 
    711714            } else { # OrigSource contains only the filename 
    712                 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@g; 
     715                $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg; 
    713716            } 
    714717             
    715718            # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename            
    716             $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*($ext</Metadata>)"; 
     719            $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)"; 
    717720            if($5) {  
    718                 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@g; 
     721                $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg; 
    719722            } else { # OrigSource contains only the filename 
    720                 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@g; 
     723                $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg; 
    721724            } 
    722725        } 
    723726         
    724727#       my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory        
    725 #       &gdbdiff::print_string_to_file($model_contents, $savepath."model_doc.xml"); 
    726 #       &gdbdiff::print_string_to_file($test_contents, $savepath."test_doc.xml"); 
     728#       if($strModel =~ m/(HASH010d.dir)/) { # list the HASH dirs for which you want the doc.xml file generated 
     729#       &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml"); 
     730#       &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml"); 
     731#       } 
    727732         
    728733        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; 
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r28067 r28071  
    220220        #$model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg; 
    221221         
    222         $test_text  =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@tmp/random$1\n<section>$2\n@sg; 
    223         $model_text =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@tmp/random$1\n<section>$2\n@sg; 
     222        $test_text  =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@[tmp/random$1\n<section>$2]\n@sg; 
     223        $model_text =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@[tmp/random$1\n<section>$2]\n@sg; 
     224         
     225        # need to re-sort the keys, now that the absolute paths to tmp locations have been removed 
     226        # so that we get the tmp files in the same order in both model and test collections 
     227         
     228        # http://stackoverflow.com/questions/1909262/how-can-i-pipe-input-into-a-java-command-from-perl 
     229        open PIPE, "| txt2db model.gdb"; 
     230        print PIPE "$model_text"; 
     231        close(PIPE); 
     232        open PIPE, "| txt2db test.gdb"; 
     233        print PIPE "$test_text"; 
     234        close(PIPE); 
     235         
     236        $model_cmd = " db2txt -sort model.gdb 2>&1"; 
     237        $test_cmd  = "db2txt -sort test.gdb 2>&1"; 
     238        $model_text = readin_gdb($model_cmd); 
     239        $test_text = readin_gdb($test_cmd); 
    224240    }    
    225241