Ignore:
Timestamp:
2013-08-15T22:09:07+12:00 (11 years ago)
Author:
ak19
Message:
  1. If the tutorial collection involves tmp folders (such as timestamped ones), the equalised txt version of the test and model GDB files is written out to a gdb file and read back into sorted txt, so that the now-relative paths to the tmp folders appear in the same order in both. 2. Square brackets are now placed around the random.html filenames that replace the random paths to GS-generated html files. 3. diffcol.pl's processing of doc.xml also used greedy matching where non-greedy matching was required. 4. The tmp folders generated for the Multimedia collection contain further subfolders that hold the actual file to be renamed to random; diffcol.pl's test collection case for the OrigSource field did not handle this properly before.
Location:
other-projects/nightly-tasks/diffcol/trunk/diffcol
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r28019 r28071  
    699699        $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'};
    700700        my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 
     701       
    701702        if($test_contents =~ m@$tmpfile_regex@) {           
    702703            # found a match, replace the tmp file name with "random", keeping the original file extension
     
    706707            my $new_tmp_filename = "random";           
    707708           
    708             $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
     709            ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file.
     710            #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)";
     711            $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)";
    709712            if($5) {
    710                 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@g;
     713                $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
    711714            } else { # OrigSource contains only the filename
    712                 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@g;
     715                $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
    713716            }
    714717           
    715718            # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename           
    716             $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*($ext</Metadata>)";
     719            $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)";
    717720            if($5) {
    718                 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@g;
     721                $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg;
    719722            } else { # OrigSource contains only the filename
    720                 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@g;
     723                $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg;
    721724            }
    722725        }
    723726       
    724727#       my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory       
    725 #       &gdbdiff::print_string_to_file($model_contents, $savepath."model_doc.xml");
    726 #       &gdbdiff::print_string_to_file($test_contents, $savepath."test_doc.xml");
     728#       if($strModel =~ m/(HASH010d.dir)/) { # list the HASH dirs for which you want the doc.xml file generated
     729#       &gdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
     730#       &gdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
     731#       }
    727732       
    728733        $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" };
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm

    r28067 r28071  
    220220        #$model_text =~ s@\[http://.*/tmp/.*(\..{3,4})\]@tmp/random$1@mg;
    221221       
    222         $test_text  =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@tmp/random$1\n<section>$2\n@sg;
    223         $model_text =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@tmp/random$1\n<section>$2\n@sg;
     222        $test_text  =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@[tmp/random$1\n<section>$2]\n@sg;
     223        $model_text =~ s@\[http://[^\n]*?/tmp/.*?(\..{3,4})\]\n<section>([^\n]*?)\n@[tmp/random$1\n<section>$2]\n@sg;
     224       
     225        # need to re-sort the keys, now that the absolute paths to tmp locations have been removed
     226        # so that we get the tmp files in the same order in both model and test collections
     227       
     228        # http://stackoverflow.com/questions/1909262/how-can-i-pipe-input-into-a-java-command-from-perl
     229        open PIPE, "| txt2db model.gdb";
     230        print PIPE "$model_text";
     231        close(PIPE);
     232        open PIPE, "| txt2db test.gdb";
     233        print PIPE "$test_text";
     234        close(PIPE);
     235       
     236        $model_cmd = " db2txt -sort model.gdb 2>&1";
     237        $test_cmd  = "db2txt -sort test.gdb 2>&1";
     238        $model_text = readin_gdb($model_cmd);
     239        $test_text = readin_gdb($test_cmd);
    224240    }   
    225241
Note: See TracChangeset for help on using the changeset viewer.