Changeset 37446
- Timestamp: 2023-03-08T20:01:55+13:00 (2 weeks ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl
r36876 r37446 386 386 if(!(-e $strTestGdb)){ $strErrorColName = $strErrorColName."(Test Collection)";} 387 387 388 AlignPrint("Database Compar sion Result","Failed",$intLevel);388 AlignPrint("Database Comparison Result","Failed",$intLevel); 389 389 390 390 $strGdbError = "Difference Report: No Database files found in $strErrorColName"; … … 607 607 # Files to be skipped because they get generated on one OS but not the other 608 608 # On windows, files of the form col.invf.state.\d\d\d\d get generated (e.g. Small-HTML.invf.state.1228) that aren't there on linux 609 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp| fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html609 my $skipfiles_re = qr/(\.invf\.state\.\d+$)|~$|earliestDatestamp|archiveinf-timestamp.out|fail\.log|oai-inf-tmp\.(gdb|jdb|lg)$/; # Create a regex of all files to be skipped, see http://perldoc.perl.org/perlop.html 610 610 @aryInModel = grep { $_ !~ m/$skipfiles_re/ } @aryInModel; # http://stackoverflow.com/questions/174292/what-is-the-best-way-to-delete-a-value-from-an-array-in-perl 611 611 @aryInTest = grep { $_ !~ m/$skipfiles_re/ } @aryInTest; … … 748 748 (my $gsdlhome_re = $ENV{'GSDLHOME'}) =~ s@\\@\/@g; 749 749 $gsdlhome_re = ".*" unless $$ENV{'GSDLHOME'}; 750 my $tmpfile_regex = "<Metadata name=\"URL\">http://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 751 752 if($test_contents =~ m@$tmpfile_regex@) { 753 # found a match, replace the tmp file name with "random", keeping the original file extension 754 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename"> 755 756 my ($old_tmp_filename, $ext) = ($1, $2); 757 my $new_tmp_filename = "random"; 750 #my $tmpfile_regex = "<Metadata name=\"URL\">https?://$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 751 my 
$tmpfile_regex = "<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?$gsdlhome_re/tmp/([^\.]*)(\..{3,4})</Metadata>"; # $gsdlhome/tmp/randomfilename.html, file ext can be 3 or 4 chars long 752 753 if($test_contents =~ m@$tmpfile_regex@) { 754 # found a match, replace the tmp file name with "random", keeping the original file extension 755 # in <Metadata name="OrigSource|URL|UTF8URL|gsdlconvertedfilename"> 756 757 my ($old_tmp_filename, $ext) = ($3, $4); 758 my $new_tmp_filename = "random"; 759 $old_tmp_filename =~ s@.*?([^/]*)$@$1@; # remove any intermediate timestamp folders under tmp dir 760 761 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file. 762 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)"; 763 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)($gsdlhome_re)?(/tmp/)?.*?($old_tmp_filename)($ext</Metadata>)"; 764 if($5) { # there is a tmp folder 765 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$7@mg; 766 } else { # OrigSource contains only the filename 767 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$7@mg; 768 } 758 769 759 ## The following does not work in the Multimedia collection, since there's a subfolder to tmp (the timestamp folder) which contains the output file. 
760 #$tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?$old_tmp_filename($ext</Metadata>)"; 761 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)($gsdlhome_re)?(/tmp/)?.*?($ext</Metadata>)"; 762 if($5) { 763 $test_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg; 764 } else { # OrigSource contains only the filename 765 $test_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg; 766 } 767 768 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename 769 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(http://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)"; 770 if($5) { 771 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg; 772 } else { # OrigSource contains only the filename 773 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg; 774 } 770 # modelcol used a different gsdlhome, but also a tmp dir, so make the same changes to its random filename 771 $tmpfile_regex = "(<Metadata name=\"(URL|UTF8URL|gsdlconvertedfilename|OrigSource)\">(https?://)?)(.*)?(/tmp/)?.*?($ext</Metadata>)"; 772 if($5) { 773 $model_contents =~ s@$tmpfile_regex@$1$5$new_tmp_filename$6@mg; 774 } else { # OrigSource contains only the filename 775 $model_contents =~ s@$tmpfile_regex@$1$new_tmp_filename$6@mg; 776 } 775 777 } 776 778 777 779 } # finished special processing of doc.xml files 778 780 779 781 #$gv_blnDebugging=1; 780 782 if($gv_blnDebugging) {# && $gv_intVerbosity > 0) { 781 783 my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory 782 784 &flatdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml"); 783 785 &flatdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml"); 784 # if($strModel =~ m/(HASH01 64.dir)/) { # list the HASH dirs for which you want the 
doc.xml file generated, to inspect specific doc.xml files786 # if($strModel =~ m/(HASH019c.dir|HASH2bdf.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files 785 787 # &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml"); 786 788 # &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml"); 787 789 # } 788 790 } 789 791 #$gv_blnDebugging=0; 790 792 791 793 # now can diff the normalised versions of the doc.xml/docmets.xml files:
Note: See TracChangeset for help on using the changeset viewer.