Changeset 27743 for other-projects
- Timestamp:
- 2013-07-03T21:37:18+12:00 (11 years ago)
- Location:
- other-projects/nightly-tasks/diffcol/trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl
r27730 r27743 591 591 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 592 592 # now additionally ignoring the earliestDatestamp file and the index/idx/*.idh binary file when diffing file 593 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png )$/g))593 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) # wmf = windows meta file 594 594 { 595 595 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName)); … … 611 611 my $strNewModel = &FileUtils::filenameConcatenate($strModel,$strEachFile); 612 612 my $strNewTest = &FileUtils::filenameConcatenate($strTest,$strEachFile); 613 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png )$/g))613 if(!($strEachFile eq "log" || $strEachFile eq "earliestDatestamp" || $strEachFile =~ m/\.cfg$/g || $strEachFile =~ m/\.((g|j|l|b)db|idh|i.*|wa|td|tsd|ti|t|tl|w|jpe?g|gif|png|wmf)$/g)) 614 614 { 615 615 push(@Errors,TestEach($strNewModel,$strNewTest,$intLevel,$strColName)); … … 645 645 $test_contents =~ s/$ignore_line_re//g; 646 646 647 648 # equalise/normalise the two doc.xml files for OS differences, if there are any 649 my $testIsWin = &isDocXMLFileWindows($test_contents); 650 my $modelIsWin = &isDocXMLFileWindows($model_contents); 651 652 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant 653 654 my $win_contents = $testIsWin ? \$test_contents : \$model_contents; 655 656 # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html 657 $$win_contents =~ s@[\r]@@g; 658 659 # make all single windows slashes into single unix slashes 660 $$win_contents =~ s@([^\\])\\([^\\])@$1\/$2@g; 661 # make windows \r newlines into constant \n newlines. Already handled when \r got replaced 662 #$$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file 663 664 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?) 665 # $$win_contents =~ s@\r@\n@mg; 666 } 667 668 669 647 670 # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path 648 671 $model_contents =~ s@(tmp[\\\/])(\d*[\\\/])@$1@g; … … 651 674 # remove all absolute paths upto collect folder from <Metadata /> elements 652 675 $model_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g; 653 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g; 654 655 656 # equalise/normalise the two doc.xml files for OS differences, if there are any 657 my $testIsWin = &isDocXMLFileWindows($test_contents); 658 my $modelIsWin = &isDocXMLFileWindows($model_contents); 659 660 if($testIsWin != $modelIsWin) { # one of the 2 collections is built on windows, the other on linux, so need to make newlines constant 661 662 my $win_contents = $testIsWin ? \$test_contents : \$model_contents; 663 664 # make all windows slashes into unix slashes 665 $$win_contents =~ s@\\@\/@g; 666 # make windows \r newlines into constant \n newlines 667 $$win_contents =~ s@\r\n@\n@mg; # #http://stackoverflow.com/questions/650743/in-perl-how-to-do-you-remove-m-from-a-file 668 669 #FOR MAC: old macs use CR carriage return (see http://www.perlmonks.org/?node_id=745018), so replace with \n?) 670 # $$win_contents =~ s@\r@\n@mg; 671 } 676 $test_contents =~ s@(<Metadata name=\"[^\"]*\">(http:\/\/)?).*(collect[\\\/]$strColName)@$1$3@g; 677 678 # my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory 679 # &gdbdiff::print_string_to_file($model_contents, $savepath."model_doc.xml"); 680 # &gdbdiff::print_string_to_file($test_contents, $savepath."test_doc.xml"); 672 681 673 682 $strResult = diff \$model_contents, \$test_contents, { STYLE => "OldStyle" }; -
other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm
r27730 r27743 59 59 my $test_text = readin_gdb($test_cmd); 60 60 61 # my $savepath = &getcwd."/../"; # TASK_HOME env does not exist at this stage, but it's one level up from current directory61 # my $savepath = &getcwd."/../"; # TASK_HOME env var does not exist at this stage, but it's one level up from current directory 62 62 # print_string_to_file($test_text, $savepath.$dbname."_test.out"); 63 63 # print_string_to_file($model_text, $savepath.$dbname."_model.out"); … … 92 92 # Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm 93 93 94 $model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 95 $test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 94 $model_text =~ s@^([^\\/]*(//)*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$4$6@mg; 95 $test_text =~ s@^([^\\/]*(//)*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$4$6@mg; 96 #$model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 97 #$test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg; 96 98 } 97 99 … … 116 118 for my $line (split /^/, $$win_text) { # split the string into newlines 117 119 118 if($line =~ m@^<assoc-file>(.*)(\s+)@s) { 119 $line = $1; # may be a short file name 120 # assoc-file and meta-file contain filepaths, ensure these are long windows file paths now (will later convert to linux slashes) 121 if($line =~ m@^<(assoc-file|meta-file)>(.*)(\s+)@s) { 122 $line = $2; # may be a short file name 120 123 # perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/ 121 124 122 $line = "< assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again125 $line = "<$1>".&Win32::GetLongPathName($line)."$3"; # make it a long file name and prefix assoc-file/meta-file tagname to it again 123 126 } 124 127 $tmp .= $line; … … 127 130 } 128 131 129 # slashes in windows text need to be turned into linux style slashes 130 $$win_text =~ s@\\@/@g; 132 133 # index gdb file 134 if($dbname =~ m/$strColName/) { 135 my $tmp = ""; # rebuild windows file's set of lines after processing them one by one 136 for my $line (split /^/, $$win_text) { # split the string into newlines 137 138 if($line =~ m@^<ex.File.Directory>(.*)@s) { # word-pdf collection contains double windows backslashes 139 (my $escaped_path = $1) =~ s@\\\\@\\@g; 140 $line = "<ex.File.Directory>$escaped_path"; 141 } 142 elsif($line =~ m@^<Title>(.*)@s) { 143 # print STDERR "***** TITLE: |$1|\n"; 144 145 # word-pdf collection: Title of ps files contain new lines at end when 146 # GreenstoneXMLPlugin::xml_end_tag() writes the Title back out after utf8 decode 147 # if($metadata_name eq "Title") { $metadata_value =~ s/[\n\r]*$//; } 148 149 (my $title = $1) =~ s@(\r|\n|\\n)*$@@; # get rid of trailing newlines/carriage returns 150 $line = "<Title>$title\n"; # add single newline 151 } 152 $tmp .= $line; 153 } 154 $$win_text = $tmp; 155 } 156 157 158 # slashes in windows metadata text need to be turned into linux style slashes 159 $$win_text =~ s@\\@/@g; #$$win_text =~ s@\\([^n|r|\|"])@/$1@g; # filepath something\rtf remains something\rtf 131 160 132 161 # cut down absolute paths to files to just collect/colname/.../file, same as before 133 $$lin_text =~ s@^([^\\/ /]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;134 $$win_text =~ s@^([^\\/ /]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;162 $$lin_text =~ s@^([^\\/]*(//)*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$4$6@mg; # $$lin_text =~ s@^([^\\\/]*(//)?).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$4$6@mg; 163 $$win_text =~ s@^([^\\/]*(//)*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$4$6@mg; 135 164 136 165 # for the windows text, need to further get rid of the driveletter after [ or <meta> … … 166 195 my ($dbtailname, $db_contents) = @_; # db filename without suffix 167 196 168 if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb169 return 0;170 }197 #if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb 198 # return 0; 199 #} 171 200 return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something 172 201 # for doc.xml: -
other-projects/nightly-tasks/diffcol/trunk/task.pl
r27725 r27743 417 417 for my $collection (readdir $collect_handle) { 418 418 next if ($collection eq "." || $collection eq ".."); 419 ##next if ($collection ne "Small-HTML"); ## TEMPORARY, FOR TESTING THIS SCRIPT419 # next if ($collection ne "Word-PDF-Basic"); ## TEMPORARY, FOR TESTING THIS SCRIPT 420 420 421 421 #escape the filename (in case of space)
Note:
See TracChangeset
for help on using the changeset viewer.