Changeset 28019 for other-projects/nightly-tasks/diffcol/trunk
- Timestamp:
- 2013-08-09T22:46:32+12:00 (11 years ago)
- Location:
- other-projects/nightly-tasks/diffcol/trunk/diffcol
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl
r28008 r28019 676 676 # Doing so is okay, since we're not modifying the doc.xml in the model or test collections, just normalising them in-memory for comparison 677 677 $$lin_contents =~ s@([^\\])\\([^\\])@$1\/$2@g; 678 679 # Advanced Beatles collection, 680 # linux version contains: IMG SRC=_httpextlink_&rl=1&href=http:///\\"http://www.boskowan.com/ (extra / slash) 681 # while windows contains: IMG SRC=_httpextlink_&rl=1&href=http://\\"http://www.boskowan.com/ 682 # Normalising to windows version for doing a diff 683 $$lin_contents =~ s@href=http:///@href=http://@g; 678 684 } 679 685 -
other-projects/nightly-tasks/diffcol/trunk/diffcol/gdbdiff.pm
r28005 r28019 143 143 # E.g. On windows, the Word-PDF collection(s) contains double backslashes in the ex.File.Directory field 144 144 # the MARC-Exploded collection contains double backslashes in the null_file entry field of the .gdb file 145 if($line =~ m@^<(ex.File.Directory|null_file)>(.*)@s) { 145 if($line =~ m@^<(ex.File.Directory|null_file)>(.*)@s) { 146 146 my ($fieldname, $escaped_path) = ($1, $2); 147 $escaped_path =~ s@\\\\@ \\@g; #(my $escaped_path = $2) =~ s@\\\\@\\@g;147 $escaped_path =~ s@\\\\@/@g; #(my $escaped_path = $2) =~ s@\\\\@\\@g; 148 148 $line = "<$fieldname>$escaped_path"; 149 149 } … … 160 160 $tmp .= $line; 161 161 } 162 $$win_text = $tmp; 163 } 164 165 166 # slashes in windows metadata text need to be turned into linux style slashes 167 $$win_text =~ s@\\@/@g; #$$win_text =~ s@\\([^n|r|\|"])@/$1@g; # filepath something\rtf remains something\rtf 162 $$win_text = $tmp; 163 164 # slashes in windows metadata text need to be turned into linux style slashes. 165 # index\col.gdb uses double backslashes, and single for \n,\t 166 #$$win_text =~ s@\\\\@/@g; 167 } 168 else { # archiveinf gdb file 169 170 # slashes in windows metadata text need to be turned into linux style slashes. 171 # In the two archivesinf gdb files, filepaths may use single backslashes 172 $$win_text =~ s@\\@/@g; #$$win_text =~ s@\\([^n|r|\|"])@/$1@g; # filepath something\rtf remains something\rtf 173 } 168 174 169 175 # cut down absolute paths to files to just collect/colname/.../file, same as before … … 244 250 my ($dbtailname, $db_contents) = @_; # db filename without suffix 245 251 246 #if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb 247 # return 0; 248 #} 249 return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. 
Better test would be: [Something\something] OR <tag>something\something 250 # for doc.xml: 251 # <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata> 252 # return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something 253 254 if($dbtailname =~ m/^archiveinf-doc/) { 255 return ($db_contents =~ m@<src-file>[a-zA-Z]:\\@) ? 1 : 0; # <src-file>C:\path 256 } 257 elsif($dbtailname =~ m/^archiveinf-src/) { # <src-file>C:\path 258 return ($db_contents =~ m@\[[a-zA-Z]:\\@) ? 1 : 0; # [C:\path] 259 } 260 else { # index/col.gdb file 261 if ($db_contents =~ m@<URL>http://[a-zA-Z]:/@) { # <URL>http://C:/path 262 return 1; 263 } 264 elsif ($db_contents =~ m@^(<URL>http://[a-zA-Z]:/)|(<null_file>[^\\]*\\)@m) { # <URL>http://C:/path OR <null_file>CMSwp-all.00000001\\00000035.nul 265 return 1; 266 } 267 return 0; 268 } 252 269 } 253 270
Note: See TracChangeset for help on using the changeset viewer.