Ignore:
Timestamp:
2023-09-06T19:03:44+12:00 (9 months ago)
Author:
anupama
Message:

Enhanced-PDF model vs test collection on Windows: xpdf-tools seems to insert more newlines (at times with doubled carriage returns) on Windows than on Linux, so a little more cleaning up is now done before diffing, which resolves these minor differences as well.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/nightly-tasks/diffcol/trunk/diffcol/diffcol.pl

    r38024 r38048  
    694694           
    695695            # remove all carriage returns \r - introduced into doc.xml by multiread after pdf converted to html
    696             $$win_contents =~ s@[\r]@@g;           
     696            $$win_contents =~ s@[\r]@@g;
    697697       
    698698            # make all single windows slashes into single unix slashes
     
    707707            # $$win_contents =~ s@\r@\n@mg;
    708708           
    709             if($strModel =~ m/doc\.xml$/) { # processing particular to doc.xml
     709            if($strModel =~ m/doc\.xml$/) { # processing that's particular to doc.xml
    710710                # remove solitary, stray carriage returns \r in the linux doc.xml, as occurs in the tudor collection owing to the source material
    711711                # containing solitary carriage returns instead of linefeed
    712712                $$lin_contents =~ s@[\r]@@g; #$$lin_contents =~ s@[\r][^\n]@@g;
    713713               
     714               
     715                # replace all multiple newlines with single one (for Enhanced-PDF, xpdf-tools introduces more newlines on Windows than linux)
     716                $$win_contents =~ s@(\n)+@\n@g;
     717                $$lin_contents =~ s@(\n)+@\n@g;
    714718               
    715719                # make all single back slash in the linux file into / slash, if when \ was used as a linux escape char in a path
     
    733737        }
    734738       
    735         # processing particular to doc.xml 
     739        # processing that's particular to doc.xml   
    736740        if($strModel =~ m/doc\.xml$/) {
    737741            # tmp dirs have subdirs with random numbers in name, remove randomly named subdir portion of path
     
    784788            &flatdbdiff::print_string_to_file($model_contents, $savepath."model_docmets.xml");
    785789            &flatdbdiff::print_string_to_file($test_contents, $savepath."test_docmets.xml");
    786 #           if($strModel =~ m/(HASH019c.dir|HASH2bdf.dir)/) { # list the HASH dirs for which you want the doc.xml file generated, to inspect specific doc.xml files
    787 #           &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
    788 #           &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
     790#           if($strModel =~ m/doc\.xml$/ && $strModel =~ m/archives/
     791#               && $strModel =~ m/(HASH019c.dir|HASH1a9c.dir)/) # list the HASH dirs for which you want the (archives) doc.xml file generated, to inspect specific doc.xml files
     792#           {
     793#               &flatdbdiff::print_string_to_file($model_contents, $savepath."$1_model_doc.xml");
     794#               &flatdbdiff::print_string_to_file($test_contents, $savepath."$1_test_doc.xml");
    789795#           }
    790796        }       
Note: See TracChangeset for help on using the changeset viewer.