Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/WritePostScript.pl

    r24107 r34921  
    9595sub WritePSDirectory($$$$$)
    9696{
    97     my ($exifTool, $outfile, $dirName, $dataPt, $flags) = @_;
     97    my ($et, $outfile, $dirName, $dataPt, $flags) = @_;
    9898    my $success = 2;
    9999    my $len = $dataPt ? length($$dataPt) : 0;
     
    117117        pos($$dataPt) = 0;
    118118        unless ($$dataPt =~ /(.*)(<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d)/sg) {
    119             $exifTool->Warn('No XMP packet start');
     119            $et->Warn('No XMP packet start');
    120120            return WriteXMPDir($outfile, $flags, $$dataPt);
    121121        }
     
    124124        my $p1 = pos($$dataPt);
    125125        unless ($$dataPt =~ m{<\?xpacket end=.(w|r).\?>}sg) {
    126             $exifTool->Warn('No XMP packet end');
     126            $et->Warn('No XMP packet end');
    127127            return WriteXMPDir($outfile, $flags, $$dataPt);
    128128        }
     
    143143    }
    144144    my $tagTablePtr = Image::ExifTool::GetTagTable("Image::ExifTool::${dirName}::Main");
    145     my $val = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     145    my $val = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    146146    if (defined $val) {
    147147        $dataPt = \$val;    # use modified directory
     
    157157        # so instead we write a blank XMP record
    158158        $val = <<EMPTY_XMP;
    159 <?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>
     159<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
    160160<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image::ExifTool $Image::ExifTool::VERSION'>
    161161</x:xmpmeta>
    162162EMPTY_XMP
    163         $val .= ((' ' x 100) . "\n") x 24 unless $exifTool->Options('Compact');
     163        $val .= ((' ' x 100) . "\n") x 24 unless $$et{OPTIONS}{Compact}{NoPadding};
    164164        $val .= q{<?xpacket end='w'?>};
    165165        $dataPt = \$val;
     
    239239        Write($outfile, $endToken, $/) or $success = 0;
    240240    } else {
    241         $exifTool->Warn("Can't write PS directory $dirName");
     241        $et->Warn("Can't write PS directory $dirName");
    242242    }
    243243    undef $val;
     
    263263    }
    264264    my $line = "%%$tag: $val";
    265     # postscript line limit is 255 characters
     265    # postscript line limit is 255 characters (but it seems that
     266    # the limit may be 254 characters if the DOS CR/LF is used)
    266267    # --> split if necessary using continuation comment "%%+"
    267268    my $n;
    268     for ($n=255; length($line)>$n; $n+=255+length($/)) {
     269    for ($n=254; length($line)>$n; $n+=254+length($/)) {
    269270        substr($line, $n, 0) = "$/%%+";
    270271    }
     
    278279sub WriteNewTags($$$)
    279280{
    280     my ($exifTool, $outfile, $newTags) = @_;
     281    my ($et, $outfile, $newTags) = @_;
    281282    my $success = 1;
    282283    my $tag;
     
    288289    foreach $tag (sort keys %$newTags) {
    289290        my $tagInfo = $$newTags{$tag};
    290         my $nvHash = $exifTool->GetNewValueHash($tagInfo);
    291         next unless Image::ExifTool::IsCreating($nvHash);
    292         my $val = Image::ExifTool::GetNewValues($nvHash);
    293         $exifTool->VerboseValue("+ PostScript:$$tagInfo{Name}", $val);
     291        my $nvHash = $et->GetNewValueHash($tagInfo);
     292        next unless $$nvHash{IsCreating};
     293        my $val = $et->GetNewValue($nvHash);
     294        $et->VerboseValue("+ PostScript:$$tagInfo{Name}", $val);
    294295        Write($outfile, EncodeTag($tag, $val)) or $success = 0;
    295         ++$exifTool->{CHANGED};
     296        ++$$et{CHANGED};
    296297    }
    297298    # write XMP hint if necessary
     
    300301    %$newTags = ();     # all done with new tags
    301302    return $success;
    302 }
    303 
    304 #------------------------------------------------------------------------------
    305 # check to be sure we haven't read past end of PS data in DOS-style file
    306 # Inputs: 0) RAF ref, 1) pointer to end of PS, 2) data
    307 # - modifies data and sets RAF to EOF if end of PS is reached
    308 sub CheckPSEnd($$$)
    309 {
    310     my $pos = $_[0]->Tell();
    311     if ($pos >= $_[1]) {
    312         $_[0]->Seek(0, 2);   # seek to end of file so we can't read any more
    313         $_[2] = substr($_[2], 0, length($_[2]) - $pos + $_[1]) if $pos > $_[1];
    314     }
    315 }
    316 
    317 #------------------------------------------------------------------------------
    318 # Split into lines ending in any CR, LF or CR+LF combination
    319 # (this is annoying, and could be avoided if EPS files didn't mix linefeeds!)
    320 # Inputs: 0) data pointer, 1) reference to lines array
    321 # Notes: Updates data to contain next line and fills list with remaining lines
    322 sub SplitLine($$)
    323 {
    324     my ($dataPt, $lines) = @_;
    325     for (;;) {
    326         my $endl;
    327         # find the position of the first LF (\x0a)
    328         $endl = pos($$dataPt), pos($$dataPt) = 0 if $$dataPt =~ /\x0a/g;
    329         if ($$dataPt =~ /\x0d/g) { # find the first CR (\x0d)
    330             if (defined $endl) {
    331                 # (remember, CR+LF is a DOS newline...)
    332                 $endl = pos($$dataPt) if pos($$dataPt) < $endl - 1;
    333             } else {
    334                 $endl = pos($$dataPt);
    335             }
    336         } elsif (not defined $endl) {
    337             push @$lines, $$dataPt;
    338             last;
    339         }
    340         # split into separate lines
    341         if (length $$dataPt == $endl) {
    342             push @$lines, $$dataPt;
    343             last;
    344         } else {
    345             push @$lines, substr($$dataPt, 0, $endl);
    346             $$dataPt = substr($$dataPt, $endl);
    347         }
    348     }
    349     $$dataPt = shift @$lines;   # set $$dataPt to first line
    350303}
    351304
     
    357310sub WritePS($$)
    358311{
    359     my ($exifTool, $dirInfo) = @_;
    360     $exifTool or return 1;    # allow dummy access to autoload this package
     312    my ($et, $dirInfo) = @_;
     313    $et or return 1;    # allow dummy access to autoload this package
    361314    my $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::PostScript::Main');
    362315    my $raf = $$dirInfo{RAF};
    363316    my $outfile = $$dirInfo{OutFile};
    364     my $verbose = $exifTool->Options('Verbose');
    365     my $out = $exifTool->Options('TextOut');
     317    my $verbose = $et->Options('Verbose');
     318    my $out = $et->Options('TextOut');
    366319    my ($data, $buff, %flags, $err, $mode, $endToken);
    367     my ($dos, $psStart, $psEnd, $psNewStart, $xmpHint);
     320    my ($dos, $psStart, $psNewStart, $xmpHint, @lines);
    368321
    369322    $raf->Read($data, 4) == 4 or return 0;
     
    387340                $raf->Read($data, 4) == 4 and $data eq '%!PS')
    388341        {
    389             $exifTool->Error('Invalid PS header');
     342            $et->Error('Invalid PS header');
    390343            return 1;
    391344        }
    392         $psEnd = $psStart + Get32u(\$dos, 8);
     345        $$raf{PSEnd} = $psStart + Get32u(\$dos, 8);
    393346        my $base = Get32u(\$dos, 20);
    394347        Set16u(0xffff, \$dos, 28);  # ignore checksum
     
    400353                NoTiffEnd => 1, # no end-of-TIFF check
    401354            );
    402             $buff = $exifTool->WriteTIFF(\%dirInfo);
     355            $buff = $et->WriteTIFF(\%dirInfo);
    403356            SetByteOrder('II'); # (WriteTIFF may change this)
    404357            if ($buff) {
     
    408361                my $len = Get32u(\$dos, 24);
    409362                unless ($raf->Seek($base, 0) and $raf->Read($buff, $len) == $len) {
    410                     $exifTool->Error('Error reading embedded TIFF');
     363                    $et->Error('Error reading embedded TIFF');
    411364                    return 1;
    412365                }
    413                 $exifTool->Warn('Bad embedded TIFF');
     366                $et->Warn('Bad embedded TIFF');
    414367            }
    415368            Set32u(0, \$dos, 12);                   # zero metafile pointer
     
    421374            my $len = Get32u(\$dos, 16);
    422375            unless ($raf->Seek($base, 0) and $raf->Read($buff, $len) == $len) {
    423                 $exifTool->Error('Error reading metafile section');
     376                $et->Error('Error reading metafile section');
    424377                return 1;
    425378            }
     
    438391    local $/ = GetInputRecordSeparator($raf);
    439392    unless ($/ and $raf->ReadLine($buff)) {
    440         $exifTool->Error('Invalid PostScript data');
     393        $et->Error('Invalid PostScript data');
    441394        return 1;
    442395    }
    443396    $data .= $buff;
    444397    unless ($data =~ /^%!PS-Adobe-3\.(\d+)\b/ and $1 < 2) {
    445         if ($exifTool->Error("Document does not conform to DSC spec. Metadata may be unreadable by other apps", 1)) {
     398        if ($et->Error("Document does not conform to DSC spec. Metadata may be unreadable by other apps", 2)) {
    446399            return 1;
    447400        }
     
    452405
    453406    # get hash of new information keyed by tagID and directories to add/edit
    454     my $newTags = $exifTool->GetNewTagInfoHash($tagTablePtr);
     407    my $newTags = $et->GetNewTagInfoHash($tagTablePtr);
    455408
    456409    # figure out which directories we need to write (PostScript takes priority)
    457     $exifTool->InitWriteDirs(\%psMap, 'PostScript');
    458     my $addDirs = $exifTool->{ADD_DIRS};
    459     my $editDirs = $exifTool->{EDIT_DIRS};
     410    $et->InitWriteDirs(\%psMap, 'PostScript');
     411    my $addDirs = $$et{ADD_DIRS};
     412    my $editDirs = $$et{EDIT_DIRS};
    460413    my %doneDir;
    461414
    462415    # set XMP hint flag (1 for adding, 0 for deleting, undef for no change)
    463416    $xmpHint = 1 if $$addDirs{XMP};
    464     $xmpHint = 0 if $$exifTool{DEL_GROUP}{XMP};
     417    $xmpHint = 0 if $$et{DEL_GROUP}{XMP};
    465418    $$newTags{XMP_HINT} = $xmpHint if $xmpHint;  # add special tag to newTags list
    466419
    467     my (@lines, $changedNL);
    468     my $altnl = ($/ eq "\x0d") ? "\x0a" : "\x0d";
    469 
    470420    for (;;) {
    471         if (@lines) {
    472             $data = shift @lines;
    473         } else {
    474             $raf->ReadLine($data) or last;
    475             $dos and CheckPSEnd($raf, $psEnd, $data);
    476             # split line if it contains other newline sequences
    477             if ($data =~ /$altnl/) {
    478                 if (length($data) > 500000 and IsPC()) {
    479                     # patch for Windows memory problem
    480                     unless ($changedNL) {
    481                         $changedNL = 1;
    482                         my $t = $/;
    483                         $/ = $altnl;
    484                         $altnl = $t;
    485                         $raf->Seek(-length($data), 1);
    486                         next;
    487                     }
    488                 } else {
    489                     # split into separate lines
    490                     SplitLine(\$data, \@lines);
    491                 }
    492             }
    493         }
    494         undef $changedNL;
     421        @lines or GetNextLine($raf, \@lines) or last;
     422        $data = shift @lines;
    495423        if ($endToken) {
    496424            # look for end token
     
    499427                # found end: process this information
    500428                if ($mode) {
    501                     $doneDir{$mode} and $exifTool->Error("Multiple $mode directories", 1);
     429                    $doneDir{$mode} and $et->Error("Multiple $mode directories", 1);
    502430                    $doneDir{$mode} = 1;
    503                     WritePSDirectory($exifTool, $outfile, $mode, \$buff, \%flags) or $err = 1;
     431                    WritePSDirectory($et, $outfile, $mode, \$buff, \%flags) or $err = 1;
    504432                    # write end token if we wrote the begin token
    505433                    Write($outfile, $data) or $err = 1 if $flags{WROTE_BEGIN};
     
    531459        } elsif ($data =~ m{^(%{1,2})(Begin)(?!Object:)(.*?)[:\x0d\x0a]}i) {
    532460            # comments section is over... write any new tags now
    533             WriteNewTags($exifTool, $outfile, $newTags) or $err = 1 if %$newTags;
     461            WriteNewTags($et, $outfile, $newTags) or $err = 1 if %$newTags;
    534462            undef $xmpHint;
    535463            # the beginning of a data block (can only write XMP and Photoshop)
     
    558486            #   (because Illustrator doesn't care if the Creator is changed)
    559487            if ($tag eq 'Creator' and $val =~ /^Adobe Illustrator/) {
    560                 # disable writing XMP to PS-format Adobe Illustrator files and
    561                 # older Illustrator EPS files becaues it confuses Illustrator
    562                 # (Illustrator 8 and older write PS-Adobe-3.0, newer write PS-Adobe-3.1)
    563                 if ($$editDirs{XMP} and $psRev == 0) {
    564                     if ($flags{EPS}) {
    565                         $exifTool->Warn("Can't write XMP to Illustrator 8 or older EPS files");
    566                     } else {
    567                         $exifTool->Warn("Can't write XMP to PS-format AI files");
    568                     }
     488                # disable writing XMP to PostScript-format Adobe Illustrator files
     489                # because it confuses Illustrator
     490                if ($$editDirs{XMP}) {
     491                    $et->Warn("Can't write XMP to PostScript-format Illustrator files");
    569492                    # pretend like we wrote it already so we won't try to add it later
    570493                    $doneDir{XMP} = 1;
     
    574497                # --> find a better way to do this!
    575498                if ($$newTags{$tag}) {
    576                     $exifTool->Warn("Can't change Postscript:Creator of Illustrator files");
     499                    $et->Warn("Can't change Postscript:Creator of Illustrator files");
    577500                    delete $$newTags{$tag};
    578501                }
     
    584507                # decode comment string (reading continuation lines if necessary)
    585508                $val = DecodeComment($val, $raf, \@lines, \$data);
    586                 $val = join $exifTool->Options('ListSep'), @$val if ref $val eq 'ARRAY';
    587                 my $nvHash = $exifTool->GetNewValueHash($tagInfo);
    588                 if (Image::ExifTool::IsOverwriting($nvHash, $val)) {
    589                     $exifTool->VerboseValue("- PostScript:$$tagInfo{Name}", $val);
    590                     $val = Image::ExifTool::GetNewValues($nvHash);
    591                     ++$exifTool->{CHANGED};
     509                $val = join $et->Options('ListSep'), @$val if ref $val eq 'ARRAY';
     510                my $nvHash = $et->GetNewValueHash($tagInfo);
     511                if ($et->IsOverwriting($nvHash, $val)) {
     512                    $et->VerboseValue("- PostScript:$$tagInfo{Name}", $val);
     513                    $val = $et->GetNewValue($nvHash);
     514                    ++$$et{CHANGED};
    592515                    next unless defined $val;   # next if tag is being deleted
    593                     $exifTool->VerboseValue("+ PostScript:$$tagInfo{Name}", $val);
     516                    $et->VerboseValue("+ PostScript:$$tagInfo{Name}", $val);
    594517                    $data = EncodeTag($tag, $val);
    595518                }
     
    612535            {
    613536                # write new tags at end of comments section
    614                 WriteNewTags($exifTool, $outfile, $newTags) or $err = 1;
     537                WriteNewTags($et, $outfile, $newTags) or $err = 1;
    615538                undef $xmpHint;
    616539            }
     
    629552                    if ($plateFile) {
    630553                        # PlateFile comments may contain offsets so we can't edit these files!
    631                         $exifTool->Warn("Can only edit PostScript information DCS Plate files");
     554                        $et->Warn("Can only edit PostScript information DCS Plate files");
    632555                        last;
    633556                    }
    634557                    next unless $$addDirs{$dir} or $dir eq 'XMP';
    635558                    $flags{WROTE_BEGIN} = 0;
    636                     WritePSDirectory($exifTool, $outfile, $dir, undef, \%flags) or $err = 1;
     559                    WritePSDirectory($et, $outfile, $dir, undef, \%flags) or $err = 1;
    637560                    $doneDir{$dir} = 1;
    638561                }
     
    646569                        } else {
    647570                            $raf->ReadLine($data) or undef($data), last;
    648                             $dos and CheckPSEnd($raf, $psEnd, $data);
     571                            $dos and CheckPSEnd($raf, \$data);
    649572                            if ($data =~ /[\x0d\x0a]%%EOF\b/g) {
    650573                                # split data before "%%EOF"
     
    664587                    Write($outfile, @lines) or $err = 1 if @lines;
    665588                    while ($raf->Read($data, 65536)) {
    666                         $dos and CheckPSEnd($raf, $psEnd, $data);
     589                        $dos and CheckPSEnd($raf, \$data);
    667590                        Write($outfile, $data) or $err = 1;
    668591                    }
     
    684607                    seek($outfile, $pos, 0))
    685608            {
    686                 $exifTool->Error("Can't write DOS-style PS files in non-seekable stream");
     609                $et->Error("Can't write DOS-style PS files in non-seekable stream");
    687610                $err = 1;
    688611            }
     
    696619        foreach $dir (qw{Photoshop ICC_Profile XMP}) {
    697620            push @notDone, $dir if $$editDirs{$dir} and not $doneDir{$dir} and
    698                                    not $$exifTool{DEL_GROUP}{$dir};
    699         }
    700         @notDone and $exifTool->Warn("Couldn't write ".join('/',@notDone).' information');
    701     }
    702     $endToken and $exifTool->Error("File missing $endToken");
     621                                   not $$et{DEL_GROUP}{$dir};
     622        }
     623        @notDone and $et->Warn("Couldn't write ".join('/',@notDone).' information');
     624    }
     625    $endToken and $et->Error("File missing $endToken");
    703626    return $err ? -1 : 1;
    704627}
     
    764687=head1 AUTHOR
    765688
    766 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     689Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    767690
    768691This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.