Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/WritePNG.pl

    r24107 r34921  
    1414#------------------------------------------------------------------------------
    1515# Calculate CRC or update running CRC (ref 1)
    16 # Inputs: 0) data reference, 1) running crc to update (undef intially)
     16# Inputs: 0) data reference, 1) running crc to update (undef initially)
    1717#         2) data position (undef for 0), 3) data length (undef for all data),
    1818# Returns: updated CRC
     
    6161
    6262#------------------------------------------------------------------------------
    63 # Write profile to tEXt or zTXt chunk (zTXt if Zlib is available)
     63# Write profile chunk (possibly compressed if Zlib is available)
    6464# Inputs: 0) outfile, 1) Raw profile type, 2) data ref
    6565#         3) profile header type (undef if not a text profile)
     
    6969    my ($outfile, $rawType, $dataPt, $profile) = @_;
    7070    my ($buff, $prefix, $chunk, $deflate);
    71     if (eval 'require Compress::Zlib') {
     71    if ($rawType ne $stdCase{exif} and eval { require Compress::Zlib }) {
    7272        $deflate = Compress::Zlib::deflateInit();
    7373    }
    7474    if (not defined $profile) {
    7575        # write ICC profile as compressed iCCP chunk if possible
    76         return 0 unless $deflate;
    77         $buff = $deflate->deflate($$dataPt);
    78         return 0 unless defined $buff;
    79         $buff .= $deflate->flush();
    80         my %rawTypeChunk = ( icm => 'iCCP' );
    81         $chunk = $rawTypeChunk{$rawType} or return 0;
    82         $prefix = "$rawType\0\0";
    83         $dataPt = \$buff;
     76        if ($rawType eq 'icm') {
     77            return 0 unless $deflate;
     78            $chunk = 'iCCP';
     79            $prefix = "$rawType\0\0";
     80        } else {
     81            $chunk = $rawType;
     82            if ($rawType eq $stdCase{zxif}) {
     83                $prefix = "\0" . pack('N', length $$dataPt); # (proposed compressed EXIF)
     84            } else {
     85                $prefix = '';   # standard EXIF
     86            }
     87        }
     88        if ($deflate) {
     89            $buff = $deflate->deflate($$dataPt);
     90            return 0 unless defined $buff;
     91            $buff .= $deflate->flush();
     92            $dataPt = \$buff;
     93        }
    8494    } else {
    8595        # write as ASCII-hex encoded profile in tEXt or zTXt chunk
     
    107117
    108118#------------------------------------------------------------------------------
    109 # Add iCCP to the PNG image if necessary (must come before PLTE and IDAT)
     119# Add iCCP chunk to the PNG image if necessary (must come before PLTE and IDAT)
    110120# Inputs: 0) ExifTool object ref, 1) output file or scalar ref
    111121# Returns: true on success
    112122sub Add_iCCP($$)
    113123{
    114     my ($exifTool, $outfile) = @_;
    115     if ($exifTool->{ADD_DIRS}->{ICC_Profile}) {
     124    my ($et, $outfile) = @_;
     125    if ($$et{ADD_DIRS}{ICC_Profile}) {
    116126        # write new ICC data
    117127        my $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::ICC_Profile::Main');
    118128        my %dirInfo = ( Parent => 'PNG', DirName => 'ICC_Profile' );
    119         my $buff = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     129        my $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    120130        if (defined $buff and length $buff and WriteProfile($outfile, 'icm', \$buff)) {
    121             $exifTool->VPrint(0, "Created ICC profile\n");
    122             delete $exifTool->{ADD_DIRS}->{ICC_Profile}; # don't add it again
     131            $et->VPrint(0, "Created ICC profile\n");
     132            delete $$et{ADD_DIRS}{ICC_Profile}; # don't add it again
    123133        }
    124134    }
    125135    return 1;
     136}
     137
     138#------------------------------------------------------------------------------
     139# This routine is called after we edit an existing directory
     140# Inputs: 0) ExifTool ref, 1) dir name, 2) output data ref
     141#         3) flag set if location is non-standard (to update, but not create from scratch)
     142# - on return, $$outBuff is set to '' if the directory is to be deleted
     143sub DoneDir($$$;$)
     144{
     145    my ($et, $dir, $outBuff, $nonStandard) = @_;
     146    my $saveDir = $dir;
     147    $dir = 'EXIF' if $dir eq 'IFD0';
     148    # don't add this directory again unless this is in a non-standard location
     149    if (not $nonStandard) {
     150        delete $$et{ADD_DIRS}{$dir};
     151        delete $$et{ADD_DIRS}{IFD0} if $dir eq 'EXIF';
     152    } elsif ($$et{DEL_GROUP}{$dir} or $$et{DEL_GROUP}{$saveDir}) {
     153        $et->VPrint(0,"  Deleting non-standard $dir\n");
     154        $$outBuff = '';
     155    }
    126156}
    127157
     
    133163sub BuildTextChunk($$$$$)
    134164{
    135     my ($exifTool, $tag, $tagInfo, $val, $lang) = @_;
     165    my ($et, $tag, $tagInfo, $val, $lang) = @_;
    136166    my ($xtra, $compVal, $iTXt, $comp);
    137167    if ($$tagInfo{SubDirectory}) {
     
    144174    } else {
    145175        # compress if specified
    146         $comp = 1 if $exifTool->Options('Compress');
     176        $comp = 1 if $et->Options('Compress');
    147177        if ($lang) {
    148178            $iTXt = 1;      # write as iTXt if it has a language code
    149179            $tag =~ s/-$lang$//;    # remove language code from tagID
    150         } elsif ($$exifTool{OPTIONS}{Charset} ne 'Latin' and $val =~  /[\x80-\xff]/) {
     180        } elsif ($$et{OPTIONS}{Charset} ne 'Latin' and $val =~  /[\x80-\xff]/) {
    151181            $iTXt = 1;      # write as iTXt if it contains non-Latin special characters
    152182        }
     
    154184    if ($comp) {
    155185        my $warn;
    156         if (eval 'require Compress::Zlib') {
     186        if (eval { require Compress::Zlib }) {
    157187            my $deflate = Compress::Zlib::deflateInit();
    158188            $compVal = $deflate->deflate($val) if $deflate;
     
    172202        # warn if any user-specified compression fails
    173203        if ($warn and $comp == 1) {
    174             $exifTool->Warn("PNG:$$tagInfo{Name} not compressed ($warn)", 1);
     204            $et->Warn("PNG:$$tagInfo{Name} not compressed ($warn)", 1);
    175205        }
    176206    }
    177207    # decide whether to write as iTXt, zTXt or tEXt
    178208    if ($iTXt) {
    179         $$exifTool{TextChunkType} = 'iTXt';
     209        $$et{TextChunkType} = 'iTXt';
    180210        $xtra = (defined $compVal ? "\x01\0" : "\0\0") . ($lang || '') . "\0\0";
    181211        # iTXt is encoded as UTF-8 (but note that XMP is already UTF-8)
    182         $val = $exifTool->Encode($val, 'UTF8') if $iTXt == 1;
     212        $val = $et->Encode($val, 'UTF8') if $iTXt == 1;
    183213    } elsif (defined $compVal) {
    184         $$exifTool{TextChunkType} = 'zTXt';
     214        $$et{TextChunkType} = 'zTXt';
    185215        $xtra = "\0";
    186216    } else {
    187         $$exifTool{TextChunkType} = 'tEXt';
     217        $$et{TextChunkType} = 'tEXt';
    188218        $xtra = '';
    189219    }
     
    194224# Add any outstanding new chunks to the PNG image
    195225# Inputs: 0) ExifTool object ref, 1) output file or scalar ref
     226#         2-N) dirs to add (empty to add all except EXIF 'IFD0', including PNG tags)
    196227# Returns: true on success
    197 sub AddChunks($$)
    198 {
    199     my ($exifTool, $outfile) = @_;
     228sub AddChunks($$;@)
     229{
     230    my ($et, $outfile, @add) = @_;
     231    my ($addTags, $tag, $dir, $err, $tagTablePtr, $specified);
     232
     233    if (@add) {
     234        $addTags = { }; # don't add any PNG tags
     235        $specified = 1;
     236    } else {
     237        $addTags = $$et{ADD_PNG};    # add all PNG tags...
     238        delete $$et{ADD_PNG};        # ...once
     239        # add all directories
     240        @add = sort keys %{$$et{ADD_DIRS}};
     241    }
    200242    # write any outstanding PNG tags
    201     my $addTags = $exifTool->{ADD_PNG};
    202     delete $exifTool->{ADD_PNG};
    203     my ($tag, $dir, $err, $tagTablePtr);
    204 
    205243    foreach $tag (sort keys %$addTags) {
    206244        my $tagInfo = $$addTags{$tag};
    207         my $nvHash = $exifTool->GetNewValueHash($tagInfo);
    208         # (always create native PNG information, so don't check IsCreating())
    209         next unless Image::ExifTool::IsOverwriting($nvHash) > 0;
    210         my $val = Image::ExifTool::GetNewValues($nvHash);
     245        my $nvHash = $et->GetNewValueHash($tagInfo);
     246        # (native PNG information is always preferred, so don't check IsCreating)
     247        next unless $et->IsOverwriting($nvHash);
     248        my $val = $et->GetNewValue($nvHash);
    211249        if (defined $val) {
     250            next if $$nvHash{EditOnly};
    212251            my $data;
    213252            if ($$tagInfo{Table} eq \%Image::ExifTool::PNG::TextualData) {
    214                 $data = BuildTextChunk($exifTool, $tag, $tagInfo, $val, $$tagInfo{LangCode});
    215                 $data = $$exifTool{TextChunkType} . $data;
    216                 delete $$exifTool{TextChunkType};
     253                $data = BuildTextChunk($et, $tag, $tagInfo, $val, $$tagInfo{LangCode});
     254                $data = $$et{TextChunkType} . $data;
     255                delete $$et{TextChunkType};
    217256            } else {
    218257                $data = "$tag$val";
     
    221260            my $cbuf = pack('N', CalculateCRC(\$data, undef));
    222261            Write($outfile, $hdr, $data, $cbuf) or $err = 1;
    223             $exifTool->VerboseValue("+ PNG:$$tagInfo{Name}", $val);
    224             ++$exifTool->{CHANGED};
    225         }
    226     }
    227     $addTags = { };     # prevent from adding tags again
     262            $et->VerboseValue("+ PNG:$$tagInfo{Name}", $val);
     263            ++$$et{CHANGED};
     264        }
     265    }
    228266    # create any necessary directories
    229     foreach $dir (sort keys %{$exifTool->{ADD_DIRS}}) {
     267    foreach $dir (@add) {
     268        next unless $$et{ADD_DIRS}{$dir}; # make sure we want to add it first
    230269        my $buff;
    231270        my %dirInfo = (
     
    234273        );
    235274        if ($dir eq 'IFD0') {
    236             $exifTool->VPrint(0, "Creating EXIF profile:\n");
    237             $exifTool->{TIFF_TYPE} = 'APP1';
     275            next unless $specified;     # wait until specifically asked to write EXIF 'IFD0'
     276            my $chunk = $stdCase{exif};
     277            # (zxIf was not adopted)
     278            #if ($et->Options('Compress')) {
     279            #    if (eval { require Compress::Zlib }) {
     280            #        $chunk = $stdCase{zxif};
     281            #    } else {
     282            #        $et->Warn("Creating uncompressed $stdCase{exif} chunk (Compress::Zlib not available)");
     283            #    }
     284            #}
     285            $et->VPrint(0, "Creating $chunk chunk:\n");
     286            $$et{TIFF_TYPE} = 'APP1';
    238287            $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::Exif::Main');
    239             $buff = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr, \&Image::ExifTool::WriteTIFF);
     288            $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr, \&Image::ExifTool::WriteTIFF);
    240289            if (defined $buff and length $buff) {
    241                 $buff = $Image::ExifTool::exifAPP1hdr . $buff;
    242                 WriteProfile($outfile, 'APP1', \$buff, 'generic') or $err = 1;
     290                WriteProfile($outfile, $chunk, \$buff) or $err = 1;
    243291            }
    244292        } elsif ($dir eq 'XMP') {
    245             $exifTool->VPrint(0, "Creating XMP iTXt chunk:\n");
     293            $et->VPrint(0, "Creating XMP iTXt chunk:\n");
    246294            $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::XMP::Main');
    247295            $dirInfo{ReadOnly} = 1;
    248             $buff = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     296            $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    249297            if (defined $buff and length $buff and
    250298                # the packet is read-only (because of CRC)
     
    261309            }
    262310        } elsif ($dir eq 'IPTC') {
    263             $exifTool->VPrint(0, "Creating IPTC profile:\n");
    264             # write new IPTC data
     311            $et->Warn('Creating non-standard IPTC in PNG', 1);
     312            $et->VPrint(0, "Creating IPTC profile:\n");
     313            # write new IPTC data (stored in a Photoshop directory)
     314            $dirInfo{DirName} = 'Photoshop';
    265315            $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::Photoshop::Main');
    266             $buff = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     316            $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    267317            if (defined $buff and length $buff) {
    268318                WriteProfile($outfile, 'iptc', \$buff, 'IPTC') or $err = 1;
    269319            }
    270320        } elsif ($dir eq 'ICC_Profile') {
    271             $exifTool->VPrint(0, "Creating ICC profile:\n");
     321            $et->VPrint(0, "Creating ICC profile:\n");
    272322            # write new ICC data (only done if we couldn't create iCCP chunk)
    273323            $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::ICC_Profile::Main');
    274             $buff = $exifTool->WriteDirectory(\%dirInfo, $tagTablePtr);
     324            $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
    275325            if (defined $buff and length $buff) {
    276326                WriteProfile($outfile, 'icm', \$buff, 'ICC') or $err = 1;
    277                 $exifTool->Warn('Wrote ICC as a raw profile (no Compress::Zlib)');
    278             }
    279         }
    280     }
    281     $exifTool->{ADD_DIRS} = { };    # prevent from adding dirs again
     327                $et->Warn('Wrote ICC as a raw profile (no Compress::Zlib)');
     328            }
     329        } elsif ($dir eq 'PNG-pHYs') {
     330            $et->VPrint(0, "Creating pHYs chunk (default 2834 pixels per meter):\n");
     331            $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::PNG::PhysicalPixel');
     332            my $blank = "\0\0\x0b\x12\0\0\x0b\x12\x01"; # 2834 pixels per meter (72 dpi)
     333            $dirInfo{DataPt} = \$blank;
     334            $buff = $et->WriteDirectory(\%dirInfo, $tagTablePtr);
     335            if (defined $buff and length $buff) {
     336                $buff = 'pHYs' . $buff; # CRC includes chunk name
     337                my $hdr = pack('N', length($buff) - 4);
     338                my $cbuf = pack('N', CalculateCRC(\$buff, undef));
     339                Write($outfile, $hdr, $buff, $cbuf) or $err = 1;
     340            }
     341        } else {
     342            next;
     343        }
     344        delete $$et{ADD_DIRS}{$dir};  # don't add again
     345    }
    282346    return not $err;
    283347}
     
    305369
    306370Existing text tags are always rewritten in their original form (compressed
    307 zTXt, uncompressed tEXt or internation iTXt), so pre-existing compressed
     371zTXt, uncompressed tEXt or international iTXt), so pre-existing compressed
    308372information can only be modified if Compress::Zlib is available.
    309373
     
    316380=head1 AUTHOR
    317381
    318 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     382Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    319383
    320384This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.