Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/WriteIPTC.pl

    r24107 r34921  
    8686    my $code = $iptcCharsetInv{uc($val)};
    8787    unless ($code) {
    88         if (($code = $val) =~ s/ESC /\x1b/g) {  # translate ESC chars
     88        if (($code = $val) =~ s/ESC */\x1b/ig) {  # translate ESC chars
    8989            $code =~ s/, \x1b/\x1b/g;   # remove comma separators
    9090            $code =~ tr/ //d;           # remove spaces
     
    102102sub CheckIPTC($$$)
    103103{
    104     my ($exifTool, $tagInfo, $valPtr) = @_;
    105     my $format = $$tagInfo{Format} || $tagInfo->{Table}->{FORMAT} || '';
     104    my ($et, $tagInfo, $valPtr) = @_;
     105    my $format = $$tagInfo{Format} || $$tagInfo{Table}{FORMAT} || '';
    106106    if ($format =~ /^int(\d+)/) {
    107107        my $bytes = int(($1 || 0) / 8);
    108         if ($bytes ne 1 and $bytes ne 2 and $bytes ne 4) {
     108        if ($bytes != 1 and $bytes != 2 and $bytes != 4) {
    109109            return "Can't write $bytes-byte integer";
    110110        }
     
    128128            }
    129129        }
    130         if (defined $minlen) {
     130        if (defined $minlen and $fmt ne 'string') { # (must truncate strings later, after recoding)
    131131            $maxlen or $maxlen = $minlen;
    132             return "String too short (minlen is $minlen)" if $len < $minlen;
    133             if ($len > $maxlen and not $exifTool->Options('IgnoreMinorErrors')) {
    134                 $$exifTool{CHECK_WARN} = "[minor] IPTC:$$tagInfo{Name} exceeds length limit (truncated)";
     132            if ($len < $minlen) {
     133                unless ($$et{OPTIONS}{IgnoreMinorErrors}) {
     134                    return "[Minor] String too short (minlen is $minlen)";
     135                }
     136                $$et{CHECK_WARN} = "String too short for IPTC:$$tagInfo{Name} (written anyway)";
     137            } elsif ($len > $maxlen and not $$et{OPTIONS}{IgnoreMinorErrors}) {
     138                $$et{CHECK_WARN} = "[Minor] IPTC:$$tagInfo{Name} exceeds length limit (truncated)";
    135139                $$valPtr = substr($$valPtr, 0, $maxlen);
    136140            }
     
    150154sub FormatIPTC($$$$$;$)
    151155{
    152     my ($exifTool, $tagInfo, $valPtr, $xlatPtr, $rec, $read) = @_;
     156    my ($et, $tagInfo, $valPtr, $xlatPtr, $rec, $read) = @_;
    153157    my $format = $$tagInfo{Format} || $$tagInfo{Table}{FORMAT};
    154158    return unless $format;
     
    177181        if ($rec == 1) {
    178182            if ($$tagInfo{Name} eq 'CodedCharacterSet') {
    179                 $$xlatPtr = HandleCodedCharset($exifTool, $$valPtr);
     183                $$xlatPtr = HandleCodedCharset($et, $$valPtr);
    180184            }
    181185        } elsif ($$xlatPtr and $rec < 7 and $$valPtr =~ /[\x80-\xff]/) {
    182             TranslateCodedString($exifTool, $valPtr, $xlatPtr, $read);
     186            TranslateCodedString($et, $valPtr, $xlatPtr, $read);
     187        }
     188        # must check length now (after any string recoding)
     189        if (not $read and $format =~ /^string\[(\d+),?(\d*)\]$/) {
     190            my ($minlen, $maxlen) = ($1, $2);
     191            my $len = length $$valPtr;
     192            $maxlen or $maxlen = $minlen;
     193            if ($len < $minlen) {
     194                if ($et->Warn("String too short for IPTC:$$tagInfo{Name} (padded)", 2)) {
     195                    $$valPtr .= ' ' x ($minlen - $len);
     196                }
     197            } elsif ($len > $maxlen) {
     198                if ($et->Warn("IPTC:$$tagInfo{Name} exceeds length limit (truncated)", 2)) {
     199                    $$valPtr = substr($$valPtr, 0, $maxlen);
     200                    # make sure UTF-8 is still valid
     201                    if (($$xlatPtr || $et->Options('Charset')) eq 'UTF8') {
     202                        require Image::ExifTool::XMP;
     203                        Image::ExifTool::XMP::FixUTF8($valPtr,'.');
     204                    }
     205                }
     206            }
    183207        }
    184208    }
     
    192216{
    193217    my $val = shift;
    194     unless ($val =~ s/.*(\d{4}):?(\d{2}):?(\d{2}).*/$1$2$3/s) {
     218    unless ($val =~ s{^.*(\d{4})[-:/.]?(\d{2})[-:/.]?(\d{2}).*}{$1$2$3}s) {
    195219        warn "Invalid date format (use YYYY:mm:dd)\n";
    196220        undef $val;
     
    214238            $tz = '+0000';  # UTC
    215239        } else {
    216             # use local system timezone by default 
     240            # use local system timezone by default
    217241            my (@tm, $time);
    218             if ($date and $date =~ /^(\d{4}):(\d{2}):(\d{2})\s*$/ and eval 'require Time::Local') {
     242            if ($date and $date =~ /^(\d{4}):(\d{2}):(\d{2})\s*$/ and eval { require Time::Local }) {
    219243                # we were given a date too, so determine the local timezone
    220244                # offset at the specified date/time
    221                 my @d = ($3,$2-1,$1-1900);
     245                my @d = ($3,$2-1,$1);
    222246                $val =~ /(\d{2})(\d{2})(\d{2})/;
    223247                @tm = ($3,$2,$1,@d);
     
    238262        undef $val;     # time format error
    239263    }
     264    return $val;
     265}
     266
     267#------------------------------------------------------------------------------
     268# Inverse print conversion for IPTC date or time value
     269# Inputs: 0) ExifTool ref, 1) IPTC date or 'now'
     270# Returns: IPTC date
     271sub InverseDateOrTime($$)
     272{
     273    my ($et, $val) = @_;
     274    return $et->TimeNow() if lc($val) eq 'now';
    240275    return $val;
    241276}
     
    292327sub DoWriteIPTC($$$)
    293328{
    294     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    295     my $verbose = $exifTool->Options('Verbose');
    296     my $out = $exifTool->Options('TextOut');
     329    my ($et, $dirInfo, $tagTablePtr) = @_;
     330    my $verbose = $et->Options('Verbose');
     331    my $out = $et->Options('TextOut');
    297332
    298333    # avoid editing IPTC directory unless necessary:
     
    300335    # - avoids changing current MD5 digest unnecessarily
    301336    # - avoids adding mandatory tags unless some other IPTC is changed
    302     unless (exists $$exifTool{EDIT_DIRS}{$$dirInfo{DirName}} or
    303         # standard IPTC tags in other locations should be edited too (ie. AFCP_IPTC)
    304         ($tagTablePtr = \%Image::ExifTool::IPTC::Main and exists $$exifTool{EDIT_DIRS}{IPTC}))
     337    unless (exists $$et{EDIT_DIRS}{$$dirInfo{DirName}} or
     338        # standard IPTC tags in other locations should be edited too (eg. AFCP_IPTC)
     339        ($tagTablePtr eq \%Image::ExifTool::IPTC::Main and exists $$et{EDIT_DIRS}{IPTC}))
    305340    {
    306         print $out "$$exifTool{INDENT}  [nothing changed]\n" if $verbose;
     341        print $out "$$et{INDENT}  [nothing changed]\n" if $verbose;
    307342        return undef;
    308343    }
     
    317352
    318353    # start by assuming default IPTC encoding
    319     my $xlat = $exifTool->Options('CharsetIPTC');
    320     undef $xlat if $xlat eq $exifTool->Options('Charset');
     354    my $xlat = $et->Options('CharsetIPTC');
     355    undef $xlat if $xlat eq $et->Options('Charset');
    321356
    322357    # make sure our dataLen is defined (note: allow zero length directory)
     
    330365                         substr($$dataPt, $start + 3, 1) eq "\x1c")
    331366    {
    332         $exifTool->Warn('IPTC data was improperly byte-swapped');
     367        $et->Warn('IPTC data was improperly byte-swapped');
    333368        my $newData = pack('N*', unpack('V*', substr($$dataPt, $start, $dirLen) . "\0\0\0"));
    334369        $dataPt = \$newData;
     
    339374    my %recordNum;
    340375    foreach $tag (Image::ExifTool::TagTableKeys($tagTablePtr)) {
    341         $tagInfo = $tagTablePtr->{$tag};
     376        $tagInfo = $$tagTablePtr{$tag};
    342377        $$tagInfo{SubDirectory} or next;
    343         my $table = $tagInfo->{SubDirectory}->{TagTable} or next;
     378        my $table = $$tagInfo{SubDirectory}{TagTable} or next;
    344379        my $subTablePtr = Image::ExifTool::GetTagTable($table);
    345380        $recordNum{$subTablePtr} = $tag;
     
    348383    # loop through new values and accumulate all IPTC information
    349384    # into lists based on their IPTC record type
    350     foreach $tagInfo ($exifTool->GetNewTagInfoList()) {
     385    foreach $tagInfo ($et->GetNewTagInfoList()) {
    351386        my $table = $$tagInfo{Table};
    352387        my $record = $recordNum{$table};
     
    392427                if ($rec < $lastRec) {
    393428                    if ($rec == 0) {
    394                         return undef if $exifTool->Warn("IPTC record 0 encountered, subsequent records ignored", 1);
     429                        return undef if $et->Warn("IPTC record 0 encountered, subsequent records ignored", 2);
    395430                        undef $rec;
    396431                        $pos = $dirEnd;
    397432                        $len = 0;
    398433                    } else {
    399                         return undef if $exifTool->Warn("IPTC doesn't conform to spec: Records out of sequence", 1);
     434                        return undef if $et->Warn("IPTC doesn't conform to spec: Records out of sequence", 2);
    400435                    }
    401436                }
     
    455490                                next if $foundRec{$lastRec}->{$mandTag};
    456491                                unless ($subTablePtr) {
    457                                     $tagInfo = $tagTablePtr->{$lastRec};
     492                                    $tagInfo = $$tagTablePtr{$lastRec};
    458493                                    $tagInfo and $$tagInfo{SubDirectory} or warn("WriteIPTC: Internal error 1\n"), next;
    459                                     $tagInfo->{SubDirectory}->{TagTable} or next;
    460                                     $subTablePtr = Image::ExifTool::GetTagTable($tagInfo->{SubDirectory}->{TagTable});
     494                                    $$tagInfo{SubDirectory}{TagTable} or next;
     495                                    $subTablePtr = Image::ExifTool::GetTagTable($$tagInfo{SubDirectory}{TagTable});
    461496                                }
    462                                 $tagInfo = $subTablePtr->{$mandTag} or warn("WriteIPTC: Internal error 2\n"), next;
    463                                 my $value = $mandatory->{$mandTag};
    464                                 $exifTool->VerboseValue("+ IPTC:$$tagInfo{Name}", $value, ' (mandatory)');
     497                                $tagInfo = $$subTablePtr{$mandTag} or warn("WriteIPTC: Internal error 2\n"), next;
     498                                my $value = $$mandatory{$mandTag};
     499                                $et->VerboseValue("+ IPTC:$$tagInfo{Name}", $value, ' (mandatory)');
    465500                                # apply necessary format conversions
    466                                 FormatIPTC($exifTool, $tagInfo, \$value, \$xlat, $lastRec);
     501                                FormatIPTC($et, $tagInfo, \$value, \$xlat, $lastRec);
    467502                                $len = length $value;
    468503                                # generate our new entry
     
    470505                                $newData .= $entry . $value;    # add entry to new IPTC data
    471506                                # (don't mark as changed if just mandatory tags changed)
    472                                 # ++$exifTool->{CHANGED};
     507                                # ++$$et{CHANGED};
    473508                            }
    474509                        }
     
    487522                }
    488523                my $newTag = $$tagInfo{TagID};
    489                 my $nvHash = $exifTool->GetNewValueHash($tagInfo);
     524                my $nvHash = $et->GetNewValueHash($tagInfo);
    490525                # only add new values if...
    491526                my ($doSet, @values);
     
    496531                } elsif ($$tagInfo{List}) {
    497532                    # ...tag is List and it existed before or we are creating it
    498                     $doSet = 1 if $found or Image::ExifTool::IsCreating($nvHash);
     533                    $doSet = 1 if $found ? not $$nvHash{CreateOnly} : $$nvHash{IsCreating};
    499534                } else {
    500535                    # ...tag didn't exist before and we are creating it
    501                     $doSet = 1 if not $found and Image::ExifTool::IsCreating($nvHash);
     536                    $doSet = 1 if not $found and $$nvHash{IsCreating};
    502537                }
    503538                if ($doSet) {
    504                     @values = Image::ExifTool::GetNewValues($nvHash);
     539                    @values = $et->GetNewValue($nvHash);
    505540                    @values and $foundRec{$newRec}->{$newTag} = $found | 0x04;
    506541                    # write tags for each value in list
    507542                    my $value;
    508543                    foreach $value (@values) {
    509                         $exifTool->VerboseValue("+ IPTC:$$tagInfo{Name}", $value);
     544                        $et->VerboseValue("+ $$dirInfo{DirName}:$$tagInfo{Name}", $value);
    510545                        # reset allMandatory flag if a non-mandatory tag is written
    511546                        if ($allMandatory) {
    512547                            my $mandatory = $mandatory{$newRec};
    513                             $allMandatory = 0 unless $mandatory and $mandatory->{$newTag};
     548                            $allMandatory = 0 unless $mandatory and $$mandatory{$newTag};
    514549                        }
    515550                        # apply necessary format conversions
    516                         FormatIPTC($exifTool, $tagInfo, \$value, \$xlat, $newRec);
     551                        FormatIPTC($et, $tagInfo, \$value, \$xlat, $newRec);
    517552                        # (note: IPTC string values are NOT null terminated)
    518553                        $len = length $value;
     
    526561                        }
    527562                        $newData .= $entry . $value;    # add entry to new IPTC data
    528                         ++$exifTool->{CHANGED};
     563                        ++$$et{CHANGED};
    529564                    }
    530565                }
     
    553588        $tagInfo = $set{$rec}->{$tag};
    554589        if ($tagInfo) {
    555             my $nvHash = $exifTool->GetNewValueHash($tagInfo);
     590            my $nvHash = $et->GetNewValueHash($tagInfo);
    556591            $len = $pos - $valuePtr;
    557592            my $val = substr($$dataPt, $valuePtr, $len);
     593            # remove null terminator if it exists (written by braindead software like Picasa 2.0)
     594            $val =~ s/\0+$// if $$tagInfo{Format} and $$tagInfo{Format} =~ /^string/;
    558595            my $oldXlat = $xlat;
    559             FormatIPTC($exifTool, $tagInfo, \$val, \$xlat, $rec, 1);
    560             if (Image::ExifTool::IsOverwriting($nvHash, $val)) {
     596            FormatIPTC($et, $tagInfo, \$val, \$xlat, $rec, 1);
     597            if ($et->IsOverwriting($nvHash, $val)) {
    561598                $xlat = $oldXlat;   # don't change translation (not writing this value)
    562                 $exifTool->VerboseValue("- IPTC:$$tagInfo{Name}", $val);
    563                 ++$exifTool->{CHANGED};
     599                $et->VerboseValue("- $$dirInfo{DirName}:$$tagInfo{Name}", $val);
     600                ++$$et{CHANGED};
    564601                # set deleted flag to indicate we found and deleted this tag
    565602                $foundRec{$rec}->{$tag} |= 0x02;
     
    577614            # handle CodedCharacterSet tag
    578615            my $val = substr($$dataPt, $valuePtr, $pos - $valuePtr);
    579             $xlat = HandleCodedCharset($exifTool, $val);
     616            $xlat = HandleCodedCharset($et, $val);
    580617        }
    581618        # reset allMandatory flag if a non-mandatory tag is written
    582619        if ($allMandatory) {
    583620            my $mandatory = $mandatory{$rec};
    584             unless ($mandatory and $mandatory->{$tag}) {
     621            unless ($mandatory and $$mandatory{$tag}) {
    585622                $allMandatory = 0;
    586623            }
     
    591628    # make sure the rest of the data is zero
    592629    if ($tail < $dirEnd) {
    593         my $trailer = substr($$dataPt, $tail, $dirEnd-$tail);
    594         if ($trailer =~ /[^\0]/) {
    595             return undef if $exifTool->Warn('Unrecognized data in IPTC trailer', 1);
     630        my $pad = substr($$dataPt, $tail, $dirEnd-$tail);
     631        if ($pad =~ /[^\0]/) {
     632            return undef if $et->Warn('Unrecognized data in IPTC padding', 2);
    596633        }
    597634    }
     
    606643sub WriteIPTC($$$)
    607644{
    608     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    609     $exifTool or return 1;    # allow dummy access to autoload this package
    610 
    611     my $newData = DoWriteIPTC($exifTool, $dirInfo, $tagTablePtr);
     645    my ($et, $dirInfo, $tagTablePtr) = @_;
     646    $et or return 1;    # allow dummy access to autoload this package
     647
     648    my $newData = DoWriteIPTC($et, $dirInfo, $tagTablePtr);
    612649
    613650    # calculate standard IPTC digests only if we are writing or deleting
    614651    # Photoshop:IPTCDigest with a value of 'new' or 'old'
    615652    while ($Image::ExifTool::Photoshop::iptcDigestInfo) {
    616         my $nvHash = $exifTool->{NEW_VALUE}{$Image::ExifTool::Photoshop::iptcDigestInfo};
     653        my $nvHash = $$et{NEW_VALUE}{$Image::ExifTool::Photoshop::iptcDigestInfo};
    617654        last unless defined $nvHash;
    618         last unless IsStandardIPTC($exifTool->MetadataPath());
    619         my @values = Image::ExifTool::GetNewValues($nvHash);
     655        last unless IsStandardIPTC($et->MetadataPath());
     656        my @values = $et->GetNewValue($nvHash);
    620657        push @values, @{$$nvHash{DelValue}} if $$nvHash{DelValue};
    621658        my $new = grep /^new$/, @values;
    622659        my $old = grep /^old$/, @values;
    623660        last unless $new or $old;
    624         unless (eval 'require Digest::MD5') {
    625             $exifTool->Warn('Digest::MD5 must be installed to calculate IPTC digest');
     661        unless (eval { require Digest::MD5 }) {
     662            $et->Warn('Digest::MD5 must be installed to calculate IPTC digest');
    626663            last;
    627664        }
     
    638675            }
    639676            # set NewIPTCDigest data member unless IPTC is being deleted
    640             $$exifTool{NewIPTCDigest} = Digest::MD5::md5($$dataPt) if length $$dataPt;
     677            $$et{NewIPTCDigest} = Digest::MD5::md5($$dataPt) if length $$dataPt;
    641678        }
    642679        if ($old) {
    643680            if ($new and not defined $newData) {
    644                 $$exifTool{OldIPTCDigest} = $$exifTool{NewIPTCDigest};
     681                $$et{OldIPTCDigest} = $$et{NewIPTCDigest};
    645682            } elsif ($$dirInfo{DataPt}) { #(may be undef if creating new IPTC)
    646683                $dataPt = $$dirInfo{DataPt};
     
    649686                    $dataPt = \$buff;
    650687                }
    651                 $$exifTool{OldIPTCDigest} = Digest::MD5::md5($$dataPt) if length $$dataPt;
     688                $$et{OldIPTCDigest} = Digest::MD5::md5($$dataPt) if length $$dataPt;
    652689            }
    653690        }
    654691        last;
    655692    }
     693    # set changed if ForceWrite tag was set to "IPTC"
     694    ++$$et{CHANGED} if defined $newData and length $newData and $$et{FORCE_WRITE}{IPTC};
    656695    return $newData;
    657696}
     
    677716=head1 AUTHOR
    678717
    679 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     718Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    680719
    681720This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.