Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields that join_before_split should be applied to. Previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/GPS.pm

    r24107 r34921  
    1313use Image::ExifTool::Exif;
    1414
    15 $VERSION = '1.34';
     15$VERSION = '1.52';
    1616
    1717my %coordConv = (
     
    3939        Name => 'GPSLatitudeRef',
    4040        Writable => 'string',
     41        Notes => q{
     42            tags 0x0001-0x0006 used for camera location according to MWG 2.0. ExifTool
     43            will also accept a number when writing GPSLatitudeRef, positive for north
     44            latitudes or negative for south, or a string ending in N or S
     45        },
    4146        Count => 2,
    4247        PrintConv => {
     
    4651                my ($val, $inv) = @_;
    4752                return undef unless $inv;
    48                 return uc $1 if $val =~ /\b([NS])$/i;
    49                 return $1 eq '-' ? 'S' : 'N' if $val =~ /^([-+]?)\d+(\.\d*)?$/;
     53                return uc $1 if $val =~ /\b([NS])\b/i;
     54                return $1 eq '-' ? 'S' : 'N' if $val =~ /([-+]?)\d+/;
    5055                return undef;
    5156            },
     
    6469        Writable => 'string',
    6570        Count => 2,
     71        Notes => q{
     72            ExifTool will also accept a number when writing this tag, positive for east
     73            longitudes or negative for west, or a string ending in E or W
     74        },
    6675        PrintConv => {
    6776            # extract E/W if written from Composite:GPSLongitude
     
    7079                my ($val, $inv) = @_;
    7180                return undef unless $inv;
    72                 return uc $1 if $val =~ /\b([EW])$/i;
    73                 return $1 eq '-' ? 'W' : 'E' if $val =~ /^([-+]?)\d+(\.\d*)?$/;
     81                return uc $1 if $val =~ /\b([EW])\b/i;
     82                return $1 eq '-' ? 'W' : 'E' if $val =~ /([-+]?)\d+/;
    7483                return undef;
    7584            },
     
    8796        Name => 'GPSAltitudeRef',
    8897        Writable => 'int8u',
    89         PrintConv => {
     98        Notes => q{
     99            ExifTool will also accept number when writing this tag, with negative
     100            numbers indicating below sea level
     101        },
     102        PrintConv => {
     103            OTHER => sub {
     104                my ($val, $inv) = @_;
     105                return undef unless $inv and $val =~ /^([-+0-9])/;
     106                return($1 eq '-' ? 1 : 0);
     107            },
    90108            0 => 'Above Sea Level',
    91109            1 => 'Below Sea Level',
     
    107125        Shift => 'Time',
    108126        Notes => q{
    109             when writing, date is stripped off if present, and time is adjusted to UTC
    110             if it includes a timezone
     127            UTC time of GPS fix.  When writing, date is stripped off if present, and
     128            time is adjusted to UTC if it includes a timezone
    111129        },
    112130        ValueConv => 'Image::ExifTool::GPS::ConvertTimeStamp($val)',
    113131        ValueConvInv => '$val=~tr/:/ /;$val',
     132        PrintConv => 'Image::ExifTool::GPS::PrintTimeStamp($val)',
    114133        # pull time out of any format date/time string
    115134        # (converting to UTC if a timezone is given)
    116135        PrintConvInv => sub {
    117             my $v = shift;
     136            my ($v, $et) = @_;
     137            $v = $et->TimeNow() if lc($v) eq 'now';
    118138            my @tz;
    119139            if ($v =~ s/([-+])(.*)//s) {    # remove timezone
     
    210230        Name => 'GPSDestLatitudeRef',
    211231        Writable => 'string',
    212         Count => 2,
    213         PrintConv => {
    214             N => 'North',
    215             S => 'South',
    216         },
     232        Notes => 'tags 0x0013-0x001a used for subject location according to MWG 2.0',
     233        Count => 2,
     234        PrintConv => { N => 'North', S => 'South' },
    217235    },
    218236    0x0014 => {
     
    226244        Writable => 'string',
    227245        Count => 2,
    228         PrintConv => {
    229             E => 'East',
    230             W => 'West',
    231         },
     246        PrintConv => { E => 'East', W => 'West' },
    232247    },
    233248    0x0016 => {
     
    268283        Writable => 'undef',
    269284        Notes => 'values of "GPS", "CELLID", "WLAN" or "MANUAL" by the EXIF spec.',
    270         RawConv => 'Image::ExifTool::Exif::ConvertExifText($self,$val)',
     285        RawConv => 'Image::ExifTool::Exif::ConvertExifText($self,$val,1,$tag)',
    271286        RawConvInv => 'Image::ExifTool::Exif::EncodeExifText($self,$val)',
    272287    },
     
    274289        Name => 'GPSAreaInformation',
    275290        Writable => 'undef',
    276         RawConv => 'Image::ExifTool::Exif::ConvertExifText($self,$val)',
     291        RawConv => 'Image::ExifTool::Exif::ConvertExifText($self,$val,1,$tag)',
    277292        RawConvInv => 'Image::ExifTool::Exif::EncodeExifText($self,$val)',
    278293    },
     
    281296        Groups => { 2 => 'Time' },
    282297        Writable => 'string',
    283         Notes => 'YYYY:mm:dd',
     298        Format => 'undef', # (Casio EX-H20G uses "\0" instead of ":" as a separator)
    284299        Count => 11,
    285300        Shift => 'Time',
    286301        Notes => q{
    287302            when writing, time is stripped off if present, after adjusting date/time to
    288             UTC if time includes a timezone
    289         },
     303            UTC if time includes a timezone.  Format is YYYY:mm:dd
     304        },
     305        RawConv => '$val =~ s/\0+$//; $val',
    290306        ValueConv => 'Image::ExifTool::Exif::ExifDate($val)',
    291307        ValueConvInv => '$val',
     
    294310        PrintConvInv => q{
    295311            my $secs;
     312            $val = $self->TimeNow() if lc($val) eq 'now';
    296313            if ($val =~ /[-+]/ and ($secs = Image::ExifTool::GetUnixTime($val, 1))) {
    297314                $val = Image::ExifTool::ConvertUnixTime($secs);
     
    315332        Writable => 'rational64u',
    316333    },
     334    # 0xea1c - Nokia Lumina 1020, Samsung GT-I8750, and other Windows 8
     335    #          phones write this (padding) in GPS IFD - PH
    317336);
    318337
     
    352371    },
    353372    GPSAltitude => {
    354         SubDoc => 1,    # generate for all sub-documents
     373        SubDoc => [1,3], # generate for sub-documents if Desire 1 or 3 has a chance to exist
    355374        Desire => {
    356375            0 => 'GPS:GPSAltitude',
     
    364383            my $alt = $val[0];
    365384            $alt = $val[2] unless defined $alt;
    366             return undef unless defined $alt;
    367             return ($val[1] || $val[3]) ? -$alt : $alt;
     385            return undef unless defined $alt and IsFloat($alt);
     386            return(($val[1] || $val[3]) ? -$alt : $alt);
    368387        },
    369388        PrintConv => q{
    370389            $val = int($val * 10) / 10;
    371             return ($val =~ s/^-// ? "$val m Below" : "$val m Above") . " Sea Level";
    372         },
     390            return(($val =~ s/^-// ? "$val m Below" : "$val m Above") . " Sea Level");
     391        },
     392    },
     393    GPSDestLatitude => {
     394        Require => {
     395            0 => 'GPS:GPSDestLatitude',
     396            1 => 'GPS:GPSDestLatitudeRef',
     397        },
     398        ValueConv => '$val[1] =~ /^S/i ? -$val[0] : $val[0]',
     399        PrintConv => 'Image::ExifTool::GPS::ToDMS($self, $val, 1, "N")',
     400    },
     401    GPSDestLongitude => {
     402        SubDoc => 1,    # generate for all sub-documents
     403        Require => {
     404            0 => 'GPS:GPSDestLongitude',
     405            1 => 'GPS:GPSDestLongitudeRef',
     406        },
     407        ValueConv => '$val[1] =~ /^W/i ? -$val[0] : $val[0]',
     408        PrintConv => 'Image::ExifTool::GPS::ToDMS($self, $val, 1, "E")',
    373409    },
    374410);
     
    388424    $h = int($f / 3600); $f -= $h * 3600;
    389425    $m = int($f / 60);   $f -= $m * 60;
    390     $s = int($f);        $f -= $s;
    391     $f = int($f * 1000000 + 0.5);
    392     if ($f) {
    393         ($f = sprintf(".%.6d", $f)) =~ s/0+$//;
     426    my $ss = sprintf('%012.9f', $f);
     427    if ($ss >= 60) {
     428        $ss = '00';
     429        ++$m >= 60 and $m -= 60, ++$h;
    394430    } else {
    395         $f = ''
     431        $ss =~ s/\.?0+$//;  # trim trailing zeros + decimal
    396432    }
    397     return sprintf("%.2d:%.2d:%.2d$f",$h,$m,$s);
     433    return sprintf("%.2d:%.2d:%s",$h,$m,$ss);
     434}
     435
     436#------------------------------------------------------------------------------
     437# Print GPS timestamp
     438# Inputs: 0) EXIF-formatted time string
     439# Returns: time rounded to the nearest microsecond
     440sub PrintTimeStamp($)
     441{
     442    my $val = shift;
     443    return $val unless $val =~ s/:(\d{2}\.\d+)$//;
     444    my $s = int($1 * 1000000 + 0.5) / 1000000;
     445    $s = "0$s" if $s < 10;
     446    return "${val}:$s";
    398447}
    399448
     
    406455sub ToDMS($$;$$)
    407456{
    408     my ($exifTool, $val, $doPrintConv, $ref) = @_;
    409     my ($fmt, $num);
    410 
     457    my ($et, $val, $doPrintConv, $ref) = @_;
     458    my ($fmt, @fmt, $num, $sign, $rtnVal);
     459
     460    unless (length $val) {
     461        # don't convert an empty value
     462        return $val if $doPrintConv and $doPrintConv eq '1';  # avoid hiding existing tag when extracting
     463        return undef; # avoid writing empty value
     464    }
    411465    if ($ref) {
    412466        if ($val < 0) {
    413467            $val = -$val;
    414468            $ref = {N => 'S', E => 'W'}->{$ref};
     469            $sign = '-';
     470        } else {
     471            $sign = '+';
    415472        }
    416473        $ref = " $ref" unless $doPrintConv and $doPrintConv eq '2';
     
    421478    if ($doPrintConv) {
    422479        if ($doPrintConv eq '1') {
    423             $fmt = ($exifTool->Options('CoordFormat') || q{%d deg %d' %.2f"}) . $ref;
     480            $fmt = $et->Options('CoordFormat');
     481            if (not $fmt) {
     482                $fmt = q{%d deg %d' %.2f"} . $ref;
     483            } elsif ($ref) {
     484                # use signed value instead of reference direction if specified
     485                $fmt =~ s/%\+/$sign%/g or $fmt .= $ref;
     486            } else {
     487                $fmt =~ s/%\+/%/g;  # don't know sign, so don't print it
     488            }
    424489        } else {
    425             $fmt = "%d,%.6f$ref";   # use XMP standard format
     490            $fmt = "%d,%.8f$ref";   # use XMP format with 8 decimal minutes
    426491        }
    427         # count the number of format specifiers
    428         $num = ($fmt =~ tr/%/%/);
     492        # count (and capture) the format specifiers (max 3)
     493        while ($fmt =~ /(%(%|[^%]*?[diouxXDOUeEfFgGcs]))/g) {
     494            next if $1 eq '%%';
     495            push @fmt, $1;
     496            last if @fmt >= 3;
     497        }
     498        $num = scalar @fmt;
    429499    } else {
    430500        $num = 3;
    431501    }
    432     my ($d, $m, $s);
    433     $d = $val;
     502    my @c;  # coordinates (D) or (D,M) or (D,M,S)
     503    $c[0] = $val;
    434504    if ($num > 1) {
    435         $d = int($d);
    436         $m = ($val - $d) * 60;
     505        $c[0] = int($c[0]);
     506        $c[1] = ($val - $c[0]) * 60;
    437507        if ($num > 2) {
    438             $m = int($m);
    439             $s = ($val - $d - $m / 60) * 3600;
     508            $c[1] = int($c[1]);
     509            $c[2] = ($val - $c[0] - $c[1] / 60) * 3600;
     510        }
     511        # handle round-off errors to ensure minutes and seconds are
     512        # less than 60 (eg. convert "72 59 60.00" to "73 0 0.00")
     513        $c[-1] = $doPrintConv ? sprintf($fmt[-1], $c[-1]) : ($c[-1] . '');
     514        if ($c[-1] >= 60) {
     515            $c[-1] -= 60;
     516            ($c[-2] += 1) >= 60 and $num > 2 and $c[-2] -= 60, $c[-3] += 1;
    440517        }
    441518    }
    442     return $doPrintConv ? sprintf($fmt, $d, $m, $s) : "$d $m $s$ref";
     519    if ($doPrintConv) {
     520        $rtnVal = sprintf($fmt, @c);
     521        # trim trailing zeros in XMP
     522        $rtnVal =~ s/(\d)0+$ref$/$1$ref/ if $doPrintConv eq '2';
     523    } else {
     524        $rtnVal = "@c$ref";
     525    }
     526    return $rtnVal;
    443527}
    444528
     
    451535{
    452536    my ($val, $doSign) = @_;
     537    return '' if $val =~ /\b(inf|undef)\b/; # ignore invalid values
    453538    # extract decimal or floating point values out of any other garbage
    454539    my ($d, $m, $s) = ($val =~ /((?:[+-]?)(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee][+-]\d+)?)/g);
    455     my $deg = ($d || 0) + (($m || 0) + ($s || 0)/60) / 60;
     540    return '' unless defined $d;
     541    my $deg = $d + (($m || 0) + ($s || 0)/60) / 60;
    456542    # make negative if S or W coordinate
    457543    $deg = -$deg if $doSign ? $val =~ /[^A-Z](S|W)$/i : $deg < 0;
     
    479565=head1 AUTHOR
    480566
    481 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     567Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    482568
    483569This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.