Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/GeoTiff.pm

    r24107 r34921  
    55#
    66# Revisions:    02/23/2004 - P. Harvey Created
    7 #               02/25/2004 - P. Harvey Added new codes from libgeotiff-1.2.1
    8 #               02/01/2007 - P. Harvey Added new codes from libgeotiff-1.2.3
     7#               02/25/2004 - PH Added new codes from libgeotiff-1.2.1
     8#               02/01/2007 - PH Added new codes from libgeotiff-1.2.3
     9#               01/22/2014 - PH Added new code from libgeotiff-1.4.0
     10#               01/19/2015 - PH Added ChartTIFF tags
    911#
    10 # Reference:    ftp://ftp.remotesensing.org/geotiff/libgeotiff/libgeotiff-1.1.4.tar.gz
     12# References:   1) ftp://ftp.remotesensing.org/geotiff/libgeotiff/libgeotiff-1.1.4.tar.gz
     13#               2) http://www.charttiff.com/whitepapers.shtml
    1114#------------------------------------------------------------------------------
    1215
     
    1720use Image::ExifTool qw(:DataAccess :Utils);
    1821
    19 $VERSION = '1.07';
     22$VERSION = '1.12';
    2023
    2124# format codes for geoTiff directory entries
    2225my %geoTiffFormat = (
    23     0      => 'int16u',
     26    0      => 'int16u', # (value is stored in offset, and count is 1)
     27    0x87af => 'int16u', # (value is stored after directory)
    2428    0x87b0 => 'double',
    2529    0x87b1 => 'string',
     
    5054    9107 => 'Angular DMS',
    5155    9108 => 'Angular DMS Hemisphere',
     56    32767 => 'User Defined',
     57);
     58
     59my %epsg_vertcs = (
     60    0 => 'Undefined',
     61    5001 => 'Airy 1830 ellipsoid',
     62    5002 => 'Airy Modified 1849 ellipsoid',
     63    5003 => 'ANS ellipsoid',
     64    5004 => 'Bessel 1841 ellipsoid',
     65    5005 => 'Bessel Modified ellipsoid',
     66    5006 => 'Bessel Namibia ellipsoid',
     67    5007 => 'Clarke 1858 ellipsoid',
     68    5008 => 'Clarke 1866 ellipsoid',
     69    5010 => 'Clarke 1880 Benoit ellipsoid',
     70    5011 => 'Clarke 1880 IGN ellipsoid',
     71    5012 => 'Clarke 1880 RGS ellipsoid',
     72    5013 => 'Clarke 1880 Arc ellipsoid',
     73    5014 => 'Clarke 1880 SGA 1922 ellipsoid',
     74    5015 => 'Everest 1830 1937 Adjustment ellipsoid',
     75    5016 => 'Everest 1830 1967 Definition ellipsoid',
     76    5017 => 'Everest 1830 1975 Definition ellipsoid',
     77    5018 => 'Everest 1830 Modified ellipsoid',
     78    5019 => 'GRS 1980 ellipsoid',
     79    5020 => 'Helmert 1906 ellipsoid',
     80    5021 => 'INS ellipsoid',
     81    5022 => 'International 1924 ellipsoid',
     82    5023 => 'International 1967 ellipsoid',
     83    5024 => 'Krassowsky 1940 ellipsoid',
     84    5025 => 'NWL 9D ellipsoid',
     85    5026 => 'NWL 10D ellipsoid',
     86    5027 => 'Plessis 1817 ellipsoid',
     87    5028 => 'Struve 1860 ellipsoid',
     88    5029 => 'War Office ellipsoid',
     89    5030 => 'WGS 84 ellipsoid',
     90    5031 => 'GEM 10C ellipsoid',
     91    5032 => 'OSU86F ellipsoid',
     92    5033 => 'OSU91A ellipsoid',
     93    5101 => 'Newlyn',
     94    5102 => 'North American Vertical Datum 1929',
     95    5103 => 'North American Vertical Datum 1988',
     96    5104 => 'Yellow Sea 1956',
     97    5105 => 'Baltic Sea',
     98    5106 => 'Caspian Sea',
    5299    32767 => 'User Defined',
    53100);
     
    505552    },
    506553    2061 => 'GeogPrimeMeridianLong',
     554    2062 => 'GeogToWGS84',
    507555    3072 => {
    508556        Name => 'ProjectedCSType',
     
    526574            3057 => 'ISN93 Lambert 1993',
    527575            3300 => 'Estonian Coordinate System of 1992',
     576            3786 => 'Popular Visualisation CRS / Mercator', #PH (NC)
     577            3857 => 'WGS 84 / Pseudo-Mercator', #PH (NC)
    528578            20137 => 'Adindan UTM zone 37N',
    529579            20138 => 'Adindan UTM zone 38N',
     
    556606            20538 => 'Afgooye UTM zone 38N',
    557607            20539 => 'Afgooye UTM zone 39N',
    558             20700 => 'Lisbon Portugese Grid',
     608            20700 => 'Lisbon Portuguese Grid',
    559609            20822 => 'Aratu UTM zone 22S',
    560610            20823 => 'Aratu UTM zone 23S',
     
    20062056    4096 => {
    20072057        Name => 'VerticalCSType',
    2008         PrintConv => {
    2009             # epsg_vertcs
    2010             5001 => 'Airy 1830 ellipsoid',
    2011             5002 => 'Airy Modified 1849 ellipsoid',
    2012             5003 => 'ANS ellipsoid',
    2013             5004 => 'Bessel 1841 ellipsoid',
    2014             5005 => 'Bessel Modified ellipsoid',
    2015             5006 => 'Bessel Namibia ellipsoid',
    2016             5007 => 'Clarke 1858 ellipsoid',
    2017             5008 => 'Clarke 1866 ellipsoid',
    2018             5010 => 'Clarke 1880 Benoit ellipsoid',
    2019             5011 => 'Clarke 1880 IGN ellipsoid',
    2020             5012 => 'Clarke 1880 RGS ellipsoid',
    2021             5013 => 'Clarke 1880 Arc ellipsoid',
    2022             5014 => 'Clarke 1880 SGA 1922 ellipsoid',
    2023             5015 => 'Everest 1830 1937 Adjustment ellipsoid',
    2024             5016 => 'Everest 1830 1967 Definition ellipsoid',
    2025             5017 => 'Everest 1830 1975 Definition ellipsoid',
    2026             5018 => 'Everest 1830 Modified ellipsoid',
    2027             5019 => 'GRS 1980 ellipsoid',
    2028             5020 => 'Helmert 1906 ellipsoid',
    2029             5021 => 'INS ellipsoid',
    2030             5022 => 'International 1924 ellipsoid',
    2031             5023 => 'International 1967 ellipsoid',
    2032             5024 => 'Krassowsky 1940 ellipsoid',
    2033             5025 => 'NWL 9D ellipsoid',
    2034             5026 => 'NWL 10D ellipsoid',
    2035             5027 => 'Plessis 1817 ellipsoid',
    2036             5028 => 'Struve 1860 ellipsoid',
    2037             5029 => 'War Office ellipsoid',
    2038             5030 => 'WGS 84 ellipsoid',
    2039             5031 => 'GEM 10C ellipsoid',
    2040             5032 => 'OSU86F ellipsoid',
    2041             5033 => 'OSU91A ellipsoid',
    2042             5101 => 'Newlyn',
    2043             5102 => 'North American Vertical Datum 1929',
    2044             5103 => 'North American Vertical Datum 1988',
    2045             5104 => 'Yellow Sea 1956',
    2046             5105 => 'Baltic Sea',
    2047             5106 => 'Caspian Sea',
    2048             32767 => 'User Defined',
    2049         },
     2058        SeparateTable => 'VerticalCS',
     2059        PrintConv => \%epsg_vertcs,
    20502060    },
    20512061    4097 => 'VerticalCitation',
    20522062    4098 => {
    20532063        Name => 'VerticalDatum',
    2054         PrintConv => {
    2055             1 => 'Vertical Datum Base',
    2056             32767 => 'User Defined',
    2057         },
     2064        SeparateTable => 'VerticalCS',
     2065        PrintConv => \%epsg_vertcs,
    20582066    },
    20592067    4099 => {
     
    20622070        PrintConv => \%epsg_units,
    20632071    },
     2072#
     2073# ChartTiff extensions (ref 2)
     2074#
     2075    47001 => {
     2076        Name => 'ChartFormat',
     2077        PrintConv => {
     2078            47500 => 'General',
     2079            47501 => 'Coastal',
     2080            47502 => 'Harbor',
     2081            47503 => 'SailingInternational',
     2082            47504 => 'SmallCraft Route',
     2083            47505 => 'SmallCraftArea',
     2084            47506 => 'SmallCraftFolio',
     2085            47507 => 'Topographic',
     2086            47508 => 'Recreation',
     2087            47509 => 'Index',
     2088            47510 => 'Inset',
     2089        },
     2090    },
     2091    47002 => 'ChartSource',
     2092    47003 => 'ChartSourceEdition',
     2093    47004 => 'ChartSourceDate',
     2094    47005 => 'ChartCorrDate',
     2095    47006 => 'ChartCountryOrigin',
     2096    47007 => 'ChartRasterEdition',
     2097    47008 => {
     2098        Name => 'ChartSoundingDatum',
     2099        PrintConv => {
     2100            47600 => 'Equatorial Spring Low Water',
     2101            47601 => 'Indian Spring Low Water',
     2102            47602 => 'Lowest Astronomical Tide',
     2103            47603 => 'Lowest Low Water',
     2104            47604 => 'Lowest Normal Low Water',
     2105            47605 => 'Mean Higher High Water',
     2106            47606 => 'Mean High Water',
     2107            47607 => 'Mean High Water Springs',
     2108            47608 => 'Mean Lower Low Water',
     2109            47609 => 'Mean Lower Low Water Springs',
     2110            47610 => 'Mean Low Water',
     2111            47611 => 'Mean Sea Level',
     2112            47612 => 'Tropic Higher High Water',
     2113            47613 => 'Tropic Lower Low Water',
     2114        },
     2115    },
     2116    47009 => {
     2117        Name => 'ChartDepthUnits',
     2118        SeparateTable => 'Units',
     2119        PrintConv => \%epsg_units,
     2120    },
     2121    47010 => 'ChartMagVar',
     2122    47011 => 'ChartMagVarYear',
     2123    47012 => 'ChartMagVarAnnChange',
     2124    47013 => 'ChartWGSNSShift',
     2125    47015 => 'InsetNWPixelX',
     2126    47016 => 'InsetNWPixelY',
     2127    47017 => 'ChartContourInterval',
    20642128);
    20652129
     
    20692133sub ProcessGeoTiff($)
    20702134{
    2071     my $exifTool = shift;
    2072     my $dirData = $exifTool->GetValue('GeoTiffDirectory', 'ValueConv') or return;
    2073     my $doubleData = $exifTool->GetValue('GeoTiffDoubleParams', 'ValueConv');
    2074     my $asciiData = $exifTool->GetValue('GeoTiffAsciiParams', 'ValueConv');
    2075     my $verbose = $exifTool->Options('Verbose');
    2076 
    2077     # restore or original EXIF byte order setting
     2135    my $et = shift;
     2136    my $dirData = $et->GetValue('GeoTiffDirectory', 'ValueConv') or return;
     2137
     2138    # avoid re-processing if another EXIF directory is found
     2139    $$et{DidGeoTiff} and $$et{DidGeoTiff} eq $dirData and return;
     2140    $$et{DidGeoTiff} = $dirData;
     2141
     2142    my $doubleData = $et->GetValue('GeoTiffDoubleParams', 'ValueConv');
     2143    my $asciiData = $et->GetValue('GeoTiffAsciiParams', 'ValueConv');
     2144    my $verbose = $et->Options('Verbose');
     2145
    20782146    if (length($$dirData) >= 8 and
    20792147        length($$dirData) >= 8 * (Get16u($dirData,6) + 1))
     
    20852153
    20862154        if ($verbose) {
    2087             $exifTool->{INDENT} .= '| ';
    2088             $exifTool->VerboseDir('GeoTiff',$numEntries);
     2155            $$et{INDENT} .= '| ';
     2156            $et->VerboseDir('GeoTiff',$numEntries);
    20892157        }
    20902158        # generate version number tag (not a real GeoTiff tag)
    20912159        my $tagTable = GetTagTable("Image::ExifTool::GeoTiff::Main");
    2092         my $tagInfo = $exifTool->GetTagInfo($tagTable, 1);
    2093         $tagInfo and $exifTool->FoundTag($tagInfo,"$version.$revision.$minorRev");
     2160        my $tagInfo = $et->GetTagInfo($tagTable, 1);
     2161        $tagInfo and $et->FoundTag($tagInfo,"$version.$revision.$minorRev");
    20942162
    20952163        my $i;
     
    20972165            my $pt = 8 * ($i + 1);
    20982166            my $tag    = Get16u($dirData, $pt);
    2099             $tagInfo   = $exifTool->GetTagInfo($tagTable, $tag) or next;
     2167            $tagInfo   = $et->GetTagInfo($tagTable, $tag) or next;
    21002168            my $loc    = Get16u($dirData, $pt+2);
    21012169            my $count  = Get16u($dirData, $pt+4);
     
    21032171            my $format = $geoTiffFormat{$loc};
    21042172            my ($val, $dataPt);
    2105             if ($format eq 'double') {          # in the double parms
     2173            if (not $format) {
     2174                $et->Warn("Unknown GeoTiff location ($loc) for $$tagInfo{Name}");
     2175                next;
     2176            } elsif ($format eq 'double') {     # in the double parms
    21062177                $dataPt = $doubleData;
    2107                 $offset *= 8;
    2108                 $val = Image::ExifTool::ReadValue($dataPt, $offset, $format,
    2109                                                   $count, length($$doubleData)-$offset);
    21102178            } elsif ($format eq 'string') {     # in the ASCII parms
    21112179                $dataPt = $asciiData;
    2112                 $val = substr($$dataPt, $offset, $count);
    2113                 $val =~ s/(\0|\|)$//;   # remove trailing terminator (NULL or '|')
    2114             } elsif ($format eq 'int16u') {     # use the offset as the value
     2180            } elsif ($format eq 'int16u') {     # in the GeoTiffDirectory data
    21152181                $dataPt = $dirData;
    2116                 $val = $offset;
    2117                 $offset = $pt+6;
    2118             } else {
    2119                 $exifTool->Warn("Unknown GeoTiff location: $loc");
     2182                unless ($loc) {                 # is value is stored in offset?
     2183                    $count = 1;                 # (implied by location of 0)
     2184                    $offset = ($pt + 6) / 2;    # offset of the "offset" value
     2185                }
     2186            }
     2187            my $size = Image::ExifTool::FormatSize($format);
     2188            if (not $dataPt or length($$dataPt) < $size*($offset+$count)) {
     2189                $et->Warn("Missing $format data for $$tagInfo{Name}");
    21202190                next;
    21212191            }
    2122             $verbose and $exifTool->VerboseInfo($tag, $tagInfo,
     2192            $offset *= $size;
     2193            $val = Image::ExifTool::ReadValue($dataPt, $offset, $format,
     2194                                              $count, length($$dataPt)-$offset);
     2195            # remove trailing terminator (NULL or '|') from string value
     2196            $val =~ s/(\0|\|)$// if $format eq 'string';
     2197            $verbose and $et->VerboseInfo($tag, $tagInfo,
    21232198                'Table'  => $tagTable,
    21242199                'Index'  => $i,
     
    21282203                'Format' => $format,
    21292204                'Count'  => $count,
    2130                 'Size'   => $count * Image::ExifTool::FormatSize($format),
     2205                'Size'   => $count * $size,
    21312206            );
    2132             $exifTool->FoundTag($tagInfo, $val);
     2207            $et->FoundTag($tagInfo, $val);
    21332208        }
    21342209        if ($verbose) {
    2135             $exifTool->{INDENT} =~ s/..$//;
     2210            $$et{INDENT} =~ s/..$//;
    21362211        }
    21372212    } else {
    2138         $exifTool->Warn('Bad GeoTIFF directory');
     2213        $et->Warn('Bad GeoTIFF directory');
    21392214    }
    2140     # must delete these tags once we've processed this information
    2141     # (to avoid re-processing if another EXIF directory is found)
    2142     $exifTool->DeleteTag('GeoTiffDirectory');
    2143     $exifTool->DeleteTag('GeoTiffDoubleParams');
    2144     $exifTool->DeleteTag('GeoTiffAsciiParams');
     2215    # extract block tags only if requested
     2216    unless ($$et{OPTIONS}{RequestAll}) {
     2217        $et->DeleteTag('GeoTiffDirectory')    unless $$et{REQ_TAG_LOOKUP}{geotiffdirectory};
     2218        $et->DeleteTag('GeoTiffDoubleParams') unless $$et{REQ_TAG_LOOKUP}{geotiffdoubleparams};
     2219        $et->DeleteTag('GeoTiffAsciiParams')  unless $$et{REQ_TAG_LOOKUP}{geotiffasciiparams};
     2220    }
    21452221}
    21462222
     
    21672243=head1 AUTHOR
    21682244
    2169 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     2245Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    21702246
    21712247This library is free software; you can redistribute it and/or modify it
     
    21782254=item ftp://ftp.remotesensing.org/geotiff/libgeotiff/libgeotiff-1.1.4.tar.gz
    21792255
     2256=item http://www.charttiff.com/whitepapers.shtml
     2257
    21802258=back
    21812259
Note: See TracChangeset for help on using the changeset viewer.