Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/JPEG.pm

    r24107 r34921  
    1212use Image::ExifTool qw(:DataAccess :Utils);
    1313
    14 $VERSION = '1.13';
    15 
    16 sub ProcessScalado($$$);
     14$VERSION = '1.30';
     15
    1716sub ProcessOcad($$$);
    18 
    19 # (this main JPEG table is for documentation purposes only)
     17sub ProcessJPEG_HDR($$$);
     18
     19# (most of the tags in this table are for documentation purposes only)
    2020%Image::ExifTool::JPEG::Main = (
    21     NOTES => 'This table lists information extracted by ExifTool from JPEG images.',
     21    NOTES => q{
     22        This table lists information extracted by ExifTool from JPEG images. See
     23        L<https://www.w3.org/Graphics/JPEG/jfif3.pdf> for the JPEG specification.
     24    },
    2225    APP0 => [{
    2326        Name => 'JFIF',
     
    5760        Condition => '$$valPt =~ /^QVCI\0/',
    5861        SubDirectory => { TagTable => 'Image::ExifTool::Casio::QVCI' },
     62      }, {
     63        Name => 'FLIR',
     64        Condition => '$$valPt =~ /^FLIR\0/',
     65        SubDirectory => { TagTable => 'Image::ExifTool::FLIR::FFF' },
     66      }, {
     67        Name => 'RawThermalImage', # (from Parrot Bebop-Pro Thermal drone)
     68        Condition => '$$valPt =~ /^PARROT\0(II\x2a\0|MM\0\x2a)/',
     69        Groups => { 0 => 'APP1', 1 => 'Parrot', 2 => 'Preview' },
     70        Notes => 'thermal image from Parrot Bebop-Pro Thermal drone',
     71        RawConv => 'substr($val, 7)',
     72        Binary => 1,
    5973    }],
    6074    APP2 => [{
     
    7286      }, {
    7387        Name => 'PreviewImage',
    74         Condition => '$$valPt =~ /^\xff\xd8\xff\xdb/',
    75         Notes => 'Samsung large preview',
     88        Condition => '$$valPt =~ /^(|QVGA\0|BGTH)\xff\xd8\xff\xdb/',
     89        Notes => 'Samsung APP2 preview image', # (Samsung/GoPro="", BenQ="QVGA\0", Digilife="BGTH")
    7690    }],
    7791    APP3 => [{
     
    8498        SubDirectory => { TagTable => 'Image::ExifTool::Stim::Main' },
    8599      }, {
     100        Name => 'ThermalData', # (written by DJI FLIR models)
     101        Condition => '$$self{Make} eq "DJI"',
     102        Notes => 'DJI raw thermal data',
     103        Groups => { 0 => 'APP3', 1 => 'DJI', 2 => 'Image' },
     104        Binary => 1,
     105      }, {
    86106        Name => 'PreviewImage', # (written by HP R837 and Samsung S1060)
    87107        Condition => '$$valPt =~ /^\xff\xd8\xff\xdb/',
    88         Notes => 'Hewlett-Packard or Samsung preview image',
     108        Notes => 'Samsung/HP preview image', # (Samsung, HP, BenQ)
    89109    }],
    90110    APP4 => [{
    91111        Name => 'Scalado',
    92112        Condition => '$$valPt =~ /^SCALADO\0/',
    93         SubDirectory => { TagTable => 'Image::ExifTool::JPEG::Scalado' },
     113        SubDirectory => { TagTable => 'Image::ExifTool::Scalado::Main' },
    94114      }, {
    95115        Name => 'FPXR', # (non-standard location written by some HP models)
     
    97117        SubDirectory => { TagTable => 'Image::ExifTool::FlashPix::Main' },
    98118      }, {
    99         Name => 'PreviewImage', # (written by S1060)
    100         Notes => 'Continued Samsung preview from APP3',
    101     }],
    102     APP5 => {
     119        Name => 'ThermalParams', # (written by DJI FLIR models)
     120        Condition => '$$self{Make} eq "DJI" and $$valPt =~ /^\xaa\x55\x12\x06/',
     121        SubDirectory => { TagTable => 'Image::ExifTool::DJI::ThermalParams' },
     122      }, {
     123        Name => 'PreviewImage', # (eg. Samsung S1060)
     124        Notes => 'continued from APP3',
     125    }],
     126    APP5 => [{
    103127        Name => 'RMETA',
    104128        Condition => '$$valPt =~ /^RMETA\0/',
    105129        SubDirectory => { TagTable => 'Image::ExifTool::Ricoh::RMETA' },
    106     },
     130      }, {
     131        Name => 'SamsungUniqueID',
     132        Condition => '$$valPt =~ /ssuniqueid\0/',
     133        SubDirectory => { TagTable => 'Image::ExifTool::Samsung::APP5' },
     134      }, {
     135        Name => 'ThermalCalibration', # (written by DJI FLIR models)
     136        Condition => '$$self{Make} eq "DJI"',
     137        Notes => 'DJI thermal calibration data',
     138        Groups => { 0 => 'APP5', 1 => 'DJI', 2 => 'Image' },
     139        Binary => 1,
     140      }, {
     141        Name => 'PreviewImage', # (eg. BenQ DC E1050)
     142        Notes => 'continued from APP4',
     143    }],
    107144    APP6 => [{
    108145        Name => 'EPPIM',
     
    117154        Condition => '$$valPt =~ /^TDHD\x01\0\0\0/',
    118155        SubDirectory => { TagTable => 'Image::ExifTool::HP::TDHD' },
     156      }, {
     157        Name => 'GoPro',
     158        Condition => '$$valPt =~ /^GoPro\0/',
     159        SubDirectory => { TagTable => 'Image::ExifTool::GoPro::GPMF' },
     160      # also seen Motorola APP6 "MMIMETA\0", with sub-types: AL3A,ALED,MMI0,MOTD,QC3A
     161      }, {
     162        Name => 'DJI_DTAT', # (written by ZH20T)
     163        Condition => '$$valPt =~ /^DTAT\0\0.\{/s',
     164        Groups => { 0 => 'APP6', 1 => 'DJI' },
     165        Notes => 'DJI Thermal Analysis Tool record',
     166        ValueConv => 'substr($val,7)',
     167    }],
     168    APP7 => [{
     169        Name => 'Pentax',
     170        Condition => '$$valPt =~ /^PENTAX \0/',
     171        SubDirectory => { TagTable => 'Image::ExifTool::Pentax::Main' },
     172      }, {
     173        Name => 'Huawei',
     174        Condition => '$$valPt =~ /^HUAWEI\0\0/',
     175        SubDirectory => { TagTable => 'Image::ExifTool::Unknown::Main' },
     176      }, {
     177        Name => 'Qualcomm',
     178        Condition => '$$valPt =~ /^\x1aQualcomm Camera Attributes/',
     179        SubDirectory => { TagTable => 'Image::ExifTool::Qualcomm::Main' },
    119180    }],
    120181    APP8 => {
     
    123184        SubDirectory => { TagTable => 'Image::ExifTool::JPEG::SPIFF' },
    124185    },
     186    APP9 => {
     187        Name => 'MediaJukebox',
     188        Condition => '$$valPt =~ /^Media Jukebox\0/',
     189        SubDirectory => { TagTable => 'Image::ExifTool::JPEG::MediaJukebox' },
     190    },
    125191    APP10 => {
    126192        Name => 'Comment',
    127193        Condition => '$$valPt =~ /^UNICODE\0/',
    128194        Notes => 'PhotoStudio Unicode comment',
     195    },
     196    APP11 => {
     197        Name => 'JPEG-HDR',
     198        Condition => '$$valPt =~ /^HDR_RI /',
     199        SubDirectory => { TagTable => 'Image::ExifTool::JPEG::HDR' },
    129200    },
    130201    APP12 => [{
     
    149220        Name => 'Adobe',
    150221        Condition => '$$valPt =~ /^Adobe/',
     222        Writable => 2,  # (for docs only)
    151223        SubDirectory => { TagTable => 'Image::ExifTool::JPEG::Adobe' },
    152224    },
     
    156228        SubDirectory => { TagTable => 'Image::ExifTool::JPEG::GraphConv' },
    157229    },
     230    # APP15 - Also unknown "TEXT\0" segment stored by Casio/FujiFilm
    158231    COM => {
    159232        Name => 'Comment',
    160233        # note: flag as writable for documentation, but it won't show up
    161234        # in the TagLookup as writable because there is no WRITE_PROC
    162         Writable => 1,
     235        Writable => 2,
    163236    },
    164237    SOF => {
     
    168241    DQT => {
    169242        Name => 'DefineQuantizationTable',
    170         Notes => 'used to calculate the Extra:JPEGDigest tag value',
     243        Notes => 'used to calculate the Extra JPEGDigest tag value',
    171244    },
    172245    Trailer => [{
     
    194267        SubDirectory => { TagTable => 'Image::ExifTool::MIE::Main' },
    195268      }, {
     269        Name => 'Samsung',
     270        Condition => '$$valPt =~ /QDIOBS$/',
     271        SubDirectory => { TagTable => 'Image::ExifTool::Samsung::Trailer' },
     272      }, {
     273        Name => 'EmbeddedVideo',
     274        Notes => 'extracted only when ExtractEmbedded option is used',
     275        Condition => '$$valPt =~ /^.{4}ftyp/s',
     276      }, {
     277        Name => 'Insta360',
     278        Condition => '$$valPt =~ /8db42d694ccc418790edff439fe026bf$/',
     279      }, {
    196280        Name => 'PreviewImage',
    197281        Condition => '$$valPt =~ /^\xff\xd8\xff/',
    198         Writable => 1,  # (for docs only)
     282        Writable => 2,  # (for docs only)
    199283    }],
    200284);
     
    210294        Name => 'PrintIM',
    211295        # must set Writable here so this tag will be saved with MakerNotes option
     296        # (but it isn't actually writable because there is no WRITE_PROC)
    212297        Writable => 'undef',
    213298        Description => 'Print Image Matching',
     
    218303);
    219304
    220 # SPIFF APP8 segment.  Refs:
     305# APP8 SPIFF segment.  Refs:
    221306# 1) http://www.fileformat.info/format/spiff/
    222307# 2) http://www.jpeg.org/public/spiff.pdf
     
    305390);
    306391
     392# APP9 Media Jukebox segment (ref PH)
     393%Image::ExifTool::JPEG::MediaJukebox = (
     394    GROUPS => { 0 => 'XML', 1 => 'MediaJukebox', 2 => 'Image' },
     395    VARS => { NO_ID => 1 },
     396    NOTES => 'Tags found in the XML metadata of the APP9 "Media Jukebox" segment.',
     397    Date => {
     398        Groups => { 2 => 'Time' },
     399        # convert from days since Dec 30, 1899 to seconds since Jan 1, 1970
     400        ValueConv => 'ConvertUnixTime(($val - (70 * 365 + 17 + 2)) * 24 * 3600)',
     401        PrintConv => '$self->ConvertDateTime($val)',
     402    },
     403    Album        => { },
     404    Caption      => { },
     405    Keywords     => { },
     406    Name         => { },
     407    People       => { },
     408    Places       => { },
     409    Tool_Name    => { },
     410    Tool_Version => { },
     411);
     412
     413# JPEG-HDR APP11 information (ref PH, guessed from http://anyhere.com/gward/papers/cic05.pdf)
     414%Image::ExifTool::JPEG::HDR = (
     415    GROUPS => { 0 => 'APP11', 1 => 'JPEG-HDR', 2 => 'Image' },
     416    PROCESS_PROC => \&ProcessJPEG_HDR,
     417    TAG_PREFIX => '', # (no prefix for unknown tags)
     418    NOTES => 'Information extracted from APP11 of a JPEG-HDR image.',
     419    ver => 'JPEG-HDRVersion',
     420    # (need names for the next 3 tags)
     421    ln0 => { Description => 'Ln0' },
     422    ln1 => { Description => 'Ln1' },
     423    s2n => { Description => 'S2n' },
     424    alp => { Name => 'Alpha' }, # (Alpha/Beta are saturation parameters)
     425    bet => { Name => 'Beta' },
     426    cor => { Name => 'CorrectionMethod' },
     427    RatioImage => {
     428        Groups => { 2 => 'Preview' },
     429        Notes => 'the embedded JPEG-compressed ratio image',
     430        Binary => 1,
     431    },
     432);
     433
    307434# AdobeCM APP13 (no references)
    308435%Image::ExifTool::JPEG::AdobeCM = (
     
    310437    GROUPS => { 0 => 'APP13', 1 => 'AdobeCM', 2 => 'Image' },
    311438    NOTES => q{
    312         The "Adobe_CM" APP13 segment presumably contains color management
     439        The APP13 "Adobe_CM" segment presumably contains color management
    313440        information, but the meaning of the data is currently unknown.  If anyone
    314441        has an idea about what this means, please let me know.
     
    324451    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    325452    GROUPS => { 0 => 'APP14', 1 => 'Adobe', 2 => 'Image' },
    326     NOTES => 'The "Adobe" APP14 segment stores image encoding information for DCT filters.',
     453    NOTES => q{
     454        The APP14 "Adobe" segment stores image encoding information for DCT filters.
     455        This segment may be copied or deleted as a block using the Extra "Adobe"
     456        tag, but note that it is not deleted by default when deleting all metadata
     457        because it may affect the appearance of the image.
     458    },
    327459    FORMAT => 'int16u',
    328460    0 => 'DCTEncodeVersion',
    329461    1 => {
    330462        Name => 'APP14Flags0',
    331         PrintConv => { BITMASK => {
    332             15 => 'Encoded with Blend=1 downsampling'
    333         } },
     463        PrintConv => {
     464            0 => '(none)',
     465            BITMASK => {
     466                15 => 'Encoded with Blend=1 downsampling'
     467            },
     468        },
    334469    },
    335470    2 => {
    336471        Name => 'APP14Flags1',
    337         PrintConv => { BITMASK => { } },
     472        PrintConv => {
     473            0 => '(none)',
     474            BITMASK => { },
     475        },
    338476    },
    339477    3 => {
     
    355493);
    356494
    357 # AVI1 APP0 segment (ref http://www.schnarff.com/file-formats/bmp/BMPDIB.TXT)
     495# APP0 AVI1 segment (ref http://www.schnarff.com/file-formats/bmp/BMPDIB.TXT)
    358496%Image::ExifTool::JPEG::AVI1 = (
    359497    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
     
    368506            2 => 'Even',
    369507        },
    370     },   
    371 );
    372 
    373 # Ocad APP0 segment (ref PH)
     508    },
     509);
     510
     511# APP0 Ocad segment (ref PH)
    374512%Image::ExifTool::JPEG::Ocad = (
    375513    PROCESS_PROC => \&ProcessOcad,
    376514    GROUPS => { 0 => 'APP0', 1 => 'Ocad', 2 => 'Image' },
     515    TAG_PREFIX => 'Ocad',
    377516    FIRST_ENTRY => 0,
    378517    NOTES => q{
     
    386525);
    387526
    388 # NITF APP6 segment (National Imagery Transmission Format)
     527# APP6 NITF segment (National Imagery Transmission Format)
    389528# ref http://www.gwg.nga.mil/ntb/baseline/docs/n010697/bwcguide25aug98.pdf
    390529%Image::ExifTool::JPEG::NITF = (
     
    446585);
    447586
    448 # information written by Scalado software (PhotoFusion maybe?)
    449 %Image::ExifTool::JPEG::Scalado = (
    450     GROUPS => { 0 => 'APP4', 1 => 'Scalado', 2 => 'Image' },
    451     PROCESS_PROC => \&ProcessScalado,
    452     TAG_PREFIX => 'Scalado',
    453     FORMAT => 'int32s',
    454     # I presume this was written by
    455     NOTES => q{
    456         Tags extracted from the JPEG APP4 "SCALADO" segment (presumably written by
    457         Scalado mobile software, L<http://www.scalado.com/>).
    458     },
    459     SPMO => {
    460         Name => 'DataLength',
    461         Unkown => 1,
    462     },
    463     WDTH => {
    464         Name => 'PreviewImageWidth',
    465         ValueConv => '$val ? abs($val) : undef',
    466     },
    467     HGHT => {
    468         Name => 'PreviewImageHeight',
    469         ValueConv => '$val ? abs($val) : undef',
    470     },
    471     QUAL => {
    472         Name => 'PreviewQuality',
    473         ValueConv => '$val ? abs($val) : undef',
    474     },
    475     # tags not yet decoded with observed values:
    476     # CHKH: 0, -9010
    477     # CHKL: -2664, -12852
    478     # CLEN: -1024
    479     # CSPC: -2232593
    480     # DATA: (+ve data length)
    481     # HDEC: 0
    482     # MAIN: 0
    483     # SCI0: (+ve data length)
    484     # SCX1: (+ve data length)
    485     # WDEC: 0
    486 );
    487 
    488587#------------------------------------------------------------------------------
    489588# Extract information from the JPEG APP0 Ocad segment
     
    492591sub ProcessOcad($$$)
    493592{
    494     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     593    my ($et, $dirInfo, $tagTablePtr) = @_;
    495594    my $dataPt = $$dirInfo{DataPt};
    496     $exifTool->VerboseDir('APP0 Ocad', undef, length $$dataPt);
     595    $et->VerboseDir('APP0 Ocad', undef, length $$dataPt);
    497596    for (;;) {
    498597        last unless $$dataPt =~ /\$(\w+):([^\0\$]+)/g;
    499598        my ($tag, $val) = ($1, $2);
    500         $val =~ s/(^\s+|\s+$)//g;   # remove leading/trailing spaces
    501         unless ($$tagTablePtr{$tag}) {
    502             Image::ExifTool::AddTagToTable($tagTablePtr, $tag, { Name => "Ocad_$tag" });
    503         }
    504         $exifTool->HandleTag($tagTablePtr, $tag, $val);
     599        $val =~ s/^\s+//; $val =~ s/\s+$//;     # remove leading/trailing spaces
     600        AddTagToTable($tagTablePtr, $tag) unless $$tagTablePtr{$tag};
     601        $et->HandleTag($tagTablePtr, $tag, $val);
    505602    }
    506603    return 1;
     
    508605
    509606#------------------------------------------------------------------------------
    510 # Extract information from the JPEG APP4 SCALADO segment
     607# Extract information from the JPEG APP11 JPEG-HDR segment
    511608# Inputs: 0) ExifTool object ref, 1) dirInfo ref, 2) tag table ref
    512609# Returns: 1 on success
    513 sub ProcessScalado($$$)
     610sub ProcessJPEG_HDR($$$)
    514611{
    515     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     612    my ($et, $dirInfo, $tagTablePtr) = @_;
    516613    my $dataPt = $$dirInfo{DataPt};
    517     my $pos = 0;
    518     my $end = length $$dataPt;
    519     SetByteOrder('MM');
    520     $exifTool->VerboseDir('APP4 SCALADO', undef, $end);
    521     for (;;) {
    522         last if $pos + 12 > $end;
    523         my $tag = substr($$dataPt, $pos, 4);
    524         my $unk = Get32u($dataPt, $pos + 4); # (what is this?)
    525         $exifTool->HandleTag($tagTablePtr, $tag, undef,
    526             DataPt  => $dataPt,
    527             Start   => $pos + 8,
    528             Size    => 4,
    529             Extra   => ", unk $unk",
    530         );
    531         # shorten directory size by length of SPMO
    532         $end -= Get32u($dataPt, $pos + 8) if $tag eq 'SPMO';
    533         $pos += 12;
     614    $$dataPt =~ /~\0/g or $et->Warn('Unrecognized JPEG-HDR format'), return 0;
     615    my $pos = pos $$dataPt;
     616    my $meta = substr($$dataPt, 7, $pos-9);
     617    $et->VerboseDir('APP11 JPEG-HDR', undef, length $$dataPt);
     618    while ($meta =~ /(\w+)=([^,\s]*)/g) {
     619        my ($tag, $val) = ($1, $2);
     620        AddTagToTable($tagTablePtr, $tag) unless $$tagTablePtr{$tag};
     621        $et->HandleTag($tagTablePtr, $tag, $val);
    534622    }
     623    $et->HandleTag($tagTablePtr, 'RatioImage', substr($$dataPt, $pos));
    535624    return 1;
    536625}
     
    556645=head1 AUTHOR
    557646
    558 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     647Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    559648
    560649This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.