Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to apply join_before_split to. Previously join_before_split was always applied to all metadata fields. Now it's applied by default to any metafield matching .*Keywords, as that's the metafield we have experience of that behaves differently to the others, since it stores values by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/BMP.pm

    r24107 r34921  
    88# References:   1) http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html
    99#               2) http://www.fourcc.org/rgb.php
     10#               3) https://msdn.microsoft.com/en-us/library/dd183381(v=vs.85).aspx
    1011#------------------------------------------------------------------------------
    1112
     
    1617use Image::ExifTool qw(:DataAccess :Utils);
    1718
    18 $VERSION = '1.07';
     19$VERSION = '1.09';
     20
     21# conversions for fixed-point 2.30 format values
     22my %fixed2_30 = (
     23    ValueConv => q{
     24        my @a = split ' ', $val;
     25        $_ /= 0x40000000 foreach @a;
     26        "@a";
     27    },
     28    PrintConv => q{
     29        my @a = split ' ', $val;
     30        $_ = sprintf('%.6f', $_) foreach @a;
     31        "@a";
     32    },
     33);
    1934
    2035# BMP chunks
     
    2641        of image related information.
    2742    },
    28     # 0 => size of bitmap structure:
    29     #        12  bytes => 'OS/2 V1',
    30     #        40  bytes => 'Windows V3',
    31     #        64  bytes => 'OS/2 V2',
    32     #        68  bytes => some bitmap structure in AVI videos
    33     #        108 bytes => 'Windows V4',
    34     #        124 bytes => 'Windows V5',
     43    0 => {
     44        Name => 'BMPVersion',
     45        Format => 'int32u',
     46        Notes => q{
     47            this is actually the size of the BMP header, but used to determine the BMP
     48            version
     49        },
     50        RawConv => '$$self{BMPVersion} = $val',
     51        PrintConv => {
     52            40  => 'Windows V3',
     53            68  => 'AVI BMP structure?', #PH (seen in AVI movies from some Casio and Nikon cameras)
     54            108 => 'Windows V4',
     55            124 => 'Windows V5',
     56        },
     57    },
    3558    4 => {
    3659        Name => 'ImageWidth',
     
    4568        Name => 'Planes',
    4669        Format => 'int16u',
     70        # values: 0,1,4,8,16,24,32
    4771    },
    4872    14 => {
     
    5377        Name => 'Compression',
    5478        Format => 'int32u',
     79        RawConv => '$$self{BMPCompression} = $val',
    5580        # (formatted as string[4] for some values in AVI images)
    5681        ValueConv => '$val > 256 ? unpack("A4",pack("V",$val)) : $val',
     
    7499        Name => 'ImageLength',
    75100        Format => 'int32u',
     101        RawConv => '$$self{BMPImageLength} = $val',
    76102    },
    77103    24 => {
     
    91117        Name => 'NumImportantColors',
    92118        Format => 'int32u',
     119        Hook => '$varSize += $size if $$self{BMPVersion} == 68', # (the rest is invalid for AVI BMP's)
    93120        PrintConv => '$val ? $val : "All"',
    94121    },
     122    40 => {
     123        Name => 'RedMask',
     124        Format => 'int32u',
     125        PrintConv => 'sprintf("0x%.8x",$val)',
     126    },
     127    44 => {
     128        Name => 'GreenMask',
     129        Format => 'int32u',
     130        PrintConv => 'sprintf("0x%.8x",$val)',
     131    },
     132    48 => {
     133        Name => 'BlueMask',
     134        Format => 'int32u',
     135        PrintConv => 'sprintf("0x%.8x",$val)',
     136    },
     137    52 => {
     138        Name => 'AlphaMask',
     139        Format => 'int32u',
     140        PrintConv => 'sprintf("0x%.8x",$val)',
     141    },
     142    56 => {
     143        Name => 'ColorSpace',
     144        Format => 'undef[4]',
     145        RawConv => '$$self{BMPColorSpace} = $val =~ /\0/ ? Get32u(\$val, 0) : pack("N",unpack("V",$val))',
     146        PrintConv => {
     147            0 => 'Calibrated RGB',
     148            1 => 'Device RGB',
     149            2 => 'Device CMYK',
     150            LINK => 'Linked Color Profile',
     151            MBED => 'Embedded Color Profile',
     152            sRGB => 'sRGB',
     153            'Win ' => 'Windows Color Space',
     154        },
     155    },
     156    60 => {
     157        Name => 'RedEndpoint',
     158        Condition => '$$self{BMPColorSpace} eq "0"',
     159        Format => 'int32u[3]',
     160        %fixed2_30,
     161    },
     162    72 => {
     163        Name => 'GreenEndpoint',
     164        Condition => '$$self{BMPColorSpace} eq "0"',
     165        Format => 'int32u[3]',
     166        %fixed2_30,
     167    },
     168    84 => {
     169        Name => 'BlueEndpoint',
     170        Condition => '$$self{BMPColorSpace} eq "0"',
     171        Format => 'int32u[3]',
     172        %fixed2_30,
     173    },
     174    96 => {
     175        Name => 'GammaRed',
     176        Condition => '$$self{BMPColorSpace} eq "0"',
     177        Format => 'fixed32u',
     178    },
     179    100 => {
     180        Name => 'GammaGreen',
     181        Condition => '$$self{BMPColorSpace} eq "0"',
     182        Format => 'fixed32u',
     183    },
     184    104 => {
     185        Name => 'GammaBlue',
     186        Condition => '$$self{BMPColorSpace} eq "0"',
     187        Format => 'fixed32u',
     188    },
     189    108 => {
     190        Name => 'RenderingIntent',
     191        Format => 'int32u',
     192        PrintConv => {
     193            1 => 'Graphic (LCS_GM_BUSINESS)',
     194            2 => 'Proof (LCS_GM_GRAPHICS)',
     195            4 => 'Picture (LCS_GM_IMAGES)',
     196            8 => 'Absolute Colorimetric (LCS_GM_ABS_COLORIMETRIC)',
     197        },
     198    },
     199    112 => {
     200        Name => 'ProfileDataOffset',
     201        Condition => '$$self{BMPColorSpace} eq "LINK" or $$self{BMPColorSpace} eq "MBED"',
     202        Format => 'int32u',
     203        RawConv => '$$self{BMPProfileOffset} = $val',
     204    },
     205    116 => {
     206        Name => 'ProfileSize',
     207        Condition => '$$self{BMPColorSpace} eq "LINK" or $$self{BMPColorSpace} eq "MBED"',
     208        Format => 'int32u',
     209        RawConv => '$$self{BMPProfileSize} = $val',
     210    },
     211    # 120 - reserved
    95212);
    96213
     
    100217    GROUPS => { 0 => 'File', 1 => 'File', 2 => 'Image' },
    101218    NOTES => 'Information extracted from OS/2-format BMP images.',
    102     # 0 => size of bitmap structure (12)
     219    0 => {
     220        Name => 'BMPVersion',
     221        Format => 'int32u',
     222        Notes => 'again, the header size is used to determine the BMP version',
     223        PrintConv => {
     224            12  => 'OS/2 V1',
     225            64  => 'OS/2 V2',
     226        },
     227    },
    103228    4  => { Name => 'ImageWidth',  Format => 'int16u' },
    104229    6  => { Name => 'ImageHeight', Format => 'int16u' },
    105230    8  => { Name => 'Planes',      Format => 'int16u' },
    106231    10 => { Name => 'BitDepth',    Format => 'int16u' },
     232);
     233
     234%Image::ExifTool::BMP::Extra = (
     235    GROUPS => { 0 => 'File', 1 => 'File', 2 => 'Image' },
     236    NOTES => 'Extra information extracted from some BMP images.',
     237    VARS => { NO_ID => 1 },
     238    LinkedProfileName => { },
     239    ICC_Profile => { SubDirectory => { TagTable => 'Image::ExifTool::ICC_Profile::Main' } },
     240    EmbeddedJPG => {
     241        Groups => { 2 => 'Preview' },
     242        Binary => 1,
     243    },
     244    EmbeddedPNG => {
     245        Groups => { 2 => 'Preview' },
     246        Binary => 1,
     247    },
    107248);
    108249
     
    113254sub ProcessBMP($$)
    114255{
    115     my ($exifTool, $dirInfo) = @_;
     256    my ($et, $dirInfo) = @_;
    116257    my $raf = $$dirInfo{RAF};
    117258    my ($buff, $tagTablePtr);
     
    122263    SetByteOrder('II');
    123264    my $len = Get32u(\$buff, 14);
    124     return 0 unless $len == 12 or $len >= 40;
     265    # len = v1:12, v4:108, v5:124
     266    return 0 unless $len == 12 or $len == 16 or ($len >= 40 and $len < 1000000);
    125267    return 0 unless $raf->Seek(-4, 1) and $raf->Read($buff, $len) == $len;
    126     $exifTool->SetFileType();   # set the FileType tag
     268    $et->SetFileType();   # set the FileType tag
     269#
     270# process the BMP header
     271#
    127272    my %dirInfo = (
    128273        DataPt => \$buff,
     
    130275        DirLen => length($buff),
    131276    );
    132     if ($len == 12) {   # old OS/2 format BMP
     277    if ($len == 12 or $len == 16 or $len == 64) {   # old OS/2 format BMP
    133278        $tagTablePtr = GetTagTable('Image::ExifTool::BMP::OS2');
    134279    } else {
    135280        $tagTablePtr = GetTagTable('Image::ExifTool::BMP::Main');
    136281    }
    137     $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     282    $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
     283#
     284# extract any embedded images
     285#
     286    my $extraTable = GetTagTable('Image::ExifTool::BMP::Extra');
     287    if ($$et{BMPCompression} and $$et{BMPImageLength} and
     288        ($$et{BMPCompression} == 4 or $$et{BMPCompression} == 5))
     289    {
     290        my $tag = $$et{BMPCompression} == 4 ? 'EmbeddedJPG' : 'EmbeddedPNG';
     291        my $val = $et->ExtractBinary($raf->Tell(), $$et{BMPImageLength}, $tag);
     292        if ($val) {
     293            $et->HandleTag($extraTable, $tag, $val);
     294        }
     295    }
     296#
     297# process profile data if it exists (v5 header only)
     298#
     299    if ($len == 124 and $$et{BMPProfileOffset}) {
     300        my $pos = $$et{BMPProfileOffset} + 14;  # (note the 14-byte shift!)
     301        my $size = $$et{BMPProfileSize};
     302        if ($raf->Seek($pos, 0) and $raf->Read($buff, $size) == $size) {
     303            my $tag;
     304            if ($$et{BMPColorSpace} eq 'LINK') {
     305                $buff =~ s/\0+$//;  # remove null terminator(s)
     306                $buff = $et->Decode($buff, 'Latin'); # convert from Latin
     307                $tag = 'LinkedProfileName';
     308            } else {
     309                $tag = 'ICC_Profile';
     310            }
     311            $et->HandleTag($extraTable, $tag => $buff, Size => $size, DataPos => $pos);
     312        } else {
     313            $et->Warn('Error loading profile data', 1);
     314        }
     315    }
    138316    return 1;
    139317}
     
    158336=head1 AUTHOR
    159337
    160 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     338Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    161339
    162340This library is free software; you can redistribute it and/or modify it
     
    169347=item L<http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html>
    170348
     349=item L<http://www.fourcc.org/rgb.php>
     350
     351=item L<https://msdn.microsoft.com/en-us/library/dd183381(v=vs.85).aspx>
     352
    171353=back
    172354
Note: See TracChangeset for help on using the changeset viewer.