Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to which join_before_split is applied. Previously, join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/MPF.pm

    r24107 r34921  
    66# Revisions:    06/12/2009 - P. Harvey Created
    77#
    8 # References:   1) http://www.cipa.jp/english/hyoujunka/kikaku/pdf/DC-007_E.pdf
     8# References:   1) http://www.cipa.jp/std/documents/e/DC-007_E.pdf
    99#------------------------------------------------------------------------------
    1010
     
    1616use Image::ExifTool::Exif;
    1717
    18 $VERSION = '1.05';
     18$VERSION = '1.14';
    1919
    2020sub ProcessMPImageList($$$);
    2121
    22 # Tags found in MPF APP2 segment in JPEG images
     22# Tags found in APP2 MPF segment in JPEG images
    2323%Image::ExifTool::MPF::Main = (
    2424    GROUPS => { 0 => 'MPF', 1 => 'MPF0', 2 => 'Image'},
    2525    NOTES => q{
    2626        These tags are part of the CIPA Multi-Picture Format specification, and are
    27         found in the APP2 "MPF" segment of JPEG images.  See
    28         L<http://www.cipa.jp/english/hyoujunka/kikaku/pdf/DC-007_E.pdf> for the
    29         official specification.
     27        found in the APP2 "MPF" segment of JPEG images.  MPImage data referenced
     28        from this segment is stored as a JPEG trailer.  The MPF tags are not
     29        writable, however the MPF segment may be deleted as a group (with "MPF:All")
     30        but then the JPEG trailer should also be deleted (with "Trailer:All").  See
     31        L<https://web.archive.org/web/20190713230858/http://www.cipa.jp/std/documents/e/DC-007_E.pdf>
     32        for the official specification.
    3033    },
    3134    0xb000 => 'MPFVersion',
     
    7073            },
    7174        ],
    72     },         
     75    },
    7376    0xb202 => 'PanOverlapH',
    7477    0xb203 => 'PanOverlapV',
     
    9598        The first MPF "Large Thumbnail" image is extracted as PreviewImage, and the
    9699        rest of the embedded MPF images are extracted as MPImage#.  The
    97         ExtractEmbedded (-ee) option may be used to extract information from these
     100        L<ExtractEmbedded|../ExifTool.html#ExtractEmbedded> (-ee) option may be used to extract information from these
    98101        embedded images.
    99102    },
     
    103106        Mask => 0xf8000000,
    104107        PrintConv => { BITMASK => {
    105             29 => 'Representative image',
    106             30 => 'Dependent child image',
    107             31 => 'Dependent parent image',
     108            2 => 'Representative image',
     109            3 => 'Dependent child image',
     110            4 => 'Dependent parent image',
    108111        }},
    109112    },
     
    152155# extract MP Images as composite tags
    153156%Image::ExifTool::MPF::Composite = (
    154     GROUPS => { 2 => 'Image' },
     157    GROUPS => { 2 => 'Preview' },
    155158    MPImage => {
    156159        Require => {
     
    161164        Notes => q{
    162165            the first MPF "Large Thumbnail" is extracted as PreviewImage, and the rest
    163             of the embedded MPF images are extracted as MPImage#.  The ExtractEmbedded
     166            of the embedded MPF images are extracted as MPImage#.  The L<ExtractEmbedded|../ExifTool.html#ExtractEmbedded>
    164167            option may be used to extract information from these embedded images.
    165168        },
     
    167170        RawConv => q{
    168171            require Image::ExifTool::MPF;
     172            @grps = $self->GetGroup($$val{0});  # set groups from input tag
    169173            Image::ExifTool::MPF::ExtractMPImages($self);
    170174        },
     
    181185sub ExtractMPImages($)
    182186{
    183     my $exifTool = shift;
    184     my $ee = $exifTool->Options('ExtractEmbedded');
    185     my $saveBinary = $exifTool->Options('Binary');
     187    my $et = shift;
     188    my $ee = $et->Options('ExtractEmbedded');
     189    my $saveBinary = $et->Options('Binary');
    186190    my ($i, $didPreview, $xtra);
    187191
    188192    for ($i=1; $xtra or not defined $xtra; ++$i) {
    189193        # run through MP images in the same order they were extracted
    190         $xtra = defined $$exifTool{VALUE}{"MPImageStart ($i)"} ? " ($i)" : '';
    191         my $off = $exifTool->GetValue("MPImageStart$xtra");
    192         my $len = $exifTool->GetValue("MPImageLength$xtra");
     194        $xtra = defined $$et{VALUE}{"MPImageStart ($i)"} ? " ($i)" : '';
     195        my $off = $et->GetValue("MPImageStart$xtra", 'ValueConv');
     196        my $len = $et->GetValue("MPImageLength$xtra", 'ValueConv');
    193197        if ($off and $len) {
    194             my $type = $exifTool->GetValue("MPImageType$xtra", 'ValueConv');
     198            my $type = $et->GetValue("MPImageType$xtra", 'ValueConv');
    195199            my $tag = "MPImage$i";
    196200            # store first "Large Thumbnail" as a PreviewImage
     
    199203                $didPreview = 1;
    200204            }
    201             $exifTool->Options('Binary', 1) if $ee;
    202             my $val = Image::ExifTool::Exif::ExtractImage($exifTool, $off, $len, $tag);
    203             $exifTool->Options('Binary', $saveBinary) if $ee;
     205            $et->Options('Binary', 1) if $ee;
     206            my $val = Image::ExifTool::Exif::ExtractImage($et, $off, $len, $tag);
     207            $et->Options('Binary', $saveBinary) if $ee;
    204208            next unless defined $val;
    205209            unless ($Image::ExifTool::Extra{$tag}) {
    206                 Image::ExifTool::AddTagToTable(\%Image::ExifTool::Extra, $tag, {
     210                AddTagToTable(\%Image::ExifTool::Extra, $tag, {
    207211                    Name => $tag,
    208                     Groups => { 0 => 'Composite', 1 => 'Composite', 2 => 'Image'},
     212                    Groups => { 0 => 'Composite', 1 => 'Composite', 2 => 'Preview'},
    209213                });
    210214            }
    211             my $key = $exifTool->FoundTag($tag, $val);
    212             # set groups for PreviewImage
    213             if ($tag eq 'PreviewImage') {
    214                 $exifTool->SetGroup($key, 'Composite', 0);
    215                 $exifTool->SetGroup($key, 'Composite');
    216             }
     215            my $key = $et->FoundTag($tag, $val, $et->GetGroup("MPImageStart$xtra"));
    217216            # extract information from MP images if ExtractEmbedded option used
    218217            if ($ee) {
    219                 $$exifTool{DOC_NUM} = $i;
    220                 $exifTool->ExtractInfo($val, { ReEntry => 1 });
    221                 delete $$exifTool{DOC_NUM};
     218                my $oldBase = $$et{BASE};
     219                $$et{BASE} = $off;
     220                $$et{DOC_NUM} = $i;
     221                $et->ExtractInfo($val, { ReEntry => 1 });
     222                delete $$et{DOC_NUM};
     223                $$et{BASE} = $oldBase;
    222224            }
    223225        }
     
    232234sub ProcessMPImageList($$$)
    233235{
    234     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     236    my ($et, $dirInfo, $tagTablePtr) = @_;
    235237    my $num = int($$dirInfo{DirLen} / 16); # (16 bytes per MP Entry)
    236238    $$dirInfo{DirLen} = 16;
    237239    my ($i, $success);
    238     my $oldG1 = $$exifTool{SET_GROUP1};
     240    my $oldG1 = $$et{SET_GROUP1};
    239241    for ($i=0; $i<$num; ++$i) {
    240         $$exifTool{SET_GROUP1} = '+' . ($i + 1);
    241         $success = $exifTool->ProcessBinaryData($dirInfo, $tagTablePtr);
     242        $$et{SET_GROUP1} = '+' . ($i + 1);
     243        $success = $et->ProcessBinaryData($dirInfo, $tagTablePtr);
    242244        $$dirInfo{DirStart} += 16;
    243245    }
    244     $$exifTool{SET_GROUP1} = $oldG1;
     246    $$et{SET_GROUP1} = $oldG1;
    245247    return $success;
    246248}
     
    265267=head1 AUTHOR
    266268
    267 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     269Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    268270
    269271This library is free software; you can redistribute it and/or modify it
     
    274276=over 4
    275277
    276 =item L<http://www.cipa.jp/english/hyoujunka/kikaku/pdf/DC-007_E.pdf>
     278=item L<http://www.cipa.jp/std/documents/e/DC-007_E.pdf>
    277279
    278280=back
Note: See TracChangeset for help on using the changeset viewer.