Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied by default to any metafield matching .*Keywords, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Matroska.pm

    r24107 r34921  
    1515use Image::ExifTool qw(:DataAccess :Utils);
    1616
    17 $VERSION = '1.04';
     17$VERSION = '1.10';
    1818
    1919my %noYes = ( 0 => 'No', 1 => 'Yes' );
     
    2424%Image::ExifTool::Matroska::Main = (
    2525    GROUPS => { 2 => 'Video' },
     26    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
    2627    NOTES => q{
    2728        The following tags are extracted from Matroska multimedia container files.
    2829        This container format is used by file types such as MKA, MKV, MKS and WEBM.
    2930        For speed, ExifTool extracts tags only up to the first Cluster unless the
    30         Verbose (-v) or Unknown = 2 (-U) option is used.  See
     31        L<Verbose|../ExifTool.html#Verbose> (-v) or L<Unknown|../ExifTool.html#Unknown> = 2 (-U) option is used.  See
    3132        L<http://www.matroska.org/technical/specs/index.html> for the official
    3233        Matroska specification.
     
    151152    },
    152153    0x67 => {
    153         Name => 'Timecode',
     154        Name => 'TimeCode',
    154155        Format => 'unsigned',
    155156        Unknown => 1,
     
    221222    0x2e => {
    222223        Name => 'TrackEntry',
     224        # reset TrackType member at the start of each track
     225        Condition => 'delete $$self{TrackType}; 1',
    223226        SubDirectory => { TagTable => 'Image::ExifTool::Matroska::Main' },
    224227    },
     
    632635    0x487 => { Name => 'TagString',         Format => 'utf8' },
    633636    0x485 => { Name => 'TagBinary',         Binary => 1 },
     637#
     638# Spherical Video V2 (untested)
     639#
     640    0x7670 => {
     641        Name => 'Projection',
     642        SubDirectory => { TagTable => 'Image::ExifTool::Matroska::Projection' },
     643    },
     644);
     645
     646# Spherical video v2 projection tags (ref https://github.com/google/spatial-media/blob/master/docs/spherical-video-v2-rfc.md)
     647%Image::ExifTool::Matroska::Projection = (
     648    GROUPS => { 2 => 'Video' },
     649    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
     650    NOTES => q{
     651        Projection tags defined by the Spherical Video V2 specification.  See
     652        L<https://github.com/google/spatial-media/blob/master/docs/spherical-video-v2-rfc.md>
     653        for the specification.
     654    },
     655    0x7671 => {
     656        Name => 'ProjectionType',
     657        Format => 'unsigned',
     658        DataMember => 'ProjectionType',
     659        RawConv => '$$self{ProjectionType} = $val',
     660        PrintConv => {
     661            0 => 'Rectangular',
     662            1 => 'Equirectangular',
     663            2 => 'Cubemap',
     664            3 => 'Mesh',
     665        },
     666    },
     667    0x7672 => [{
     668        Name => 'EquirectangularProj',
     669        Condition => '$$self{ProjectionType} == 1',
     670        SubDirectory => { TagTable => 'Image::ExifTool::QuickTime::equi' },
     671    },{
     672        Name => 'CubemapProj',
     673        Condition => '$$self{ProjectionType} == 2',
     674        SubDirectory => { TagTable => 'Image::ExifTool::QuickTime::cbmp' },
     675    }],
     676    0x7673 => { Name => 'ProjectionPosYaw',   Format => 'float' },
     677    0x7674 => { Name => 'ProjectionPosPitch', Format => 'float' },
     678    0x7675 => { Name => 'ProjectionPosRoll',  Format => 'float' },
    634679);
    635680
     
    673718sub ProcessMKV($$)
    674719{
    675     my ($exifTool, $dirInfo) = @_;
     720    my ($et, $dirInfo) = @_;
    676721    my $raf = $$dirInfo{RAF};
    677722    my ($buff, $buf2, @dirEnd, $trackIndent, %trackTypes);
     
    688733    my $hlen = GetVInt($buff, $pos);
    689734    return 0 unless $hlen and $hlen > 0;
    690     $pos + $hlen > $dataLen and $exifTool->Warn('Truncated Matroska header'), return 1;
    691     $exifTool->SetFileType();
     735    $pos + $hlen > $dataLen and $et->Warn('Truncated Matroska header'), return 1;
     736    $et->SetFileType();
    692737    SetByteOrder('MM');
    693738    my $tagTablePtr = GetTagTable('Image::ExifTool::Matroska::Main');
    694739
    695740    # set flag to process entire file (otherwise we stop at the first Cluster)
    696     my $verbose = $exifTool->Options('Verbose');
    697     my $processAll = ($verbose or $exifTool->Options('Unknown') > 1);
    698     $$exifTool{TrackTypes} = \%trackTypes;  # store Track types reference
    699     my $oldIndent = $$exifTool{INDENT};
     741    my $verbose = $et->Options('Verbose');
     742    my $processAll = ($verbose or $et->Options('Unknown') > 1);
     743    $$et{TrackTypes} = \%trackTypes;  # store Track types reference
     744    my $oldIndent = $$et{INDENT};
    700745    my $chapterNum = 0;
    701746
     
    705750            pop @dirEnd;
    706751            # use INDENT to decide whether or not we are done this Track element
    707             delete $$exifTool{SET_GROUP1} if $trackIndent and $trackIndent eq $$exifTool{INDENT};
    708             $$exifTool{INDENT} = substr($$exifTool{INDENT}, 0, -2);
     752            delete $$et{SET_GROUP1} if $trackIndent and $trackIndent eq $$et{INDENT};
     753            $$et{INDENT} = substr($$et{INDENT}, 0, -2);
    709754        }
    710755        # read more if we are getting close to the end of our buffer
     
    724769        $size < 0 and $unknownSize = 1, $size = 1e20;
    725770        if (@dirEnd and $pos + $dataPos + $size > $dirEnd[-1][0]) {
    726             $exifTool->Warn("Invalid or corrupted $dirEnd[-1][1] master element");
     771            $et->Warn("Invalid or corrupted $dirEnd[-1][1] master element");
    727772            $pos = $dirEnd[-1][0] - $dataPos;
    728773            if ($pos < 0 or $pos > $dataLen) {
     
    735780            next;
    736781        }
    737         my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $tag);
     782        my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag);
    738783        # just fall through into the contained EBML elements
    739784        if ($tagInfo and $$tagInfo{SubDirectory}) {
    740785            # stop processing at first cluster unless we are in verbose mode
    741786            last if $$tagInfo{Name} eq 'Cluster' and not $processAll;
    742             $$exifTool{INDENT} .= '| ';
    743             $exifTool->VerboseDir($$tagTablePtr{$tag}{Name}, undef, $size);
     787            $$et{INDENT} .= '| ';
     788            $et->VerboseDir($$tagTablePtr{$tag}{Name}, undef, $size);
    744789            push @dirEnd, [ $pos + $dataPos + $size, $$tagInfo{Name} ];
    745790            if ($$tagInfo{Name} eq 'ChapterAtom') {
    746                 $$exifTool{SET_GROUP1} = 'Chapter' . (++$chapterNum);
    747                 $trackIndent = $$exifTool{INDENT};
     791                $$et{SET_GROUP1} = 'Chapter' . (++$chapterNum);
     792                $trackIndent = $$et{INDENT};
    748793            }
    749794            next;
     
    756801            if (not $tagInfo or $more > 10000000) {
    757802                # don't try to skip very large blocks unless LargeFileSupport is enabled
    758                 last if $more > 0x80000000 and not $exifTool->Options('LargeFileSupport');
     803                last if $more >= 0x80000000 and not $et->Options('LargeFileSupport');
    759804                $raf->Seek($more, 1) or last;
    760805                $buff = '';
     
    786831            if ($fmt eq 'string' or $fmt eq 'utf8') {
    787832                ($val = substr($buff, $pos, $size)) =~ s/\0.*//s;
    788                 $val = $exifTool->Decode($val, 'UTF8') if $fmt eq 'utf8';
     833                $val = $et->Decode($val, 'UTF8') if $fmt eq 'utf8';
    789834            } elsif ($fmt eq 'float') {
    790835                if ($size == 4) {
     
    793838                    $val = GetDouble(\$buff, $pos);
    794839                } else {
    795                     $exifTool->Warn("Illegal float size ($size)");
     840                    $et->Warn("Illegal float size ($size)");
    796841                }
    797842            } else {
     
    821866            # set group1 to Track/Chapter number
    822867            if ($$tagInfo{Name} eq 'TrackNumber') {
    823                 $$exifTool{SET_GROUP1} = 'Track' . $val;
    824                 $trackIndent = $$exifTool{INDENT};
     868                $$et{SET_GROUP1} = 'Track' . $val;
     869                $trackIndent = $$et{INDENT};
    825870            }
    826871        }
     
    832877        );
    833878        if ($$tagInfo{NoSave}) {
    834             $exifTool->VerboseInfo($tag, $tagInfo, Value => $val, %parms) if $verbose;
     879            $et->VerboseInfo($tag, $tagInfo, Value => $val, %parms) if $verbose;
    835880        } else {
    836             $exifTool->HandleTag($tagTablePtr, $tag, $val, %parms);
     881            $et->HandleTag($tagTablePtr, $tag, $val, %parms);
    837882        }
    838883        $pos += $size;  # step to next element
    839884    }
    840     $$exifTool{INDENT} = $oldIndent;
    841     delete $$exifTool{SET_GROUP1};
     885    $$et{INDENT} = $oldIndent;
     886    delete $$et{SET_GROUP1};
    842887    # override file type if necessary based on existing track types
    843888    unless ($trackTypes{0x01} or $trackTypes{0x03}) {   # video or complex?
    844889        if ($trackTypes{0x02}) {                        # audio?
    845             $exifTool->OverrideFileType('MKA');
     890            $et->OverrideFileType('MKA');
    846891        } elsif ($trackTypes{0x11}) {                   # subtitle?
    847             $exifTool->OverrideFileType('MKS');
     892            $et->OverrideFileType('MKS');
    848893        }
    849894    }
     
    870915=head1 AUTHOR
    871916
    872 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     917Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    873918
    874919This library is free software; you can redistribute it and/or modify it
     
    889934
    890935=cut
    891 
Note: See TracChangeset for help on using the changeset viewer.