Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to apply join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/FLAC.pm

    r24107 r34921  
    1515use Image::ExifTool qw(:DataAccess :Utils);
    1616
    17 $VERSION = '1.03';
     17$VERSION = '1.08';
    1818
    1919sub ProcessBitStream($$$);
     
    2929        SubDirectory => { TagTable => 'Image::ExifTool::FLAC::StreamInfo' },
    3030    },
     31    1 => { Name => 'Padding',     Binary => 1, Unknown => 1 },
     32    2 => { Name => 'Application', Binary => 1, Unknown => 1 },
     33    3 => { Name => 'SeekTable',   Binary => 1, Unknown => 1 },
    3134    4 => {
    3235        Name => 'VorbisComment',
    3336        SubDirectory => { TagTable => 'Image::ExifTool::Vorbis::Comments' },
    3437    },
     38    5 => { Name => 'CueSheet',    Binary => 1, Unknown => 1 },
    3539    6 => {
    3640        Name => 'Picture',
    3741        SubDirectory => { TagTable => 'Image::ExifTool::FLAC::Picture' },
    3842    },
     43    # 7-126 - Reserved
     44    # 127 - Invalid
    3945);
    4046
     
    5763    },
    5864    'Bit108-143' => 'TotalSamples',
     65    'Bit144-271' => { #Tim Eliseo
     66        Name => 'MD5Signature',
     67        Format => 'undef',
     68        ValueConv => 'unpack("H*",$val)',
     69    },
    5970);
    6071
     
    105116    8 => {
    106117        Name => 'Picture',
     118        Groups => { 2 => 'Preview' },
    107119        Format => 'undef[$val{7}]',
    108120        Binary => 1,
     
    117129            1 => 'FLAC:TotalSamples',
    118130        },
    119         ValueConv => '$val[0] and $val[1] ? $val[1] / $val[0] : undef',
     131        ValueConv => '($val[0] and $val[1]) ? $val[1] / $val[0] : undef',
    120132        PrintConv => 'ConvertDuration($val)',
    121133    },
     
    131143# Notes: Byte order is used to determine the ordering of bits in the stream:
    132144# 'MM' = bit 0 is most significant, 'II' = bit 0 is least significant
    133 # - can handle arbitrarily wide values (ie. 8-byte or larger integers)
     145# - can handle arbitrarily wide values (eg. 8-byte or larger integers)
    134146sub ProcessBitStream($$$)
    135147{
    136     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     148    my ($et, $dirInfo, $tagTablePtr) = @_;
    137149    my $dataPt   = $$dirInfo{DataPt};
    138150    my $dataPos  = $$dirInfo{DataPos};
    139151    my $dirStart = $$dirInfo{DirStart} || 0;
    140152    my $dirLen   = $$dirInfo{DirLen} || (length($$dataPt) - $dirStart);
    141     my $verbose  = $exifTool->Options('Verbose');
     153    my $verbose  = $et->Options('Verbose');
    142154    my $byteOrder = GetByteOrder();
    143155    my $tag;
    144156
    145157    if ($verbose) {
    146         $exifTool->VPrint(0, "  + [BitStream directory, $dirLen bytes, '$byteOrder' order]\n");
     158        $et->VPrint(0, "  + [BitStream directory, $dirLen bytes, '${byteOrder}' order]\n");
    147159    }
    148160    foreach $tag (sort keys %$tagTablePtr) {
     
    152164        my ($f1, $f2) = ($b1 % 8, $b2 % 8); # start/end bit numbers within each byte
    153165        last if $i2 >= $dirLen;
    154         my $val = 0;
    155         my ($i, $mask, $extra);
    156         $extra = ', Mask=0x' if $verbose and ($f1 != 0 or $f2 != 7);
    157         if ($byteOrder eq 'MM') {
    158             # loop from high byte to low byte
    159             for ($i=$i1; $i<=$i2; ++$i) {
    160                 $mask = 0xff;
    161                 if ($i == $i1 and $f1) {
    162                     # mask off high bits in first word (0 is high bit)
    163                     foreach ((8-$f1) .. 7) { $mask ^= (1 << $_) }
     166        my ($val, $extra);
     167        # if Format is unspecified, convert the specified number of bits to an unsigned integer,
     168        # otherwise allow HandleTag to convert whole bytes the normal way (via undefined $val)
     169        if (ref $$tagTablePtr{$tag} ne 'HASH' or not $$tagTablePtr{$tag}{Format}) {
     170            my ($i, $mask);
     171            $val = 0;
     172            $extra = ', Mask=0x' if $verbose and ($f1 != 0 or $f2 != 7);
     173            if ($byteOrder eq 'MM') {
     174                # loop from high byte to low byte
     175                for ($i=$i1; $i<=$i2; ++$i) {
     176                    $mask = 0xff;
     177                    if ($i == $i1 and $f1) {
     178                        # mask off high bits in first word (0 is high bit)
     179                        foreach ((8-$f1) .. 7) { $mask ^= (1 << $_) }
     180                    }
     181                    if ($i == $i2 and $f2 < 7) {
     182                        # mask off low bits in last word (7 is low bit)
     183                        foreach (0 .. (6-$f2)) { $mask ^= (1 << $_) }
     184                    }
     185                    $val = $val * 256 + ($mask & Get8u($dataPt, $i + $dirStart));
     186                    $extra .= sprintf('%.2x', $mask) if $extra;
    164187                }
    165                 if ($i == $i2 and $f2 < 7) {
    166                     # mask off low bits in last word (7 is low bit)
    167                     foreach (0 .. (6-$f2)) { $mask ^= (1 << $_) }
     188            } else {
     189                # (FLAC is big-endian, but support little-endian bit streams
     190                #  so this routine can be used by other modules)
     191                # loop from high byte to low byte
     192                for ($i=$i2; $i>=$i1; --$i) {
     193                    $mask = 0xff;
     194                    if ($i == $i1 and $f1) {
     195                        # mask off low bits in first word (0 is low bit)
     196                        foreach (0 .. ($f1-1)) { $mask ^= (1 << $_) }
     197                    }
     198                    if ($i == $i2 and $f2 < 7) {
     199                        # mask off high bits in last word (7 is high bit)
     200                        foreach (($f2+1) .. 7) { $mask ^= (1 << $_) }
     201                    }
     202                    $val = $val * 256 + ($mask & Get8u($dataPt, $i + $dirStart));
     203                    $extra .= sprintf('%.2x', $mask) if $extra;
    168204                }
    169                 $val = $val * 256 + ($mask & Get8u($dataPt, $i + $dirStart));
    170                 $extra .= sprintf('%.2x', $mask) if $extra;
    171205            }
    172         } else {
    173             # (FLAC is big-endian, but support little-endian bit streams
    174             #  so this routine can be used by other modules)
    175             # loop from high byte to low byte
    176             for ($i=$i2; $i>=$i1; --$i) {
    177                 $mask = 0xff;
    178                 if ($i == $i1 and $f1) {
    179                     # mask off low bits in first word (0 is low bit)
    180                     foreach (0 .. ($f1-1)) { $mask ^= (1 << $_) }
    181                 }
    182                 if ($i == $i2 and $f2 < 7) {
    183                     # mask off high bits in last word (7 is high bit)
    184                     foreach (($f2+1) .. 7) { $mask ^= (1 << $_) }
    185                 }
    186                 $val = $val * 256 + ($mask & Get8u($dataPt, $i + $dirStart));
    187                 $extra .= sprintf('%.2x', $mask) if $extra;
     206            # shift word down until low bit is in position 0
     207            until ($mask & 0x01) {
     208                $val /= 2;
     209                $mask >>= 1;
    188210            }
    189211        }
    190         # shift word down until low bit is in position 0
    191         until ($mask & 0x01) {
    192             $val /= 2;
    193             $mask >>= 1;
    194         }
    195         $exifTool->HandleTag($tagTablePtr, $tag, $val,
     212        $et->HandleTag($tagTablePtr, $tag, $val,
    196213            DataPt  => $dataPt,
    197214            DataPos => $dataPos,
     
    210227sub ProcessFLAC($$)
    211228{
    212     my ($exifTool, $dirInfo) = @_;
     229    my ($et, $dirInfo) = @_;
    213230
    214231    # must first check for leading/trailing ID3 information
    215     unless ($exifTool->{DoneID3}) {
     232    unless ($$et{DoneID3}) {
    216233        require Image::ExifTool::ID3;
    217         Image::ExifTool::ID3::ProcessID3($exifTool, $dirInfo) and return 1;
     234        Image::ExifTool::ID3::ProcessID3($et, $dirInfo) and return 1;
    218235    }
    219236    my $raf = $$dirInfo{RAF};
    220     my $verbose = $exifTool->Options('Verbose');
    221     my $out = $exifTool->Options('TextOut');
     237    my $verbose = $et->Options('Verbose');
     238    my $out = $et->Options('TextOut');
    222239    my ($buff, $err);
    223240
    224241    # check FLAC signature
    225242    $raf->Read($buff, 4) == 4 and $buff eq 'fLaC' or return 0;
    226     $exifTool->SetFileType();
     243    $et->SetFileType();
    227244    SetByteOrder('MM');
    228245    my $tagTablePtr = GetTagTable('Image::ExifTool::FLAC::Main');
     
    237254        if ($verbose) {
    238255            print $out "FLAC metadata block, type $tag:\n";
    239             $exifTool->VerboseDump(\$buff, DataPos => $raf->Tell() - $size);
     256            $et->VerboseDump(\$buff, DataPos => $raf->Tell() - $size);
    240257        }
    241         $exifTool->HandleTag($tagTablePtr, $tag, undef,
     258        $et->HandleTag($tagTablePtr, $tag, undef,
    242259            DataPt  => \$buff,
    243260            DataPos => $raf->Tell() - $size,
     
    245262        last if $last;   # all done if $last is set
    246263    }
    247     $err and $exifTool->Warn('Format error in FLAC file');
     264    $err and $et->Warn('Format error in FLAC file');
    248265    return 1;
    249266}
     
    268285=head1 AUTHOR
    269286
    270 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     287Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    271288
    272289This library is free software; you can redistribute it and/or modify it
     
    284301
    285302L<Image::ExifTool::TagNames/FLAC Tags>,
     303L<Image::ExifTool::TagNames/Ogg Tags>,
    286304L<Image::ExifTool(3pm)|Image::ExifTool>
    287305
Note: See TracChangeset for help on using the changeset viewer.