Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields that join_before_split should be applied to. Previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores values by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/LNK.pm

    r24107 r34921  
    1616use Image::ExifTool qw(:DataAccess :Utils);
    1717
    18 $VERSION = '1.03';
     18$VERSION = '1.07';
    1919
    2020sub ProcessItemID($$$);
     
    2525    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    2626    GROUPS => { 2 => 'Other' },
     27    VARS => { HEX_ID => 1 },    # print hex ID's in documentation
    2728    NOTES => 'Information extracted from MS Shell Link (Windows shortcut) files.',
    2829    # maybe the Flags aren't very useful to the user (since they are
     
    236237    PROCESS_PROC => \&ProcessItemID,
    237238    # (can't find any documentation on these items)
    238     0x0032 => { 
     239    0x0032 => {
    239240        Name => 'Item0032',
    240241        SubDirectory => { TagTable => 'Image::ExifTool::LNK::Item0032' },
     
    380381        RawConv => q{
    381382            $val = $self->Decode($val, 'UCS2');
    382             $val =~ s/\0.*//;
     383            $val =~ s/\0.*//s;
    383384            return length($val) ? $val : undef;
    384385        },
     
    460461sub ProcessItemID($$$)
    461462{
    462     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     463    my ($et, $dirInfo, $tagTablePtr) = @_;
    463464    my $dataPt = $$dirInfo{DataPt};
    464465    my $dataLen = length $$dataPt;
     
    468469        DataPos => $$dirInfo{DataPos},
    469470    );
    470     $exifTool->VerboseDir('ItemID', undef, $dataLen);
     471    $et->VerboseDir('ItemID', undef, $dataLen);
    471472    for (;;) {
    472473        last if $pos + 4 >= $dataLen;
     
    474475        last if $size < 2 or $pos + $size > $dataLen;
    475476        my $tag = Get16u($dataPt, $pos+2); # (just a guess -- may not be a tag at all)
    476         Image::ExifTool::AddTagToTable($tagTablePtr, $tag, {
     477        AddTagToTable($tagTablePtr, $tag, {
    477478            Name => sprintf('Item%.4x', $tag),
    478479            SubDirectory => { TagTable => 'Image::ExifTool::LNK::UnknownData' },
    479480        }) unless $$tagTablePtr{$tag};
    480         $exifTool->HandleTag($tagTablePtr, $tag, undef, %opts, Start => $pos, Size => $size);
     481        $et->HandleTag($tagTablePtr, $tag, undef, %opts, Start => $pos, Size => $size);
    481482        $pos += $size;
    482483    }
     
    489490sub ProcessLinkInfo($$$)
    490491{
    491     my ($exifTool, $dirInfo, $tagTablePtr) = @_;
     492    my ($et, $dirInfo, $tagTablePtr) = @_;
    492493    my $dataPt = $$dirInfo{DataPt};
    493494    my $dataLen = length $$dataPt;
     
    501502    );
    502503    my ($off, $unicode, $pos, $val, $size);
    503     $exifTool->VerboseDir('LinkInfo', undef, $dataLen);
     504    $et->VerboseDir('LinkInfo', undef, $dataLen);
    504505    if ($lif & 0x01) {
    505506        # read Volume ID
     
    507508        if ($off + 0x20 <= $dataLen) {
    508509            # my $len = Get32u($dataPt, $off);
    509             $exifTool->HandleTag($tagTablePtr, 'DriveType', undef, %opts, Start=>$off+4);
     510            $et->HandleTag($tagTablePtr, 'DriveType', undef, %opts, Start=>$off+4);
    510511            $pos = Get32u($dataPt, $off + 0x0c);
    511512            if ($pos == 0x14) {
     
    518519            if (defined $val) {
    519520                $size = length $val;
    520                 $val = $exifTool->Decode($val, 'UCS2') if $unicode;
    521                 $exifTool->HandleTag($tagTablePtr, 'VolumeLabel', $val, %opts, Start=>$pos, Size=>$size);
     521                $val = $et->Decode($val, 'UCS2') if $unicode;
     522                $et->HandleTag($tagTablePtr, 'VolumeLabel', $val, %opts, Start=>$pos, Size=>$size);
    522523            }
    523524        }
     
    533534        if (defined $val) {
    534535            $size = length $val;
    535             $val = $exifTool->Decode($val, 'UCS2') if $unicode;
    536             $exifTool->HandleTag($tagTablePtr, 'LocalBasePath', $val, %opts, Start=>$pos, Size=>$size);
     536            $val = $et->Decode($val, 'UCS2') if $unicode;
     537            $et->HandleTag($tagTablePtr, 'LocalBasePath', $val, %opts, Start=>$pos, Size=>$size);
    537538        }
    538539    }
     
    552553            if (defined $val) {
    553554                $size = length $val;
    554                 $val = $exifTool->Decode($val, 'UCS2') if $unicode;
    555                 $exifTool->HandleTag($tagTablePtr, 'NetName', $val, %opts, Start=>$pos, Size=>$size);
     555                $val = $et->Decode($val, 'UCS2') if $unicode;
     556                $et->HandleTag($tagTablePtr, 'NetName', $val, %opts, Start=>$pos, Size=>$size);
    556557            }
    557558            my $flg = Get32u($dataPt, $off + 0x04);
     
    567568                if (defined $val) {
    568569                    $size = length $val;
    569                     $val = $exifTool->Decode($val, 'UCS2') if $unicode;
    570                     $exifTool->HandleTag($tagTablePtr, 'DeviceName', $val, %opts, Start=>$pos, Size=>$size);
     570                    $val = $et->Decode($val, 'UCS2') if $unicode;
     571                    $et->HandleTag($tagTablePtr, 'DeviceName', $val, %opts, Start=>$pos, Size=>$size);
    571572                }
    572573            }
    573574            if ($flg & 0x02) {
    574575                $val = Get32u($dataPt, $off + 0x10);
    575                 $exifTool->HandleTag($tagTablePtr, 'NetProviderType', $val, %opts, Start=>$off + 0x10);
     576                $et->HandleTag($tagTablePtr, 'NetProviderType', $val, %opts, Start=>$off + 0x10);
    576577            }
    577578        }
     
    586587sub ProcessLNK($$)
    587588{
    588     my ($exifTool, $dirInfo) = @_;
     589    my ($et, $dirInfo) = @_;
    589590    my $raf = $$dirInfo{RAF};
    590591    my ($buff, $buf2, $len, $i);
     
    599600        $buff .= $buf2;
    600601    }
    601     $exifTool->SetFileType('Windows Shortcut');
     602    $et->SetFileType();
    602603    SetByteOrder('II');
    603604
     
    609610        DirLen => length $buff,
    610611    );
    611     $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
     612    $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
    612613
    613614    my $flags = Get32u(\$buff, 0x14);
     
    618619        $len = unpack('v', $buff);
    619620        $raf->Read($buff, $len) == $len or return 1;
    620         $exifTool->HandleTag($tagTablePtr, 0x10000, undef,
     621        $et->HandleTag($tagTablePtr, 0x10000, undef,
    621622            DataPt  => \$buff,
    622623            DataPos => $raf->Tell() - $len,
     
    632633        $raf->Read($buf2, $len - 4) == $len - 4 or return 1;
    633634        $buff .= $buf2;
    634         $exifTool->HandleTag($tagTablePtr, 0x20000, undef,
     635        $et->HandleTag($tagTablePtr, 0x20000, undef,
    635636            DataPt  => \$buff,
    636637            DataPos => $raf->Tell() - $len,
     
    647648        $raf->Read($buff, 2) or return 1;
    648649        $len = unpack('v', $buff);
    649         $len *= 2 if $flags & 0x80;  # characters are 2 bytes if Unicode flag is set   
     650        $len *= 2 if $flags & 0x80;  # characters are 2 bytes if Unicode flag is set
    650651        $raf->Read($buff, $len) or return 1;
    651652        my $val;
    652         $val = $exifTool->Decode($buff, 'UCS2') if $flags & 0x80;
    653         $exifTool->HandleTag($tagTablePtr, 0x30000 | $mask, $val,
     653        $val = $et->Decode($buff, 'UCS2') if $flags & 0x80;
     654        $et->HandleTag($tagTablePtr, 0x30000 | $mask, $val,
    654655            DataPt  => \$buff,
    655656            DataPos => $raf->Tell() - $len,
     
    671672            $tagInfo = $$tagTablePtr{0xa0000000};
    672673        }
    673         $exifTool->HandleTag($tagTablePtr, $tag, undef,
     674        $et->HandleTag($tagTablePtr, $tag, undef,
    674675            DataPt  => \$buff,
    675676            DataPos => $raf->Tell() - $len - 4,
     
    699700=head1 AUTHOR
    700701
    701 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     702Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    702703
    703704This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.