Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored their values as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option, which I added, called apply_join_before_split_to_metafields: it is a regex that lists the metadata fields to apply join_before_split to, whereas previously join_before_split always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/RSRC.pm

    r24107 r34921  
    1515use Image::ExifTool qw(:DataAccess :Utils);
    1616
    17 $VERSION = '1.02';
     17$VERSION = '1.09';
     18
     19sub ProcessRSRC($$);
    1820
    1921# Information decoded from Mac OS resources
    2022%Image::ExifTool::RSRC::Main = (
    2123    GROUPS => { 2 => 'Document' },
     24    PROCESS_PROC => \&ProcessRSRC,
    2225    NOTES => q{
    23         Tags extracted from Mac OS resource files and DFONT files.  These tags may
    24         also be extracted from the resource fork of any file in OS X, either by
    25         adding "/rsrc" to the filename to process the resource fork alone, or by
    26         using the ExtractEmbedded (-ee) option to process the resource fork as a
    27         sub-document of the main file.
     26        Tags extracted from Mac OS resource files, DFONT files and "._" sidecar
     27        files.  These tags may also be extracted from the resource fork of any file
     28        in OS X, either by adding "/..namedfork/rsrc" to the filename to process the
     29        resource fork alone, or by using the L<ExtractEmbedded|../ExifTool.html#ExtractEmbedded> (-ee) option to process
     30        the resource fork as a sub-document of the main file.  When writing,
     31        ExifTool preserves the Mac OS resource fork by default, but it may deleted
     32        with C<-rsrc:all=> on the command line.
    2833    },
    2934    '8BIM' => {
     
    3439        Name => 'Font',
    3540        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
     41    },
     42    # my samples of postscript-type DFONT files have a POST resource
     43    # with ID 0x1f5 and the same format as a PostScript file
     44    'POST_0x01f5' => {
     45        Name => 'PostscriptFont',
     46        SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' },
    3647    },
    3748    'usro_0x0000' => 'OpenWithApplication',
     
    5566sub ProcessRSRC($$)
    5667{
    57     my ($exifTool, $dirInfo) = @_;
     68    my ($et, $dirInfo) = @_;
    5869    my $raf = $$dirInfo{RAF};
    5970    my ($hdr, $map, $buff, $i, $j);
     71
     72    # allow access with data reference
     73    $raf or $raf = new File::RandomAccess($$dirInfo{DataPt});
    6074
    6175    # attempt to validate the format as thoroughly as practical
     
    7488    my $typeOff = Get16u(\$map, 24);
    7589    my $nameOff = Get16u(\$map, 26);
    76     my $numTypes = Get16u(\$map, 28);
     90    my $numTypes = (Get16u(\$map, 28) + 1) & 0xffff;
    7791
    7892    # validate offsets in the resource map
    7993    return 0 if $typeOff < 28 or $nameOff < 30;
    8094
    81     $exifTool->SetFileType('RSRC') unless $$exifTool{IN_RESOURCE};
    82     my $verbose = $exifTool->Options('Verbose');
     95    $et->SetFileType('RSRC') unless $$et{IN_RESOURCE};
     96    my $verbose = $et->Options('Verbose');
    8397    my $tagTablePtr = GetTagTable('Image::ExifTool::RSRC::Main');
     98    $et->VerboseDir('RSRC', $numTypes);
    8499
    85100    # parse resource type list
    86     for ($i=0; $i<=$numTypes; ++$i) {
     101    for ($i=0; $i<$numTypes; ++$i) {
    87102        my $off = $typeOff + 2 + 8 * $i;    # offset of entry in type list
    88103        last if $off + 8 > $mapLen;
     
    109124            if ($tagInfo or $verbose) {
    110125                unless ($raf->Seek($resOff, 0) and $raf->Read($buff, 4) == 4 and
    111                         ($valLen = unpack('N', $buff)) < 1024000 and # arbitrary size limit
     126                        ($valLen = unpack('N', $buff)) < 100000000 and # arbitrary size limit (100MB)
    112127                        $raf->Read($val, $valLen) == $valLen)
    113128                {
    114                     $exifTool->Warn("Error reading $resType resource");
     129                    $et->Warn("Error reading $resType resource");
    115130                    next;
    116131                }
     
    120135                $resName = '' unless $raf->Seek($resNameOff, 0) and $raf->Read($buff, 1) and
    121136                    ($nameLen = ord $buff) != 0 and $raf->Read($resName, $nameLen) == $nameLen;
    122                 $exifTool->VPrint(0,sprintf("$resType resource ID 0x%.4x (offset 0x%.4x, $valLen bytes, name='$resName'):\n", $id, $resOff));
    123                 $exifTool->VerboseDump(\$val);
     137                $et->VPrint(0,sprintf("%s resource ID 0x%.4x (offset 0x%.4x, $valLen bytes, name='%s'):\n",
     138                    $resType, $id, $resOff, $resName));
     139                $et->VerboseDump(\$val);
    124140            }
    125141            next unless $tagInfo;
     
    133149                next if $p + $vlen > $valLen;
    134150                my $tagTablePtr = GetTagTable('Image::ExifTool::RSRC::Main');
    135                 $val = $exifTool->Decode(substr($val, $p, $vlen), 'MacRoman');
     151                $val = $et->Decode(substr($val, $p, $vlen), 'MacRoman');
    136152            } elsif ($resType eq 'sfnt') {
    137153                # parse the OTF font block
     
    139155                $$dirInfo{Base} = $resOff + 4;
    140156                require Image::ExifTool::Font;
    141                 unless (Image::ExifTool::Font::ProcessOTF($exifTool, $dirInfo)) {
    142                     $exifTool->Warn('Unrecognized sfnt resource format');
     157                unless (Image::ExifTool::Font::ProcessOTF($et, $dirInfo)) {
     158                    $et->Warn('Unrecognized sfnt resource format');
    143159                }
    144                 $exifTool->OverrideFileType('DFONT');
     160                # assume this is a DFONT file unless processing the rsrc fork
     161                $et->OverrideFileType('DFONT') unless $$et{DOC_NUM};
    145162                next;
    146163            } elsif ($resType eq '8BIM') {
    147164                my $ttPtr = GetTagTable('Image::ExifTool::Photoshop::Main');
    148                 $exifTool->HandleTag($ttPtr, $id, $val,
     165                $et->HandleTag($ttPtr, $id, $val,
    149166                    DataPt  => \$val,
    150167                    DataPos => $resOff + 4,
     
    177194                }
    178195                $val = \@vals;
     196            } elsif ($resType eq 'POST') {
     197                # assume this is a DFONT file unless processing the rsrc fork
     198                $et->OverrideFileType('DFONT') unless $$et{DOC_NUM};
     199                $val = substr $val, 2;
    179200            } elsif ($resType ne 'TEXT') {
    180201                next;
    181202            }
    182             $exifTool->HandleTag($tagTablePtr, $tag, $val);
     203            $et->HandleTag($tagTablePtr, $tag, $val);
    183204        }
    184205    }
     
    205226=head1 AUTHOR
    206227
    207 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     228Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    208229
    209230This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.