source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/RSRC.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields, which is a regex that lists the metadata fields to apply the join_before_split to; previously join_before_split always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 9.6 KB
Line 
1#------------------------------------------------------------------------------
2# File: RSRC.pm
3#
4# Description: Read Mac OS Resource information
5#
6# Revisions: 2010/03/17 - P. Harvey Created
7#
8# References: 1) http://developer.apple.com/legacy/mac/library/documentation/mac/MoreToolbox/MoreToolbox-99.html
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::RSRC;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16
17$VERSION = '1.09';
18
19sub ProcessRSRC($$);
20
21# Information decoded from Mac OS resources
# Information decoded from Mac OS resources
#
# Table keys take one of two forms, matching the lookups done by ProcessRSRC:
#   1) a bare 4-character resource type (eg. '8BIM'), which matches every
#      resource of that type regardless of its ID
#   2) "<type>_0x<id>" with the ID as 4 lower-case hex digits (eg. 'vers_0x0001'),
#      which matches only the resource of that type with that specific ID
# The bare type is looked up first, then the type+ID form.
%Image::ExifTool::RSRC::Main = (
    GROUPS => { 2 => 'Document' },
    PROCESS_PROC => \&ProcessRSRC,
    NOTES => q{
        Tags extracted from Mac OS resource files, DFONT files and "._" sidecar
        files.  These tags may also be extracted from the resource fork of any file
        in OS X, either by adding "/..namedfork/rsrc" to the filename to process the
        resource fork alone, or by using the L<ExtractEmbedded|../ExifTool.html#ExtractEmbedded> (-ee) option to process
        the resource fork as a sub-document of the main file.  When writing,
        ExifTool preserves the Mac OS resource fork by default, but it may be deleted
        with C<-rsrc:all=> on the command line.
    },
    # resources handed off to other tag tables
    '8BIM' => {
        Name => 'PhotoshopInfo',
        SubDirectory => { TagTable => 'Image::ExifTool::Photoshop::Main' },
    },
    'sfnt' => {
        Name => 'Font',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
    },
    # my samples of postscript-type DFONT files have a POST resource
    # with ID 0x1f5 and the same format as a PostScript file
    'POST_0x01f5' => {
        Name => 'PostscriptFont',
        SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' },
    },
    # simple string-valued resources (note the trailing space in the 'STR ' type)
    'usro_0x0000' => 'OpenWithApplication',
    'vers_0x0001' => 'ApplicationVersion',
    'STR _0xbff3' => 'ApplicationMissingMsg',
    'STR _0xbff4' => 'CreatorApplication',
    # the following written by Photoshop
    # (ref http://www.adobe.ca/devnet/photoshop/psir/ps_image_resources.pdf)
    'STR#_0x0080' => 'Keywords',
    'TEXT_0x0080' => 'Description',
    # don't extract PICT's because the clip region isn't set properly
    # in the PICT resource for some reason.  Also, a dummy 512-byte
    # header would have to be added to create a valid PICT file.
    # 'PICT' => { Name => 'PreviewPICT', Binary => 1 },
);
61
#------------------------------------------------------------------------------
# Read information from a Mac resource file (ref 1)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (reads from RAF if set, otherwise
#            from the data referenced by DataPt)
# Returns: 1 on success, 0 if this wasn't a valid resource file
# Notes: - a resource is looked up in the RSRC tag table first by its bare
#          4-character type, then by "<type>_0x<id>" (ID as 4 hex digits)
#        - resource data is read only for resources with a tag table entry,
#          or for all resources when the Verbose option is set
#        - sets $$dirInfo{Base} as a side effect when processing 'sfnt' resources
sub ProcessRSRC($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($hdr, $map, $buff);

    # allow access with data reference
    $raf or $raf = File::RandomAccess->new($$dirInfo{DataPt});

    # attempt to validate the format as thoroughly as practical
    return 0 unless $raf->Read($hdr, 30) == 30;
    # the header begins with four big-endian 32-bit values
    my ($datOff, $mapOff, $datLen, $mapLen) = unpack('N*', $hdr);
    return 0 unless $raf->Seek(0, 2);
    my $fLen = $raf->Tell();
    # data and map sections must lie inside the file...
    return 0 if $datOff < 0x10 or $datOff + $datLen > $fLen;
    return 0 if $mapOff < 0x10 or $mapOff + $mapLen > $fLen or $mapLen < 30;
    # ...and must not overlap each other
    return 0 if $datOff < $mapOff and $datOff + $datLen > $mapOff;
    return 0 if $mapOff < $datOff and $mapOff + $mapLen > $datOff;

    # read the resource map
    $raf->Seek($mapOff, 0) and $raf->Read($map, $mapLen) == $mapLen or return 0;
    SetByteOrder('MM');
    my $typeOff = Get16u(\$map, 24);    # offset of type list within the map
    my $nameOff = Get16u(\$map, 26);    # offset of name list within the map
    # (stored as count - 1; the mask makes a stored 0xffff mean "no types")
    my $numTypes = (Get16u(\$map, 28) + 1) & 0xffff;

    # validate offsets in the resource map
    return 0 if $typeOff < 28 or $nameOff < 30;

    $et->SetFileType('RSRC') unless $$et{IN_RESOURCE};
    my $verbose = $et->Options('Verbose');
    my $tagTablePtr = GetTagTable('Image::ExifTool::RSRC::Main');
    $et->VerboseDir('RSRC', $numTypes);

    # parse resource type list
    for my $i (0 .. $numTypes - 1) {
        my $off = $typeOff + 2 + 8 * $i;    # offset of entry in type list
        last if $off + 8 > $mapLen;
        my $resType = substr($map,$off,4);  # resource type
        my $resNum = Get16u(\$map,$off+4);  # number of resources - 1
        my $refOff = Get16u(\$map,$off+6) + $typeOff; # offset to first resource reference
        # loop through all resources
        for my $j (0 .. $resNum) {
            my $roff = $refOff + 12 * $j;   # offset of this 12-byte reference entry
            last if $roff + 12 > $mapLen;
            my $id = Get16u(\$map,$roff);
            my $nameRel = Get16u(\$map,$roff+2);    # name offset relative to name list
            # read only the 24-bit resource data offset
            my $resOff = (Get32u(\$map,$roff+4) & 0x00ffffff) + $datOff;
            my $resNameOff = $nameRel + $nameOff + $mapOff;
            my ($tag, $val, $valLen);
            # look up by resource type alone, then by type and ID
            my $tagInfo = $$tagTablePtr{$resType};
            if ($tagInfo) {
                $tag = $resType;
            } else {
                $tag = sprintf('%s_0x%.4x', $resType, $id);
                $tagInfo = $$tagTablePtr{$tag};
            }
            # read the resource data if necessary
            # (data is stored as a 4-byte big-endian length followed by the bytes)
            if ($tagInfo or $verbose) {
                unless ($raf->Seek($resOff, 0) and $raf->Read($buff, 4) == 4 and
                        ($valLen = unpack('N', $buff)) < 100000000 and # arbitrary size limit (100MB)
                        $raf->Read($val, $valLen) == $valLen)
                {
                    $et->Warn("Error reading $resType resource");
                    next;
                }
            }
            if ($verbose) {
                my $resName = '';
                # a name offset of 0xffff (-1) means the resource has no name (ref 1),
                # so don't go looking for one at a bogus file position
                if ($nameRel != 0xffff) {
                    my $nameLen;
                    # name is a Pascal string: 1 length byte then the characters
                    $resName = '' unless $raf->Seek($resNameOff, 0) and $raf->Read($buff, 1) and
                        ($nameLen = ord $buff) != 0 and $raf->Read($resName, $nameLen) == $nameLen;
                }
                $et->VPrint(0,sprintf("%s resource ID 0x%.4x (offset 0x%.4x, $valLen bytes, name='%s'):\n",
                    $resType, $id, $resOff, $resName));
                $et->VerboseDump(\$val);
            }
            next unless $tagInfo;
            if ($resType eq 'vers') {
                # parse the 'vers' resource to get the long version string
                next unless $valLen > 8;
                # long version string is after short version
                # (byte 6 is the length of the short version Pascal string)
                my $p = 7 + Get8u(\$val, 6);
                next if $p >= $valLen;
                my $vlen = Get8u(\$val, $p++);
                next if $p + $vlen > $valLen;
                $val = $et->Decode(substr($val, $p, $vlen), 'MacRoman');
            } elsif ($resType eq 'sfnt') {
                # parse the OTF font block
                # (ProcessOTF reads from the file, so position it after the length word)
                $raf->Seek($resOff + 4, 0) or next;
                $$dirInfo{Base} = $resOff + 4;
                require Image::ExifTool::Font;
                unless (Image::ExifTool::Font::ProcessOTF($et, $dirInfo)) {
                    $et->Warn('Unrecognized sfnt resource format');
                }
                # assume this is a DFONT file unless processing the rsrc fork
                $et->OverrideFileType('DFONT') unless $$et{DOC_NUM};
                next;
            } elsif ($resType eq '8BIM') {
                # pass to the Photoshop table, using the resource ID as the tag ID
                my $ttPtr = GetTagTable('Image::ExifTool::Photoshop::Main');
                $et->HandleTag($ttPtr, $id, $val,
                    DataPt  => \$val,
                    DataPos => $resOff + 4,
                    Size    => $valLen,
                    Start   => 0,
                    Parent  => 'RSRC',
                );
                next;
            } elsif ($resType eq 'STR ' and $valLen > 1) {
                # extract Pascal string
                my $len = ord $val;
                next unless $valLen >= $len + 1;
                $val = substr($val, 1, $len);
            } elsif ($resType eq 'usro' and $valLen > 4) {
                # 4-byte length followed by a null-terminated string
                my $len = unpack('N', $val);
                next unless $valLen >= $len + 4;
                # truncate at null (/s so that "." also removes any newlines after the null)
                ($val = substr($val, 4, $len)) =~ s/\0.*//s;
            } elsif ($resType eq 'STR#' and $valLen > 2) {
                # extract list of strings (ref http://simtech.sourceforge.net/tech/strings.html)
                my $num = unpack('n', $val);
                next if $num & 0xf000; # (ignore special-format STR# resources)
                my @vals;
                my $pos = 2;
                # 16-bit count is followed by that many Pascal strings;
                # stop quietly at the first string that would overrun the data
                for (my $n=0; $n<$num; ++$n) {
                    last if $pos >= $valLen;
                    my $len = ord substr($val, $pos++, 1);
                    last if $pos + $len > $valLen;
                    push @vals, substr($val, $pos, $len);
                    $pos += $len;
                }
                $val = \@vals;
            } elsif ($resType eq 'POST') {
                # assume this is a DFONT file unless processing the rsrc fork
                $et->OverrideFileType('DFONT') unless $$et{DOC_NUM};
                # PostScript data follows the first 2 bytes, so a resource
                # shorter than that has nothing to extract
                next if $valLen < 2;
                $val = substr $val, 2;
            } elsif ($resType ne 'TEXT') {
                # (TEXT resources are stored as-is; other types reaching this
                # point have no handler or are too short to decode)
                next;
            }
            $et->HandleTag($tagTablePtr, $tag, $val);
        }
    }
    return 1;
}
208
2091; # end
210
211__END__
212
213=head1 NAME
214
215Image::ExifTool::RSRC - Read Mac OS Resource information
216
217=head1 SYNOPSIS
218
219This module is used by Image::ExifTool
220
221=head1 DESCRIPTION
222
223This module contains routines required by Image::ExifTool to read Mac OS
224resource files.
225
226=head1 AUTHOR
227
228Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
229
230This library is free software; you can redistribute it and/or modify it
231under the same terms as Perl itself.
232
233=head1 REFERENCES
234
235=over 4
236
237=item L<http://developer.apple.com/legacy/mac/library/documentation/mac/MoreToolbox/MoreToolbox-99.html>
238
239=back
240
241=head1 SEE ALSO
242
243L<Image::ExifTool::TagNames/RSRC Tags>,
244L<Image::ExifTool(3pm)|Image::ExifTool>
245
246=cut
247
Note: See TracBrowser for help on using the repository browser.