source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/BigTIFF.pm

Last change on this file was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields, a regex that lists the metadata fields to apply join_before_split to; previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 11.3 KB
Line 
1#------------------------------------------------------------------------------
2# File: BigTIFF.pm
3#
4# Description: Read Big TIFF meta information
5#
6# Revisions: 07/03/2007 - P. Harvey Created
7#
8# References: 1) http://www.awaresystems.be/imaging/tiff/bigtiff.html
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::BigTIFF;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16use Image::ExifTool::Exif;
17
18$VERSION = '1.07';
19
20my $maxOffset = 0x7fffffff; # currently supported maximum data offset/size
21
#------------------------------------------------------------------------------
# Process Big IFD directory
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (uses RAF, DirName, DirStart
#            and, for the HTML dump, OffsetName), 2) tag table ref
# Returns: 1 on success, or when the IFD chain is abandoned with a Warning
#          (unsupported huge offset/count, or a next-IFD pointer that loops
#          back to an IFD already walked); 0 and a Warning on a seek/read
#          failure, an unknown entry format, or a missing next-IFD pointer
# Notes: each IFD is read as an 8-byte entry count, then 20-byte entries
#        (2-byte tag ID, 2-byte format, 8-byte count, 8-byte value/offset),
#        then an 8-byte pointer to the next IFD
sub ProcessBigIFD($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $raf = $$dirInfo{RAF};
    my $verbose = $$et{OPTIONS}{Verbose};
    my $htmlDump = $$et{HTML_DUMP};
    my $dirName = $$dirInfo{DirName};
    my $dirStart = $$dirInfo{DirStart};
    my ($offName, $nextOffName);
    my %seenIFD;    # start offsets of the IFDs already walked in this chain

    if ($htmlDump) {
        $verbose = -1;  # mix htmlDump into verbose so we can test for both at once
        $offName = $$dirInfo{OffsetName};
    }

    # loop through IFD chain
    for (;;) {
        # the next-IFD pointer is taken straight from the file, so a pointer
        # leading back to an IFD we already walked would loop here forever
        if ($seenIFD{$dirStart}++) {
            $et->Warn("$dirName pointer references a previous IFD in the chain");
            last;
        }
        if ($dirStart > $maxOffset and not $et->Options('LargeFileSupport')) {
            $et->Warn('Huge offsets not supported (LargeFileSupport not set)');
            last;
        }
        unless ($raf->Seek($dirStart, 0)) {
            $et->Warn("Bad $dirName offset");
            return 0;
        }
        my ($dirBuff, $index);
        unless ($raf->Read($dirBuff, 8) == 8) {
            $et->Warn("Truncated $dirName count");
            return 0;
        }
        my $numEntries = Image::ExifTool::Get64u(\$dirBuff, 0);
        $verbose > 0 and $et->VerboseDir($dirName, $numEntries);
        my $bsize = $numEntries * 20;   # each BigTIFF IFD entry is 20 bytes
        if ($bsize > $maxOffset) {
            $et->Warn('Huge directory counts not yet supported');
            last;
        }
        my $bufPos = $raf->Tell();      # file position of the first entry
        unless ($raf->Read($dirBuff, $bsize) == $bsize) {
            $et->Warn("Truncated $dirName directory");
            return 0;
        }
        my $nextIFD;
        $raf->Read($nextIFD, 8) == 8 or undef $nextIFD; # try to read next IFD pointer
        if ($htmlDump) {
            $et->HDump($bufPos-8, 8, "$dirName entries", "Entry count: $numEntries", undef, $offName);
            if (defined $nextIFD) {
                my $off = Image::ExifTool::Get64u(\$nextIFD, 0);
                my $tip = sprintf("Offset: 0x%.8x", $off);
                my $id = $offName;
                ($nextOffName, $id) = Image::ExifTool::Exif::NextOffsetName($et, $id) if $off;
                $et->HDump($bufPos + 20 * $numEntries, 8, "Next IFD", $tip, 0, $id);
            }
        }
        # loop through all entries in this BigTIFF IFD
        for ($index=0; $index<$numEntries; ++$index) {
            my $entry = 20 * $index;
            my $tagID = Get16u(\$dirBuff, $entry);
            my $format = Get16u(\$dirBuff, $entry+2);
            my $count = Image::ExifTool::Get64u(\$dirBuff, $entry+4);
            my $formatSize = $Image::ExifTool::Exif::formatSize[$format];
            unless (defined $formatSize) {
                $et->HDump($bufPos+$entry,20,"[invalid IFD entry]",
                           "Bad format value: $format", 1, $offName);
                # (keep $dirName out of the sprintf format so a '%' in the
                # name can't be taken as a conversion)
                $et->Warn("Unknown format ($format) for $dirName tag " . sprintf('0x%x', $tagID));
                return 0; # assume corrupted IFD
            }
            my $formatStr = $Image::ExifTool::Exif::formatName[$format];
            my $size = $count * $formatSize;
            my $tagInfo = $et->GetTagInfo($tagTablePtr, $tagID);
            next unless defined $tagInfo or $verbose;
            my $valuePtr = $entry + 12; # value field position within the entry
            my ($valBuff, $valBase, $rational, $subOffName);
            if ($size > 8) {
                # value doesn't fit in the 8-byte value field, so the field
                # holds a file offset to the value data instead
                if ($size > $maxOffset) {
                    $et->Warn("Can't handle $dirName entry $index (huge size)");
                    next;
                }
                $valuePtr = Image::ExifTool::Get64u(\$dirBuff, $valuePtr);
                if ($valuePtr > $maxOffset and not $et->Options('LargeFileSupport')) {
                    $et->Warn("Can't handle $dirName entry $index (LargeFileSupport not set)");
                    next;
                }
                unless ($raf->Seek($valuePtr, 0) and $raf->Read($valBuff, $size) == $size) {
                    $et->Warn("Error reading $dirName entry $index");
                    next;
                }
                $valBase = 0;   # $valuePtr is already an absolute file offset
            } else {
                # value is stored inline in the entry itself
                $valBuff = substr($dirBuff, $valuePtr, $size);
                $valBase = $bufPos; # $valuePtr is relative to the start of the entries
            }
            if (defined $tagInfo and not $tagInfo) {
                # GetTagInfo() required the value for a Condition
                $tagInfo = $et->GetTagInfo($tagTablePtr, $tagID, \$valBuff);
            }
            my $val = ReadValue(\$valBuff, 0, $formatStr, $count, $size, \$rational);
            if ($htmlDump) {
                my $tval = $val;
                # show numerator/denominator separately for rational numbers
                $tval .= " ($rational)" if defined $rational;
                my ($tagName, $colName);
                if ($tagID == 0x927c and $dirName eq 'ExifIFD') {
                    $tagName = 'MakerNotes';
                } elsif ($tagInfo) {
                    $tagName = $$tagInfo{Name};
                } else {
                    $tagName = sprintf("Tag 0x%.4x",$tagID);
                }
                my $dname = $dirName . sprintf('-%.2d', $index);
                # build our tool tip
                my $tip = sprintf("Tag ID: 0x%.4x\n", $tagID) .
                          "Format: $formatStr\[$count]\nSize: $size bytes\n";
                if ($size > 8) {
                    $tip .= sprintf("Value offset: 0x%.8x\n", $valuePtr);
                    $colName = "<span class=H>$tagName</span>";
                } else {
                    $colName = $tagName;
                }
                $tval = substr($tval,0,28) . '[...]' if length($tval) > 32;
                if ($formatStr =~ /^(string|undef|binary)/) {
                    # translate non-printable characters
                    $tval =~ tr/\x00-\x1f\x7f-\xff/./;
                } elsif ($tagInfo and Image::ExifTool::IsInt($tval)) {
                    if ($$tagInfo{IsOffset}) {
                        $tval = sprintf('0x%.4x', $tval);
                    } elsif ($$tagInfo{PrintHex}) {
                        $tval = sprintf('0x%x', $tval);
                    }
                }
                $tip .= "Value: $tval";
                my ($id, $sid);
                if ($tagInfo and $$tagInfo{SubIFD}) {
                    ($subOffName, $id, $sid) = Image::ExifTool::Exif::NextOffsetName($et, $offName);
                } else {
                    $id = $offName;
                }
                $et->HDump($entry+$bufPos, 20, "$dname $colName", $tip, 1, $id);
                if ($size > 8) {
                    # add value data block
                    my $flg = ($tagInfo and $$tagInfo{SubDirectory} and $$tagInfo{MakerNotes}) ? 4 : 0;
                    $et->HDump($valuePtr,$size,"$tagName value",'SAME', $flg, $sid);
                }
            }
            if ($tagInfo and $$tagInfo{SubIFD}) {
                # process all SubIFD's as BigTIFF
                $verbose > 0 and $et->VerboseInfo($tagID, $tagInfo,
                    Table   => $tagTablePtr,
                    Index   => $index,
                    Value   => $val,
                    DataPt  => \$valBuff,
                    DataPos => $valBase + $valuePtr,
                    Start   => 0,
                    Size    => $size,
                    Format  => $formatStr,
                    Count   => $count,
                );
                # the value is a space-separated list of SubIFD offsets
                my @offsets = split ' ', $val;
                my $i;
                for ($i=0; $i<scalar(@offsets); ++$i) {
                    my $subdirName = $$tagInfo{Name};
                    $subdirName .= $i if $i;    # number all but the first SubIFD
                    my %subdirInfo = (
                        RAF        => $raf,
                        DataPos    => 0,
                        DirStart   => $offsets[$i],
                        DirName    => $subdirName,
                        Parent     => $dirName,
                        OffsetName => $subOffName,
                    );
                    $et->ProcessDirectory(\%subdirInfo, $tagTablePtr, \&ProcessBigIFD);
                }
            } else {
                my $tagKey = $et->HandleTag($tagTablePtr, $tagID, $val,
                    Index   => $index,
                    DataPt  => \$valBuff,
                    DataPos => $valBase + $valuePtr,
                    Start   => 0,
                    Size    => $size,
                    Format  => $formatStr,
                    TagInfo => $tagInfo,
                    RAF     => $raf,
                );
                $tagKey and $et->SetGroup($tagKey, $dirName);
            }
        }
        # only IFD/SubIFD directories are followed to a next IFD
        last unless $dirName =~ /^(IFD|SubIFD)(\d*)$/;
        $dirName = $1 . (($2 || 0) + 1);    # name of the next IFD in the chain
        defined $nextIFD or $et->Warn("Bad $dirName pointer"), return 0;
        $dirStart = Image::ExifTool::Get64u(\$nextIFD, 0);
        $dirStart or last;  # a zero pointer ends the chain
        $offName = $nextOffName;
    }
    return 1;
}
222
#------------------------------------------------------------------------------
# Read the meta information from a BigTIFF image
# Inputs: 0) ExifTool object reference, 1) dirInfo reference (RAF = file to
#            read; a true OutFile means a write was requested)
# Returns: 1 if the file has a BigTIFF header (a write request is refused with
#          an Error), 0 if the header can't be read or doesn't match
sub ProcessBTF($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $header;

    # the 16-byte header starts with the byte order ('MM' or 'II') and the
    # BigTIFF signature, and ends with the 8-byte offset of the first IFD
    $raf->Read($header, 16) == 16 or return 0;
    $header =~ /^(MM\0\x2b\0\x08\0\0|II\x2b\0\x08\0\0\0)/ or return 0;

    if ($$dirInfo{OutFile}) {
        $et->Error('ExifTool does not support writing of BigTIFF images');
        return 1;
    }

    $et->SetFileType('BTF');    # set the FileType tag
    SetByteOrder(substr($header, 0, 2));
    my $ifd0 = Image::ExifTool::Get64u(\$header, 8);

    if ($$et{HTML_DUMP}) {
        my $endian = 'Big';
        $endian = 'Little' if GetByteOrder() eq 'II';
        $et->HDump(0, 8, "BigTIFF header", "Byte order: $endian endian", 0);
        $et->HDump(8, 8, "IFD0 pointer", sprintf("Offset: 0x%.8x",$ifd0), 0);
    }

    # walk the IFD chain starting at IFD0, using the standard EXIF tag table
    my $tagTablePtr = GetTagTable('Image::ExifTool::Exif::Main');
    my %ifdInfo = (
        RAF      => $raf,
        DataPos  => 0,
        DirStart => $ifd0,
        DirName  => 'IFD0',
        Parent   => 'BigTIFF',
    );
    $et->ProcessDirectory(\%ifdInfo, $tagTablePtr, \&ProcessBigIFD);
    return 1;
}
258
2591; # end
260
261__END__
262
263=head1 NAME
264
265Image::ExifTool::BigTIFF - Read Big TIFF meta information
266
267=head1 SYNOPSIS
268
269This module is used by Image::ExifTool
270
271=head1 DESCRIPTION
272
273This module contains routines required by Image::ExifTool to read meta
274information in BigTIFF images.
275
276=head1 AUTHOR
277
278Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
279
280This library is free software; you can redistribute it and/or modify it
281under the same terms as Perl itself.
282
283=head1 REFERENCES
284
285=over 4
286
287=item L<http://www.awaresystems.be/imaging/tiff/bigtiff.html>
288
289=back
290
291=head1 SEE ALSO
292
293L<Image::ExifTool::TagNames/EXIF Tags>,
294L<Image::ExifTool(3pm)|Image::ExifTool>
295
296=cut
297
Note: See TracBrowser for help on using the repository browser.