source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Validate.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex listing the metadata fields to which join_before_split is applied (previously it was always applied to all metadata fields). It is now applied to any *Keywords metafields by default, as that is the metafield we know behaves differently from the others, storing values by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split while leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 29.7 KB
Line 
1#------------------------------------------------------------------------------
2# File: Validate.pm
3#
4# Description: Additional metadata validation
5#
6# Created: 2017/01/18 - P. Harvey
7#
8# Notes: My apologies for the convoluted logic contained herein, but it
9# is done this way to retro-fit the Validate feature into the
10# existing ExifTool code while reducing the possibility of
11# introducing bugs or slowing down processing when this feature
12# is not used.
13#------------------------------------------------------------------------------
14
15package Image::ExifTool::Validate;
16
17use strict;
18use vars qw($VERSION %exifSpec);
19
20$VERSION = '1.18';
21
22use Image::ExifTool qw(:Utils);
23use Image::ExifTool::Exif;
24
# EXIF table tag ID's which are part of the EXIF 2.32 specification.
# The value is the ExifVersion number in which the tag was introduced, or 1
# where the version could not be determined (no version check is made for
# these).  Grouped here by that version number.
# (also used by BuildTagLookup to add underlines in HTML version of EXIF Tag Table)
%exifSpec = (
    # in the specification, version not determined
    (map { $_ => 1 }
        0x100, 0x101, 0x102, 0x103, 0x106, 0x10e, 0x10f, 0x110, 0x111, 0x112,
        0x115, 0x116, 0x117, 0x11a, 0x11b, 0x11c, 0x128, 0x12d, 0x131, 0x132,
        0x13b, 0x13e, 0x13f, 0x201, 0x202, 0x211, 0x212, 0x213, 0x214,
        0x8298, 0x829a, 0x829d, 0x8769, 0x8822, 0x8824, 0x8827, 0x8828,
        0x9000, 0x9003, 0x9004, 0x9101, 0x9102,
        0x9201 .. 0x920a,
        0x927c, 0x9286, 0x9290, 0x9291, 0x9292,
        0xa000 .. 0xa004,
        0xa20b, 0xa20c, 0xa20e, 0xa20f, 0xa210, 0xa214, 0xa215, 0xa217,
        0xa300, 0xa301, 0xa302),
    # ExifVersion 0200
    0x8825 => 200,
    # ExifVersion 0210
    (map { $_ => 210 } 0x1, 0xa005),
    # ExifVersion 0220
    (map { $_ => 220 } 0x9214, 0xa401 .. 0xa40c, 0xa420),
    # ExifVersion 0221
    0xa500 => 221,
    # ExifVersion 0230
    (map { $_ => 230 } 0x8830 .. 0x8835, 0xa430 .. 0xa435),
    # ExifVersion 0231
    (map { $_ => 231 } 0x9010 .. 0x9012, 0x9400 .. 0x9405),
    # ExifVersion 0232
    (map { $_ => 232 } 0xa460 .. 0xa462),
);
60
# GPSVersionID numbers when each tag was introduced
# (keyed by GPS tag ID; tag 0x00, GPSVersionID itself, has no entry)
my %gpsVer = (
    (map { $_ => 2000 } 0x01 .. 0x1a),
    (map { $_ => 2200 } 0x1b .. 0x1e),
    0x1f => 2300,
);
72
# lookup to check version numbers
# (group name => { name of version tag => table of minimum versions by tag ID })
my %verCheck = (
    GPS => { GPSVersionID => \%gpsVer },
    (map { $_ => { ExifVersion => \%exifSpec } } qw(ExifIFD InteropIFD)),
);
79
# tags standard in various RAW file formats
# (file type => lookup of tag ID's which are not reported as non-standard or
#  unknown; the special key 'All' covers every tag in that file type)
my %otherSpec = (
    CR2 => { map { $_ => 1 } 0xc5d8, 0xc5d9, 0xc5e0, 0xc640, 0xc6dc, 0xc6dd },
    NEF => { map { $_ => 1 } 0x9216, 0x9217 },
    DNG => { map { $_ => 1 } 0x882a, 0x9211, 0x9216 },
    ARW => { map { $_ => 1 }
        0x7000, 0x7001, 0x7010, 0x7011, 0x7020, 0x7031, 0x7032, 0x7034, 0x7035,
        0x7036, 0x7037, 0x7310, 0x7313, 0x7316, 0x74c7, 0x74c8, 0xa500 },
    SRW => { map { $_ => 1 } 0xa010, 0xa011, 0xa101, 0xa102 },
    NRW => { map { $_ => 1 } 0x9216, 0x9217 },
    X3F => { 0xa500 => 1 },
    # ignore all unknown tags in these file types (RAF is temporary)
    (map { $_ => { All => 1 } } qw(RW2 RWL RAF DCR KDC JXR)),
);
98
# standard format for tags (not necessary for exifSpec or GPS tags where Writable is defined)
# - keyed by IFD name, then tag ID; the 'IFD' table is used for any IFD
#   without a table of its own
# - each value is used as a regular expression which must match the whole
#   format name
my %stdFormat = (
    ExifIFD => {
        0xa002 => 'int(16|32)u',
        0xa003 => 'int(16|32)u',
    },
    InteropIFD => {
        0x01   => 'string',
        0x02   => 'undef',
        0x1000 => 'string',
        0x1001 => 'int(16|32)u',
        0x1002 => 'int(16|32)u',
    },
    IFD => {
        # TIFF, EXIF, XMP, IPTC, ICC_Profile and PrintIM standard tags:
        0xfe  => 'int32u',       0x11f => 'rational64u',  0x14a => 'int32u',      0x205  => 'int16u',
        0xff  => 'int16u',       0x120 => 'int32u',       0x14c => 'int16u',      0x206  => 'int16u',
        0x100 => 'int(16|32)u',  0x121 => 'int32u',       0x14d => 'string',      0x207  => 'int32u',
        0x101 => 'int(16|32)u',  0x122 => 'int16u',       0x14e => 'int16u',      0x208  => 'int32u',
        0x107 => 'int16u',       0x123 => 'int16u',       0x150 => 'int(8|16)u',  0x209  => 'int32u',
        0x108 => 'int16u',       0x124 => 'int32u',       0x151 => 'string',      0x211  => 'rational64u',
        0x109 => 'int16u',       0x125 => 'int32u',       0x152 => 'int16u',      0x212  => 'int16u',
        0x10a => 'int16u',       0x129 => 'int16u',       0x153 => 'int16u',      0x213  => 'int16u',
        0x10d => 'string',       0x13c => 'string',       0x154 => '.*',          0x214  => 'rational64u',
        0x111 => 'int(16|32)u',  0x13d => 'int16u',       0x155 => '.*',          0x2bc  => 'int8u',
        0x116 => 'int(16|32)u',  0x140 => 'int16u',       0x156 => 'int16u',      0x828d => 'int16u',
        0x117 => 'int(16|32)u',  0x141 => 'int16u',       0x15b => 'undef',       0x828e => 'int8u',
        0x118 => 'int16u',       0x142 => 'int(16|32)u',  0x200 => 'int16u',      0x83bb => 'int32u',
        0x119 => 'int16u',       0x143 => 'int(16|32)u',  0x201 => 'int32u',      0x8649 => 'int8u',
        0x11d => 'string',       0x144 => 'int32u',       0x202 => 'int32u',      0x8773 => 'undef',
        0x11e => 'rational64u',  0x145 => 'int(16|32)u',  0x203 => 'int16u',      0xc4a5 => 'undef',
        # Windows Explorer tags:
        0x9c9b => 'int8u',  0x9c9d => 'int8u',  0x9c9f => 'int8u',
        0x9c9c => 'int8u',  0x9c9e => 'int8u',
        # GeoTiff tags:
        0x830e => 'double',  0x8482 => 'double',  0x87af => 'int16u',  0x87b1 => 'string',
        0x8480 => 'double',  0x85d8 => 'double',  0x87b0 => 'double',
        # DNG tags:
        0xc615 => '(string|int8u)',               0xc6d3 => '',
        0xc61a => '(int16u|int32u|rational64u)',  0xc6f4 => '(string|int8u)',
        0xc61d => 'int(16|32)u',                  0xc6f6 => '(string|int8u)',
        0xc61f => '(int16u|int32u|rational64u)',  0xc6f8 => '(string|int8u)',
        0xc620 => '(int16u|int32u|rational64u)',  0xc6fe => '(string|int8u)',
        0xc628 => '(int16u|rational64u)',         0xc716 => '(string|int8u)',
        0xc634 => 'int8u',                        0xc717 => '(string|int8u)',
        0xc640 => '',                             0xc718 => '(string|int8u)',
        0xc660 => '',                             0xc71e => 'int(16|32)u',
        0xc68b => '(string|int8u)',               0xc71f => 'int(16|32)u',
        0xc68d => 'int(16|32)u',                  0xc791 => 'int(16|32)u',
        0xc68e => 'int(16|32)u',                  0xc792 => 'int(16|32)u',
        0xc6d2 => '',                             0xc793 => '(int16u|int32u|rational64u)',
    },
);

# generate lookup for any IFD
# (the per-IFD tables above merged into a single hash keyed by tag ID)
my %stdFormatAnyIFD;
foreach my $formats (values %stdFormat) {
    @stdFormatAnyIFD{keys %$formats} = values %$formats;
}
155
# tag values to validate based on file type (from EXIF specification)
# - validation code may access $val and %val, and returns 1 on success,
#   or error message otherwise ('' for a generic message)
# - entry is undef if tag must not exist (same as 'not defined $val' in code)
# - a test which fails when $val is undefined makes the tag mandatory (it is
#   then reported as "Missing required"), so the test for an optional tag
#   must begin with 'not defined $val or'
my %validValue = (
    JPEG => {
        IFD0 => {
            0x100 => undef,     # ImageWidth
            0x101 => undef,     # ImageLength
            0x102 => undef,     # BitsPerSample
            0x103 => undef,     # Compression
            0x106 => undef,     # PhotometricInterpretation
            0x111 => undef,     # StripOffsets
            0x115 => undef,     # SamplesPerPixel
            0x116 => undef,     # RowsPerStrip
            0x117 => undef,     # StripByteCounts
            0x11a => 'defined $val',        # XResolution
            0x11b => 'defined $val',        # YResolution
            0x11c => undef,     # PlanarConfiguration
            0x128 => '$val =~ /^[123]$/',   # ResolutionUnit
            0x201 => undef,     # JPEGInterchangeFormat
            0x202 => undef,     # JPEGInterchangeFormatLength
            0x212 => undef,     # YCbCrSubSampling
            0x213 => '$val =~ /^[12]$/',    # YCbCrPositioning
        },
        IFD1 => {
            0x100 => undef,     # ImageWidth
            0x101 => undef,     # ImageLength
            0x102 => undef,     # BitsPerSample
            0x103 => '$val == 6',           # Compression
            0x106 => undef,     # PhotometricInterpretation
            0x111 => undef,     # StripOffsets
            0x115 => undef,     # SamplesPerPixel
            0x116 => undef,     # RowsPerStrip
            0x117 => undef,     # StripByteCounts
            0x11a => 'defined $val',        # XResolution
            0x11b => 'defined $val',        # YResolution
            0x11c => undef,     # PlanarConfiguration
            0x128 => '$val =~ /^[123]$/',   # ResolutionUnit
            0x201 => 'defined $val',        # JPEGInterchangeFormat
            0x202 => 'defined $val',        # JPEGInterchangeFormatLength
            0x212 => undef,     # YCbCrSubSampling
        },
        ExifIFD => {
            0x9000 => 'defined $val and $val =~ /^\d{4}$/', # ExifVersion
            0x9101 => 'defined $val',       # ComponentsConfiguration
            0xa000 => 'defined $val',       # FlashpixVersion
            0xa001 => '$val == 1 or $val == 0xffff',    # ColorSpace
            0xa002 => 'defined $val',       # PixelXDimension
            0xa003 => 'defined $val',       # PixelYDimension
        },
        GPS => {
            0x00 => 'defined $val and $val =~ /^\d \d \d \d$/', # GPSVersionID
            0x1b => 'not defined $val or $val =~ /^(GPS|CELLID|WLAN|MANUAL)$/', # GPSProcessingMethod
        },
        InteropIFD => { },  # (needed for ExifVersion check)
    },
    TIFF => {
        IFD0 => {
            0x100 => 'defined $val',        # ImageWidth
            0x101 => 'defined $val',        # ImageLength
            # (default is 1) 0x102 => 'defined $val', # BitsPerSample
            0x103 => q{
                not defined $val or $val =~ /^(1|5|6|32773)$/ or
                    ($val == 2 and (not defined $val{0x102} or $val{0x102} == 1));
            },                              # Compression
            0x106 => '$val =~ /^[0123]$/',  # PhotometricInterpretation
            0x111 => 'defined $val',        # StripOffsets
            # SamplesPerPixel
            0x115 => q{
                my $pi = $val{0x106} || 0;
                my $xtra = ($val{0x152} ? scalar(split ' ', $val{0x152}) : 0);
                if ($pi == 2 or $pi == 6) {
                    return $val == 3 + $xtra;
                } elsif ($pi == 5) {
                    return $val == 4 + $xtra;
                } else {
                    return 1;
                }
            },
            0x116 => 'defined $val',        # RowsPerStrip
            0x117 => 'defined $val',        # StripByteCounts
            0x11a => 'defined $val',        # XResolution
            0x11b => 'defined $val',        # YResolution
            0x128 => 'not defined $val or $val =~ /^[123]$/',   # ResolutionUnit
            # ColorMap (must be palette image with correct number of colors)
            0x140 => q{
                return '' if defined $val{0x106} and $val{0x106} == 3 xor defined $val;
                return 1 if not defined $val or length($val) == 6 * 2 ** ($val{0x102} || 0);
                return 'Invalid count for';
            },
            0x201 => undef,     # JPEGInterchangeFormat
            0x202 => undef,     # JPEGInterchangeFormatLength
        },
        ExifIFD => {
            0x9000 => 'defined $val',       # ExifVersion
            0x9101 => undef,    # ComponentsConfiguration
            0x9102 => undef,    # CompressedBitsPerPixel
            0xa000 => 'defined $val',       # FlashpixVersion
            0xa001 => '$val == 1 or $val == 0xffff',    # ColorSpace
            0xa002 => undef,    # PixelXDimension
            0xa003 => undef,    # PixelYDimension
        },
        InteropIFD => {
            0x0001 => undef,    # InteropIndex
        },
        GPS => {
            0x00 => 'defined $val and $val =~ /^\d \d \d \d$/', # GPSVersionID
            # (optional tag, so it must validate when absent, the same as for JPEG;
            #  without the "not defined" test every TIFF with a GPS IFD but no
            #  GPSProcessingMethod was reported as missing a required tag)
            0x1b => 'not defined $val or $val =~ /^(GPS|CELLID|WLAN|MANUAL)$/', # GPSProcessingMethod
        },
    },
);
268
# validity ranges for constrained date/time fields
# (each entry is [ field name, minimum, maximum ], listed in the order the
#  fields are captured by the date/time validation routines)
my @validDateField = (
    [ Month   => 1, 12 ],
    [ Day     => 1, 31 ],
    [ Hour    => 0, 23 ],
    [ Minutes => 0, 59 ],
    [ Seconds => 0, 59 ],
    [ TZhr    => 0, 14 ],
    [ TZmin   => 0, 59 ],
);
279
280# "Validate" tag information
my %validateInfo = (
    Groups => { 0 => 'ExifTool', 1 => 'ExifTool', 2 => 'ExifTool' },
    Notes => q{
        generated only if specifically requested.  Requesting this tag automatically
        enables the API L<Validate|../ExifTool.html#Validate> option, imposing
        additional validation checks when extracting metadata.  Returns the number
        of errors, warnings and minor warnings encountered.  Note that the Validate
        feature focuses mainly on validation of EXIF/TIFF metadata
    },
    PrintConv => {
        '0 0 0' => 'OK',
        # any other value: the raw value is "errors warnings minor-warnings",
        # converted to text such as "1 Error and 2 Warnings (1 minor)"
        OTHER => sub {
            my ($numErr, $numWarn, $numMinor) = split ' ', shift;
            my @parts;
            if ($numErr) {
                my $plural = ($numErr == 1) ? '' : 's';
                push @parts, sprintf('%d Error%s', $numErr, $plural);
            }
            if ($numWarn) {
                my $plural = ($numWarn == 1) ? '' : 's';
                push @parts, sprintf('%d Warning%s', $numWarn, $plural);
            }
            if ($numMinor) {
                # say how many of the warnings are minor (nothing extra needed
                # for a single minor warning, "all" if every one is minor)
                my $howMany;
                if ($numWarn != $numMinor) {
                    $howMany = "$numMinor ";
                } elsif ($numWarn == 1) {
                    $howMany = '';
                } else {
                    $howMany = 'all ';
                }
                $parts[-1] .= " (${howMany}minor)";
            }
            return join(' and ', @parts);
        },
    },
);
305
# register the "Validate" tag definition in the Extra tag table
AddTagToTable(\%Image::ExifTool::Extra, 'Validate', \%validateInfo, 1);
308
#------------------------------------------------------------------------------
# Validate the raw value of a tag
# Inputs: 0) ExifTool ref, 1) tag key, 2) raw tag value
# Returns: nothing, but issues a minor Warning if a problem was detected
# Notes: the Validate code of a tag is evaluated here with $self, $val and
#        $tagInfo in scope, so these variables must keep their names
sub ValidateRaw($$$)
{
    my ($self, $tag, $val) = @_;
    my $tagInfo = $$self{TAG_INFO}{$tag};
    my $wrn;

    # run the tag-specific Validate code, if any
    if ($$tagInfo{Validate}) {
        local $SIG{'__WARN__'} = \&Image::ExifTool::SetWarning;
        undef $Image::ExifTool::evalWarning;
        #### eval Validate ($self, $val, $tagInfo)
        my $problem = eval $$tagInfo{Validate};
        my $evalErr = $Image::ExifTool::evalWarning || $@;
        if ($problem or $evalErr) {
            my $name = $$tagInfo{Table}{GROUPS}{0} . ':' . Image::ExifTool::GetTagName($tag);
            # (report a failure of the validation code itself first)
            if ($evalErr) {
                $self->Warn("Validate $name: $evalErr", 1);
            }
            if ($problem) {
                $self->Warn("$problem for $name", 1);
            }
        }
    }
    # check for unknown values in PrintConv lookup for all standard EXIF tags
    if (ref $$tagInfo{PrintConv} eq 'HASH') {
        my $isStandard = ($$tagInfo{Table}{SHORT_NAME} eq 'GPS::Main' or
            ($$tagInfo{Table} eq \%Image::ExifTool::Exif::Main and $exifSpec{$$tagInfo{TagID}}));
        if ($isStandard) {
            my $printVal = $self->GetValue($tag, 'PrintConv');
            if ($printVal and $printVal =~ /^Unknown \(/) {
                $wrn = 'Unknown value for';
            }
        }
    }
    # (a raw value of 'undef' takes precedence over an unknown PrintConv value)
    if ($val eq 'undef') {
        $wrn = 'Undefined value for';
    }
    if ($wrn) {
        my $name = $$self{DIR_NAME} . ':' . Image::ExifTool::GetTagName($tag);
        $self->Warn("$wrn $name", 1);
    }
}
345
#------------------------------------------------------------------------------
# Validate raw EXIF date/time value
# Inputs: 0) date/time value
# Returns: error string, or undef if the value is OK
# Notes: a valid value is either "YYYY:MM:DD HH:MM:SS" with all fields in
#        range, or one of the two 19-character "unknown" forms (every digit
#        blank with the colons kept, or entirely blank)
sub ValidateExifDate($)
{
    my $val = shift;
    if ($val =~ /^\d{4}:(\d{2}):(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) {
        # captured fields are in the same order as the @validDateField entries
        my @a = ($1,$2,$3,$4,$5);
        my ($i, @bad);
        for ($i=0; $i<@a; ++$i) {
            # (the fields are always 2 digits here, so there is no blank field to allow for)
            next if $a[$i] >= $validDateField[$i][1] and $a[$i] <= $validDateField[$i][2];
            push @bad, $validDateField[$i][0];
        }
        return join('+', @bad) . ' out of range' if @bad;
    } else {
        # the EXIF specification allows an unknown date/time to be stored with
        # all digits blank, or as an entirely blank value.  Both forms have the
        # full 19-character length, and are built from field widths here so
        # that they can't be corrupted by whitespace changes in this file
        my $blankFields = join ':', map { ' ' x $_ } 4, 2, 5, 2, 2;
        my $allBlank = ' ' x 19;
        unless ($val eq $blankFields or $val eq $allBlank) {
            return 'Invalid date/time format';
        }
    }
    return undef;   # OK!
}
367
#------------------------------------------------------------------------------
# Validate EXIF-reformatted XMP date/time value
# Inputs: 0) date/time value
# Returns: error string, or undef if the value is OK
sub ValidateXMPDate($)
{
    my $val = shift;
    # accepted layouts, tried in this order.  In all of them the capture groups
    # line up as: 1=month, 2=day, 3=hour, 4=minutes, 5=seconds, 6=whole time
    # zone, 7=time zone hours, 8=time zone minutes
    my @layouts = (
        qr/^\d{4}$/,
        qr/^\d{4}:(\d{2})$/,
        qr/^\d{4}:(\d{2}):(\d{2})$/,
        qr/^\d{4}:(\d{2}):(\d{2}) (\d{2}):(\d{2})()(Z|[-+](\d{2}):(\d{2}))?$/,
        qr/^\d{4}:(\d{2}):(\d{2}) (\d{2}):(\d{2}):(\d{2})(Z|[-+](\d{2}):(\d{2}))?$/,
        qr/^\d{4}:(\d{2}):(\d{2}) (\d{2}):(\d{2}):(\d{2})\.?\d*(Z|[-+](\d{2}):(\d{2}))?$/,
    );
    my (@a, $matched);
    foreach my $layout (@layouts) {
        next unless $val =~ $layout;
        # (group 6 is skipped so the list lines up with @validDateField)
        @a = ($1,$2,$3,$4,$5,$7,$8);
        $matched = 1;
        last;
    }
    return 'Invalid date/time format' unless $matched;

    my @bad;
    foreach my $i (0 .. $#a) {
        my $field = $a[$i];
        last unless defined $field;     # no more fields in this value
        next if $field eq '';           # (empty seconds placeholder)
        my $inRange = ($field >= $validDateField[$i][1] and $field <= $validDateField[$i][2]);
        push @bad, $validDateField[$i][0] unless $inRange;
    }
    return join('+', @bad) . ' out of range' if @bad;
    return undef;   # OK!
}
395
#------------------------------------------------------------------------------
# Validate EXIF tag
# Inputs: 0) ExifTool ref, 1) tag table ref, 2) tag ID, 3) tagInfo ref,
#         4) previous tag ID, 5) IFD name, 6) number of values, 7) value format string
# Returns: Nothing, but sets Warning tags if any problems are found
sub ValidateExif($$$$$$$$)
{
    my ($et, $tagTablePtr, $tag, $tagInfo, $lastTag, $ifd, $count, $formatStr) = @_;

    $et->WarnOnce("Entries in $ifd are out of order") if $tag <= $lastTag;

    # (get tagInfo for unknown tags if Unknown option not used)
    unless (defined $tagInfo) {
        my $entry = $$tagTablePtr{$tag};
        $tagInfo = $entry if $entry and ref $entry eq 'HASH';
    }

    if (not defined $tagInfo) {
        # no definition at all: warn unless the file type allows this tag
        my $spec = $otherSpec{$$et{VALUE}{FileType}};
        if (not $spec or (not $$spec{$tag} and not $$spec{All})) {
            $et->Warn(sprintf('Unknown %s tag 0x%.4x', $ifd, $tag), 1);
        }
    } else {
        my $ti = $tagInfo || $$tagTablePtr{$tag};
        $ti = $$ti[-1] if ref $ti eq 'ARRAY';
        my $stdFmt = $stdFormat{$ifd} || $stdFormat{IFD};

        # common warning for a tag found in an IFD other than its write group
        my $warnWrongIFD = sub {
            my $shouldBe = shift;
            $et->Warn(sprintf('Wrong IFD for 0x%.4x %s (should be %s not %s)', $tag, $$ti{Name}, $shouldBe, $ifd));
        };

        # is this a tag that the IFD and format checks apply to?
        my $isStandard = (defined $$stdFmt{All} or
            ($tagTablePtr eq \%Image::ExifTool::Exif::Main and
                ($exifSpec{$tag} or $$stdFmt{$tag} or
                 # (DNG tags)
                 ($tag >= 0xc612 and $tag <= 0xc7b5 and not defined $$stdFmt{$tag}))) or
            $$tagTablePtr{SHORT_NAME} eq 'GPS::Main');

        if ($isStandard) {
            my $writeGroup = $$ti{WriteGroup} || $$tagTablePtr{WRITE_GROUP};
            if ($writeGroup and $writeGroup ne $ifd and $writeGroup ne 'All' and
                not $$ti{OffsetPair} and
                ($ifd =~ /^(Sub|Profile)?IFD\d*$/ xor $writeGroup =~ /^(Sub)?IFD\d*$/) and
                ($$ti{Writable} or $$ti{WriteGroup}) and $ifd !~ /^SRF\d+$/)
            {
                $warnWrongIFD->($writeGroup);
            }
            # the expected format is a pattern which must match the whole format name
            my $expectFmt = $$stdFmt{$tag} || $$ti{Writable};
            if ($expectFmt and $formatStr !~ /^$expectFmt$/ and
                (not $tagInfo or not $$tagInfo{IsOffset} or
                 $Image::ExifTool::Exif::intFormat{$formatStr}))
            {
                $et->Warn(sprintf('Non-standard format (%s) for %s 0x%.4x %s', $formatStr, $ifd, $tag, $$ti{Name}));
            }
        } elsif ($stdFormatAnyIFD{$tag}) {
            if ($$ti{Writable} || $$ti{WriteGroup}) {
                my $writeGroup = $$ti{WriteGroup} || $$tagTablePtr{WRITE_GROUP};
                $warnWrongIFD->($writeGroup) if $writeGroup and $writeGroup ne $ifd;
            }
        } else {
            my $spec = $otherSpec{$$et{VALUE}{FileType}};
            if (not $spec or (not $$spec{$tag} and not $$spec{All})) {
                if ($tagTablePtr eq \%Image::ExifTool::Exif::Main or $$tagInfo{Unknown}) {
                    $et->Warn(sprintf('Non-standard %s tag 0x%.4x %s', $ifd, $tag, $$ti{Name}), 1);
                }
            }
        }

        # change expected count from read Format to Writable size
        my $expectCount = $$ti{Count};
        if ($expectCount) {
            my $fmtNum = \%Image::ExifTool::Exif::formatNumber;
            if ($$ti{Format} and $$ti{Writable} and
                $$fmtNum{$$ti{Format}} and $$fmtNum{$$ti{Writable}})
            {
                my $readSize  = $Image::ExifTool::Exif::formatSize[$$fmtNum{$$ti{Format}}];
                my $writeSize = $Image::ExifTool::Exif::formatSize[$$fmtNum{$$ti{Writable}}];
                $expectCount = int($expectCount * $readSize / $writeSize);
            }
            if ($expectCount > 0 and $count != $expectCount) {
                $et->Warn(sprintf('Non-standard count (%d) for %s 0x%.4x %s', $count, $ifd, $tag, $$ti{Name}));
            }
        }
    }
}
468
#------------------------------------------------------------------------------
# Validate image data offsets/sizes
# Inputs: 0) ExifTool ref, 1) offset info hash ref (arrays of tagInfo/value pairs, keyed by tagID)
#         2) directory name, 3) optional flag for minor warning
# Notes: entries are removed from the offset info hash as they are processed
sub ValidateOffsetInfo($$$;$)
{
    local $_;
    my ($et, $offsetInfo, $dirName, $minor) = @_;

    my $fileSize = $$et{VALUE}{FileSize} or return;

    # (don't test RWZ files and some other file types)
    return if $$et{DontValidateImageData};
    # (Minolta A200 uses wrong byte order for these)
    return if $$et{TIFF_TYPE} eq 'MRW' and $dirName eq 'IFD0' and $$et{Model} =~ /^DiMAGE A200/;
    # (don't test 3FR, RWL or RW2 files)
    return if $$et{TIFF_TYPE} =~ /^(3FR|RWL|RW2)$/;

    Image::ExifTool::Exif::ValidateImageData($et, $offsetInfo, $dirName);

    # loop through all offsets, taking the lowest-sorting remaining tag ID each time
    ENTRY: while (%$offsetInfo) {
        my ($firstID) = sort keys %$offsetInfo;
        my $offsets = delete $$offsetInfo{$firstID};
        next ENTRY unless ref $offsets eq 'ARRAY';

        # look for the other half of this offset/size pair
        my $pairID = $$offsets[0]{OffsetPair};
        my $sizes;
        $sizes = delete $$offsetInfo{$pairID} if defined $pairID and $$offsetInfo{$pairID};
        unless ($sizes) {
            my $lone = $$offsets[0];
            unless ($$lone{NotRealPair} or (defined $pairID and $pairID == -1)) {
                my $corr = $$lone{IsOffset} ? 'size' : 'offset';
                $et->Warn("$dirName:$$lone{Name} is missing the corresponding $corr tag") unless $minor;
            }
            next ENTRY;
        }
        # (the size tag may have been the one found first)
        ($sizes, $offsets) = ($offsets, $sizes) if $$sizes[0]{IsOffset};

        my @starts  = split ' ', $$offsets[1];
        my @lengths = split ' ', $$sizes[1];
        unless (@lengths == @starts) {
            $et->Warn(sprintf('Wrong number of values in %s 0x%.4x %s',
                              $dirName, $$offsets[0]{TagID}, $$offsets[0]{Name}), $minor);
            next ENTRY;
        }
        my $offName  = $$offsets[0]{Name};
        my $sizeName = $$sizes[0]{Name};

        # check each data block, from the last to the first, stopping at the
        # first one which extends past the end of the file
        BLOCK: while (@starts) {
            my $start = pop @starts;
            my $end = $start + pop @lengths;
            $et->WarnOnce("$dirName:$offName is zero", $minor) if $start == 0;
            $et->WarnOnce("$dirName:$sizeName is zero", $minor) if $start == $end;
            next BLOCK unless $end > $fileSize;
            if ($start < $fileSize) {
                $et->Warn("$dirName:$offName+$sizeName runs past end of file", $minor);
            } elsif ($start == 0xffffffff) {
                $et->Warn("$dirName:$offName is invalid (0xffffffff)", $minor);
            } else {
                $et->Warn("$dirName:$offName is past end of file", $minor);
            }
            last BLOCK;
        }
    }
}
532
#------------------------------------------------------------------------------
# Finish Validating tags
# Inputs: 0) ExifTool ref, 1) True to generate Validate tag
# Returns: nothing, but issues warnings for any problems found, and generates
#          the Validate tag ("errors warnings minor-warnings") if requested
# Notes: the %validValue code is evaluated here with $val and %val in scope,
#        so these variables must keep their names
sub FinishValidate($$)
{
    local $_;
    my ($et, $mkTag) = @_;

    my $fileType = $$et{FILE_TYPE} || '';
    $fileType = $$et{TIFF_TYPE} if $fileType eq 'TIFF';

    if ($validValue{$fileType}) {
        my ($grp, $tag);
        # (trap warnings generated when evaluating the validation code)
        local $SIG{'__WARN__'} = \&Image::ExifTool::SetWarning;
        foreach $grp (sort keys %{$validValue{$fileType}}) {
            next unless $$et{FOUND_DIR}{$grp};
            my ($key, %val, %info, $verTag, $ver, $vstr);
            my $verCheck = $verCheck{$grp};
            if ($verCheck) {
                ($verTag) = keys %$verCheck;
                ($ver = $$et{VALUE}{$verTag}) =~ tr/0-9//dc; # (remove non-digits)
                undef $ver unless $ver =~ /^\d{4}$/;    # (already warned if invalid version)
            }
            # get all tags in this group
            foreach $key (sort keys %{$$et{VALUE}}) {
                next unless $et->GetGroup($key, 1) eq $grp;
                next if $$et{TAG_EXTRA}{$key} and $$et{TAG_EXTRA}{$key}{G3}; # ignore sub-documents
                # fill in %val lookup with values based on tag ID
                my $tag = $$et{TAG_INFO}{$key}{TagID};
                $val{$tag} = $$et{VALUE}{$key};
                # save TagInfo ref for later
                $info{$tag} = $$et{TAG_INFO}{$key};
                # warn if the tag needs a newer version than the file declares
                next unless defined $ver;
                my $chk = $$verCheck{$verTag};
                next if not defined $$chk{$tag} or $$chk{$tag} == 1 or $ver >= $$chk{$tag};
                if ($verTag eq 'GPSVersionID') {
                    # (eg. 2200 is shown as "2.2.0.0")
                    ($vstr = $$chk{$tag}) =~ s/^(\d)(\d)(\d)/$1.$2.$3./;
                } else {
                    $vstr = sprintf('%.4d', $$chk{$tag});
                }
                $et->Warn(sprintf('%s tag 0x%.4x %s requires %s %s or higher',
                          $grp, $tag, $$et{TAG_INFO}{$key}{Name}, $verTag, $vstr));
            }
            # check the values of all tags with validation rules for this group
            my $validValue = $validValue{$fileType}{$grp};
            foreach $tag (sort { $a <=> $b } keys %$validValue) {
                my $val = $val{$tag};
                # ($minor must be reset for each tag, otherwise one disallowed tag
                #  would downgrade all later warnings in this group to minor)
                my ($pre, $post, $minor);
                if (defined $$validValue{$tag}) {
                    #### eval ($val, %val)
                    my $result = eval $$validValue{$tag};
                    if (not defined $result) {
                        $pre = 'Internal error validating';
                    } elsif ($result eq '') {
                        $pre = defined $val ? 'Invalid value for' : "Missing required $fileType";
                    } else {
                        next if $result eq '1';
                        $pre = $result;     # (validation code returned its own message)
                    }
                } else {
                    # (undef entry means that the tag must not exist)
                    next unless defined $val;
                    $post = "is not allowed in $fileType";
                    $minor = 1;
                }
                my $name;
                if ($info{$tag}) {
                    $name = $info{$tag}{Name};
                } else {
                    # tag wasn't extracted, so look up its name in the tag table
                    my $table = 'Image::ExifTool::'.($grp eq 'GPS' ? 'GPS' : 'Exif').'::Main';
                    my $tagInfo = GetTagTable($table)->{$tag};
                    $tagInfo = $$tagInfo[0] if ref $tagInfo eq 'ARRAY';
                    $name = $tagInfo ? $$tagInfo{Name} : '<unknown>';
                }
                next if $$et{WrongFormat} and $$et{WrongFormat}{"$grp:$name"};
                $pre ? ($pre .= ' ') : ($pre = '');
                $post ? ($post = ' '.$post) : ($post = '');
                $et->Warn(sprintf('%s%s tag 0x%.4x %s%s', $pre, $grp, $tag, $name, $post), $minor);
            }
        }
    }
    # validate file extension
    # (FILENAME is tested for definedness first to avoid comparing an undefined value)
    if (defined $$et{FILENAME} and $$et{FILENAME} ne '') {
        my $fileExt = ($$et{FILENAME} =~ /^.*\.([^.]+)$/s) ? uc($1) : '';
        my $extFileType = Image::ExifTool::GetFileType($fileExt);
        if ($extFileType and $extFileType ne $fileType) {
            my $normExt = $$et{VALUE}{FileTypeExtension};
            if ($normExt and $normExt ne $fileExt) {
                my $lkup = $Image::ExifTool::fileTypeLookup{$fileExt};
                if (ref $lkup or $lkup ne $normExt) {
                    $et->Warn("File has wrong extension (should be $normExt, not $fileExt)");
                }
            }
        }
    }
    # issue warning if FastScan option used
    $et->Warn('Validation incomplete because FastScan option used') if $et->Options('FastScan');

    # generate Validate tag if necessary
    if ($mkTag) {
        my (@num, $key);
        # count errors and warnings (the first of each plus any duplicates),
        # with the minor warning count starting at zero
        push @num, $$et{VALUE}{Error}   ? ($$et{DUPL_TAG}{Error}   || 0) + 1 : 0,
                   $$et{VALUE}{Warning} ? ($$et{DUPL_TAG}{Warning} || 0) + 1 : 0, 0;
        # step through all Warning tag keys to count the minor ones
        for ($key = 'Warning'; ; ) {
            ++$num[2] if $$et{VALUE}{$key} and $$et{VALUE}{$key} =~ /^\[minor\]/i;
            $key = $et->NextTagKey($key) or last;
        }
        $et->FoundTag(Validate => "@num");
    }
}
642
6431; # end
644
645__END__
646
647=head1 NAME
648
649Image::ExifTool::Validate - Additional metadata validation
650
651=head1 SYNOPSIS
652
653This module is used by Image::ExifTool
654
655=head1 DESCRIPTION
656
657This module contains additional routines and definitions used when the
658ExifTool Validate option is enabled.
659
660=head1 AUTHOR
661
662Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
663
664This library is free software; you can redistribute it and/or modify it
665under the same terms as Perl itself.
666
667=head1 SEE ALSO
668
669L<Image::ExifTool(3pm)|Image::ExifTool>,
670L<Image::ExifTool::TagNames/Extra Tags>
671
672=cut
Note: See TracBrowser for help on using the repository browser.