source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/BPG.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 7.5 KB
Line 
1#------------------------------------------------------------------------------
2# File: BPG.pm
3#
4# Description: Read BPG meta information
5#
6# Revisions: 2016-07-05 - P. Harvey Created
7#
8# References: 1) http://bellard.org/bpg/
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::BPG;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16
17$VERSION = '1.01';
18
19# BPG information
20%Image::ExifTool::BPG::Main = (
21 PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
22 GROUPS => { 0 => 'File', 1 => 'File', 2 => 'Image' },
23 NOTES => q{
24 The information listed below is extracted from BPG (Better Portable
25 Graphics) images. See L<http://bellard.org/bpg/> for the specification.
26 },
27 4 => {
28 Name => 'PixelFormat',
29 Format => 'int16u',
30 Mask => 0xe000,
31 PrintConv => {
32 0 => 'Grayscale',
33 1 => '4:2:0 (chroma at 0.5, 0.5)',
34 2 => '4:2:2 (chroma at 0.5, 0)',
35 3 => '4:4:4',
36 4 => '4:2:0 (chroma at 0, 0.5)',
37 5 => '4:2:2 (chroma at 0, 0)',
38 },
39 },
40 4.1 => {
41 Name => 'Alpha',
42 Format => 'int16u',
43 Mask => 0x1004,
44 BitShift => 0,
45 PrintHex => 1,
46 PrintConv => {
47 0x0000 => 'No Alpha Plane',
48 0x1000 => 'Alpha Exists (color not premultiplied)',
49 0x1004 => 'Alpha Exists (color premultiplied)',
50 0x0004 => 'Alpha Exists (W color component)',
51 },
52 },
53 4.2 => {
54 Name => 'BitDepth',
55 Format => 'int16u',
56 Mask => 0x0f00,
57 ValueConv => '$val + 8',
58 },
59 4.3 => {
60 Name => 'ColorSpace',
61 Format => 'int16u',
62 Mask => 0x00f0,
63 PrintConv => {
64 0 => 'YCbCr (BT 601)',
65 1 => 'RGB',
66 2 => 'YCgCo',
67 3 => 'YCbCr (BT 709)',
68 4 => 'YCbCr (BT 2020)',
69 5 => 'BT 2020 Constant Luminance',
70 },
71 },
72 4.4 => {
73 Name => 'Flags',
74 Format => 'int16u',
75 Mask => 0x000b,
76 PrintConv => { BITMASK => {
77 0 => 'Animation',
78 1 => 'Limited Range',
79 3 => 'Extension Present',
80 }},
81 },
82 6 => { Name => 'ImageWidth', Format => 'var_ue7' },
83 7 => { Name => 'ImageHeight', Format => 'var_ue7' },
84 # length of image data or 0 to EOF
85 # (must be decoded so we know where the extension data starts)
86 8 => { Name => 'ImageLength', Format => 'var_ue7' },
87);
88
89%Image::ExifTool::BPG::Extensions = (
90 GROUPS => { 0 => 'File', 1 => 'File', 2 => 'Image' },
91 VARS => { ALPHA_FIRST => 1 },
92 1 => {
93 Name => 'EXIF',
94 SubDirectory => {
95 TagTable => 'Image::ExifTool::Exif::Main',
96 ProcessProc => \&Image::ExifTool::ProcessTIFF,
97 },
98 },
99 2 => {
100 Name => 'ICC_Profile',
101 SubDirectory => { TagTable => 'Image::ExifTool::ICC_Profile::Main' },
102 },
103 3 => {
104 Name => 'XMP',
105 SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' },
106 },
107 4 => {
108 Name => 'ThumbnailBPG',
109 Binary => 1,
110 },
111 5 => {
112 Name => 'AnimationControl',
113 Binary => 1,
114 Unknown => 1,
115 },
116);
117
118#------------------------------------------------------------------------------
119# Get ue7 integer from binary data (max 32 bits)
120# Inputs: 0) data ref, 1) location in data (undef for 0)
121# Returns: 0) ue7 as integer or undef on error, 1) length of ue7 in bytes
122sub Get_ue7($;$)
123{
124 my $dataPt = shift;
125 my $pos = shift || 0;
126 my $size = length $$dataPt;
127 my $val = 0;
128 my $i;
129 for ($i=0; ; ) {
130 return() if $pos+$i >= $size or $i >= 5;
131 my $byte = Get8u($dataPt, $pos + $i);
132 $val = ($val << 7) | ($byte & 0x7f);
133 unless ($byte & 0x80) {
134 return() if $i == 4 and $byte & 0x70; # error if bits 32-34 are set
135 last; # this was the last byte
136 }
137 return() if $i == 0 and $byte == 0x80; # error if first byte is 0x80
138 ++$i; # step to the next byte
139 }
140 return($val, $i+1);
141}
142
143#------------------------------------------------------------------------------
144# Extract EXIF information from a BPG image
145# Inputs: 0) ExifTool object reference, 1) dirInfo reference
146# Returns: 1 on success, 0 if this wasn't a valid BPG file
147sub ProcessBPG($$)
148{
149 local $_;
150 my ($et, $dirInfo) = @_;
151 my $raf = $$dirInfo{RAF};
152 my ($buff, $size, $n, $len, $pos);
153
154 # verify this is a valid BPG file
155 return 0 unless $raf->Read($buff, 21) == 21; # (21 bytes is maximum header length)
156 return 0 unless $buff =~ /^BPG\xfb/;
157 $et->SetFileType(); # set the FileType tag
158
159 SetByteOrder('MM');
160 my %dirInfo = (
161 DataPt => \$buff,
162 DirStart => 0,
163 DirLen => length($buff),
164 VarFormatData => [ ],
165 );
166 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::BPG::Main'));
167
168 return 1 unless $$et{VALUE}{Flags} & 0x0008; # all done unless extension flag is set
169
170 # add varSize from last entry in VarFormatData to determine
171 # the current read position in the file
172 my $dataPos = 9 + $dirInfo{VarFormatData}[-1][1];
173 # read extension length
174 unless ($raf->Seek($dataPos, 0) and $raf->Read($buff, 5) == 5) {
175 $et->Warn('Missing BPG extension data');
176 return 1;
177 }
178 ($size, $n) = Get_ue7(\$buff);
179 defined $size or $et->Warn('Corrupted BPG extension length'), return 1;
180 $dataPos += $n;
181 $size > 10000000 and $et->Warn('BPG extension is too large'), return 1;
182 unless ($raf->Seek($dataPos, 0) and $raf->Read($buff, $size) == $size) {
183 $et->Warn('Truncated BPG extension');
184 return 1;
185 }
186 my $tagTablePtr = GetTagTable('Image::ExifTool::BPG::Extensions');
187 # loop through the individual extensions
188 for ($pos=0; $pos<$size; $pos+=$len) {
189 my $type = Get8u(\$buff, $pos);
190 # get length of this extension
191 ($len, $n) = Get_ue7(\$buff, ++$pos);
192 defined $len or $et->Warn('Corrupted BPG extension'), last;
193 $pos += $n; # point to start of data for this extension
194 $pos + $len > $size and $et->Warn('Invalid BPG extension size'), last;
195 $$tagTablePtr{$type} or $et->Warn("Unrecognized BPG extension $type ($len bytes)", 1), next;
196 # libbpg (in my opinion) incorrectly copies the padding byte after the
197 # "EXIF\0" APP1 header to the start of the BPG EXIF extension, so issue a
198 # minor warning and ignore the padding if we find it before the TIFF header
199 if ($type == 1 and $len > 3 and substr($buff,$pos,3)=~/^.(II|MM)/s) {
200 $et->Warn("Ignored extra byte at start of EXIF extension", 1);
201 ++$pos;
202 --$len;
203 }
204 $et->HandleTag($tagTablePtr, $type, undef,
205 DataPt => \$buff,
206 DataPos => $dataPos,
207 Start => $pos,
208 Size => $len,
209 Parent => 'BPG',
210 );
211 }
212 return 1;
213}
214
2151; # end
216
217__END__
218
219=head1 NAME
220
221Image::ExifTool::BPG - Read BPG meta information
222
223=head1 SYNOPSIS
224
225This module is used by Image::ExifTool
226
227=head1 DESCRIPTION
228
229This module contains definitions required by Image::ExifTool to read BPG
230(Better Portable Graphics) images.
231
232=head1 AUTHOR
233
234Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
235
236This library is free software; you can redistribute it and/or modify it
237under the same terms as Perl itself.
238
239=head1 REFERENCES
240
241=over 4
242
243=item L<http://bellard.org/bpg/>
244
245=back
246
247=head1 SEE ALSO
248
249L<Image::ExifTool::TagNames/BPG Tags>,
250L<Image::ExifTool(3pm)|Image::ExifTool>
251
252=cut
253
Note: See TracBrowser for help on using the repository browser.