source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/AIFF.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields, a regex that lists the metadata fields to apply join_before_split to; previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 9.6 KB
Line 
1#------------------------------------------------------------------------------
2# File: AIFF.pm
3#
4# Description: Read AIFF meta information
5#
6# Revisions: 01/06/2006 - P. Harvey Created
7# 09/22/2008 - PH Added DjVu support
8#
9# References: 1) http://developer.apple.com/documentation/QuickTime/INMAC/SOUND/imsoundmgr.30.htm#pgfId=3190
10# 2) http://astronomy.swin.edu.au/~pbourke/dataformats/aiff/
11# 3) http://www.mactech.com/articles/mactech/Vol.06/06.01/SANENormalized/
12#------------------------------------------------------------------------------
13
14package Image::ExifTool::AIFF;
15
16use strict;
17use vars qw($VERSION);
18use Image::ExifTool qw(:DataAccess :Utils);
19use Image::ExifTool::ID3;
20
21$VERSION = '1.11';
22
23# information for time/date-based tags (time zero is Jan 1, 1904)
24my %timeInfo = (
25 Groups => { 2 => 'Time' },
26 ValueConv => 'ConvertUnixTime($val - ((66 * 365 + 17) * 24 * 3600))',
27 PrintConv => '$self->ConvertDateTime($val)',
28);
29
30# AIFF info
31%Image::ExifTool::AIFF::Main = (
32 GROUPS => { 2 => 'Audio' },
33 NOTES => q{
34 Tags extracted from Audio Interchange File Format (AIFF) files. See
35 L<http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/AIFF/AIFF.html> for
36 the AIFF specification.
37 },
38# FORM => 'Format',
39 FVER => {
40 Name => 'FormatVersion',
41 SubDirectory => { TagTable => 'Image::ExifTool::AIFF::FormatVers' },
42 },
43 COMM => {
44 Name => 'Common',
45 SubDirectory => { TagTable => 'Image::ExifTool::AIFF::Common' },
46 },
47 COMT => {
48 Name => 'Comment',
49 SubDirectory => { TagTable => 'Image::ExifTool::AIFF::Comment' },
50 },
51 NAME => {
52 Name => 'Name',
53 ValueConv => '$self->Decode($val, "MacRoman")',
54 },
55 AUTH => {
56 Name => 'Author',
57 Groups => { 2 => 'Author' },
58 ValueConv => '$self->Decode($val, "MacRoman")',
59 },
60 '(c) ' => {
61 Name => 'Copyright',
62 Groups => { 2 => 'Author' },
63 ValueConv => '$self->Decode($val, "MacRoman")',
64 },
65 ANNO => {
66 Name => 'Annotation',
67 ValueConv => '$self->Decode($val, "MacRoman")',
68 },
69 'ID3 ' => {
70 Name => 'ID3',
71 SubDirectory => {
72 TagTable => 'Image::ExifTool::ID3::Main',
73 ProcessProc => \&Image::ExifTool::ID3::ProcessID3,
74 },
75 },
76 APPL => 'ApplicationData', # (first 4 bytes are the application signature)
77# SSND => 'SoundData',
78# MARK => 'Marker',
79# INST => 'Instrument',
80# MIDI => 'MidiData',
81# AESD => 'AudioRecording',
82);
83
84%Image::ExifTool::AIFF::Common = (
85 PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
86 GROUPS => { 2 => 'Audio' },
87 FORMAT => 'int16u',
88 0 => 'NumChannels',
89 1 => { Name => 'NumSampleFrames', Format => 'int32u' },
90 3 => 'SampleSize',
91 4 => { Name => 'SampleRate', Format => 'extended' }, #3
92 9 => {
93 Name => 'CompressionType',
94 Format => 'string[4]',
95 PrintConv => {
96 NONE => 'None',
97 ACE2 => 'ACE 2-to-1',
98 ACE8 => 'ACE 8-to-3',
99 MAC3 => 'MAC 3-to-1',
100 MAC6 => 'MAC 6-to-1',
101 sowt => 'Little-endian, no compression',
102 alaw => 'a-law',
103 ALAW => 'A-law',
104 ulaw => 'mu-law',
105 ULAW => 'Mu-law',
106 'GSM '=> 'GSM',
107 G722 => 'G722',
108 G726 => 'G726',
109 G728 => 'G728',
110 },
111 },
112 11 => { #PH
113 Name => 'CompressorName',
114 Format => 'pstring',
115 ValueConv => '$self->Decode($val, "MacRoman")',
116 },
117);
118
# Tags of the Format Version ('FVER') chunk: a single 32-bit timestamp that
# uses the shared date/time conversions in %timeInfo.
%Image::ExifTool::AIFF::FormatVers = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT       => 'int32u',
    0 => {
        Name => 'FormatVersionTime',
        %timeInfo,
    },
);
124
# Tags of the Comment ('COMT') chunk.  ProcessComment walks the chunk and
# reports each field under the tag IDs 0, 1 and 2 defined here.
%Image::ExifTool::AIFF::Comment = (
    PROCESS_PROC => \&Image::ExifTool::AIFF::ProcessComment,
    GROUPS       => { 2 => 'Audio' },
    0 => {
        Name => 'CommentTime',
        %timeInfo,
    },
    1 => 'MarkerID',
    2 => {
        Name      => 'Comment',
        ValueConv => '$self->Decode($val, "MacRoman")',
    },
);
135
# Composite tags derived from other AIFF tags
%Image::ExifTool::AIFF::Composite = (
    Duration => {
        Require => {
            0 => 'AIFF:SampleRate',
            1 => 'AIFF:NumSampleFrames',
        },
        # frames / rate, but only when both values are non-zero
        RawConv   => '($val[0] and $val[1]) ? $val[1] / $val[0] : undef',
        PrintConv => 'ConvertDuration($val)',
    },
);

# add our composite tags
Image::ExifTool::AddCompositeTags('Image::ExifTool::AIFF');
149
150
#------------------------------------------------------------------------------
# Process AIFF Comment chunk
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success, 0 if the chunk is too short to hold any comment data
# Notes: The chunk starts with a 16-bit comment count, followed by one record
#        per comment: 32-bit timestamp, 16-bit marker ID, 16-bit text length,
#        then the text itself (padded to an even number of bytes).  All
#        integers are unpacked big-endian.  Parsing stops quietly at the first
#        record that would run past the end of the chunk.
sub ProcessComment($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dirLen = $$dirInfo{DirLen};
    my $verbose = $et->Options('Verbose');
    # need more than just the 2-byte comment count to have anything to extract
    return 0 unless $dirLen > 2;
    my $numComments = unpack('n', $$dataPt);
    my $pos = 2;
    $verbose and $et->VerboseDir('Comment', $numComments);
    for (my $i = 0; $i < $numComments; ++$i) {
        # each comment has a fixed 8-byte header
        last if $pos + 8 > $dirLen;
        my ($time, $markerID, $size) = unpack("x${pos}Nnn", $$dataPt);
        $et->HandleTag($tagTablePtr, 0, $time);
        # a marker ID of zero is not reported
        $et->HandleTag($tagTablePtr, 1, $markerID) if $markerID;
        $pos += 8;
        # stop if the comment text is truncated
        last if $pos + $size > $dirLen;
        my $val = substr($$dataPt, $pos, $size);
        $et->HandleTag($tagTablePtr, 2, $val);
        ++$size if $size & 0x01;    # account for padding byte if necessary
        $pos += $size;
    }
    # return the documented success value explicitly (the value of a sub that
    # ends in a loop is otherwise unspecified)
    return 1;
}
179
#------------------------------------------------------------------------------
# Extract information from an AIFF file (DjVu files, recognized by their
# 'AT&TFORM' signature, are read by the same chunk loop)
# Inputs: 0) ExifTool object reference, 1) DirInfo reference
# Returns: 1 on success, 0 if this wasn't a valid AIFF file
sub ProcessAIFF($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($data, $failed, $tagTable, $page, $type, $n);

    # the first 12 bytes determine whether this file is ours
    return 0 unless $raf->Read($data, 12) == 12;
    my $fast3 = $$et{OPTIONS}{FastScan} && $$et{OPTIONS}{FastScan} == 3;
    my $pos = 12;
    if ($data !~ /^AT&TFORM/) {
        # regular AIFF or AIFF-C file
        $data =~ /^FORM....(AIF(F|C))/s or return 0;
        $et->SetFileType($1);
        return 1 if $fast3;     # only the file type is wanted
        $tagTable = GetTagTable('Image::ExifTool::AIFF::Main');
        $type = 'AIFF';
    } else {
        # DjVu image
        # http://www.djvu.org/
        # http://djvu.sourceforge.net/specs/djvu3changes.txt
        my $form;
        if ($raf->Read($form, 4) != 4 or $form !~ /^(DJVU|DJVM)/) {
            return 0;
        }
        $pos += 4;
        $data = substr($data, 4) . $form;
        $et->SetFileType('DJVU');
        return 1 if $fast3;     # only the file type is wanted
        $tagTable = GetTagTable('Image::ExifTool::DjVu::Main');
        # modify FileType to indicate a multi-page document
        if ($form eq 'DJVM') {
            $$et{VALUE}{FileType} .= " (multi-page)";
        }
        $type = 'DjVu';
    }
    SetByteOrder('MM');
    my $verbose = $et->Options('Verbose');
#
# Read through the IFF chunks
#
    for ($n = 0; ; ++$n) {
        # each chunk starts with a 4-character ID and a 32-bit length
        last unless $raf->Read($data, 8) == 8;
        $pos += 8;
        my ($tag, $len) = unpack('a4N', $data);
        my $tagInfo = $et->GetTagInfo($tagTable, $tag);
        $et->VPrint(0, "AIFF '${tag}' chunk ($len bytes of data): ", $raf->Tell(),"\n");
        # AIFF chunks are padded to an even number of bytes
        my $padded = $len + ($len & 0x01);
        if ($padded > 100000000) {
            unless ($padded < 0x80000000 or $et->Options('LargeFileSupport')) {
                $et->Warn('End of processing at large chunk (LargeFileSupport not enabled)');
                last;
            }
            # a huge chunk is never loaded, even if we know the tag
            if ($tagInfo) {
                $et->Warn("Skipping large $$tagInfo{Name} chunk (> 100 MB)");
                undef $tagInfo;
            }
        }
        if ($tagInfo) {
            if ($$tagInfo{TypeOnly}) {
                # only the first 4 bytes are read for these tags
                $len = $padded = 4;
                ++$page;
                $et->VPrint(0, $$et{INDENT} . "Page $page:\n");
            }
            # the final pad byte is allowed to be missing
            unless ($raf->Read($data, $padded) >= $len) {
                $failed = 1;
                last;
            }
            if (not $$tagInfo{SubDirectory} and not $$tagInfo{Binary}) {
                $data =~ s/\0+$//;  # remove trailing nulls
            }
            $et->HandleTag($tagTable, $tag, $data,
                DataPt  => \$data,
                DataPos => $pos,
                Start   => 0,
                Size    => $len,
            );
        } elsif (not $len) {
            # NOTE(review): $n is incremented here and again by the loop
            # header on 'next', so the limit is reached after roughly 50
            # consecutive empty chunks rather than 100 -- confirm intended
            if (++$n >= 100) {
                $et->Warn('Aborting scan. Too many empty chunks');
                last;
            }
            next;
        } elsif ($verbose > 2 and $padded < 1024000) {
            # dump the data of an unknown chunk
            unless ($raf->Read($data, $padded) == $padded) {
                $failed = 1;
                last;
            }
            $et->VerboseDump(\$data);
        } else {
            # skip over the chunk data
            unless ($raf->Seek($padded, 1)) {
                $failed = 1;
                last;
            }
        }
        $pos += $padded;
        $n = 0;     # a non-empty chunk restarts the empty-chunk count
    }
    $et->Warn("Error reading $type file (corrupted?)") if $failed;
    return 1;
}
270
2711; # end
272
273__END__
274
275=head1 NAME
276
277Image::ExifTool::AIFF - Read AIFF meta information
278
279=head1 SYNOPSIS
280
281This module is used by Image::ExifTool
282
283=head1 DESCRIPTION
284
285This module contains routines required by Image::ExifTool to extract
286information from AIFF (Audio Interchange File Format) audio files.
287
288=head1 AUTHOR
289
290Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
291
292This library is free software; you can redistribute it and/or modify it
293under the same terms as Perl itself.
294
295=head1 REFERENCES
296
297=over 4
298
299=item L<http://developer.apple.com/documentation/QuickTime/INMAC/SOUND/imsoundmgr.30.htm#pgfId=3190>
300
301=item L<http://astronomy.swin.edu.au/~pbourke/dataformats/aiff/>
302
303=item L<http://www.mactech.com/articles/mactech/Vol.06/06.01/SANENormalized/>
304
305=back
306
307=head1 SEE ALSO
308
309L<Image::ExifTool::TagNames/AIFF Tags>,
310L<Image::ExifTool(3pm)|Image::ExifTool>
311
312=cut
Note: See TracBrowser for help on using the repository browser.