source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Ogg.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields; it's a regex which lists the metadata fields to apply join_before_split to, whereas previously join_before_split always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 8.8 KB
Line 
1#------------------------------------------------------------------------------
2# File: Ogg.pm
3#
4# Description: Read Ogg meta information
5#
6# Revisions: 2011/07/13 - P. Harvey Created (split from Vorbis.pm)
7# 2016/07/14 - PH Added Ogg Opus support
8#
9# References: 1) http://www.xiph.org/vorbis/doc/
10# 2) http://flac.sourceforge.net/ogg_mapping.html
11# 3) http://www.theora.org/doc/Theora.pdf
12#------------------------------------------------------------------------------
13
14package Image::ExifTool::Ogg;
15
16use strict;
17use vars qw($VERSION);
18use Image::ExifTool qw(:DataAccess :Utils);
19
20$VERSION = '1.02';
21
my $MAX_PACKETS = 2;    # maximum packets to scan from each stream at start of file

# Information types recognized in Ogg files
%Image::ExifTool::Ogg::Main = (
    NOTES => q{
        ExifTool extracts the following types of information from Ogg files.  See
        L<http://www.xiph.org/vorbis/doc/> for the Ogg specification.
    },
);
# (these entries are for documentation purposes only, and aren't used by the
# code below)  Each key maps to the Main tag table of the module whose name is
# the key with its first letter capitalized.
foreach my $infoType (qw(vorbis theora Opus FLAC ID3)) {
    $Image::ExifTool::Ogg::Main{$infoType} = {
        SubDirectory => {
            TagTable => 'Image::ExifTool::' . ucfirst($infoType) . '::Main',
        },
    };
}
37
#------------------------------------------------------------------------------
# Process a single (reassembled) Ogg packet
# Inputs: 0) ExifTool object ref, 1) reference to packet data
# Returns: result of ProcessDirectory for a recognized packet with a
#          SubDirectory definition, otherwise 0
sub ProcessPacket($$)
{
    my ($et, $dataPt) = @_;
    my ($tag, $type, $pos);

    # identify the codec from the packet signature
    if ($$dataPt =~ /^(.)(vorbis|theora)/s) {
        # tag ID is the numeric value of the leading byte; the directory
        # starts after that byte plus the 6-character codec name
        ($tag, $type, $pos) = (ord($1), ucfirst($2), 7);
    } elsif ($$dataPt =~ /^(OpusHead|OpusTags)/) {
        # tag ID is the 8-character signature itself
        ($tag, $type, $pos) = ($1, 'Opus', 8);
    } else {
        return 0;   # not a packet type we parse
    }

    # this is an OGV file if it contains Theora video, or OPUS for Opus audio
    # (only overridden while the file type is still the generic OGG)
    my %fileTypeFor = ( Theora => 'OGV', Opus => 'OPUS' );
    my $newFileType = $fileTypeFor{$type};
    $et->OverrideFileType($newFileType) if $newFileType and $$et{FILE_TYPE} eq 'OGG';

    # look up the tag in the codec's main table; it must define a SubDirectory
    my $codecTable = GetTagTable("Image::ExifTool::${type}::Main");
    my $tagInfo = $et->GetTagInfo($codecTable, $tag);
    return 0 unless $tagInfo and $$tagInfo{SubDirectory};

    my $subdir = $$tagInfo{SubDirectory};
    my %dirInfo = (
        DataPt   => $dataPt,
        DirName  => $$tagInfo{Name},
        DirStart => $pos,
    );
    my $subTable = GetTagTable($$subdir{TagTable});

    # set group1 so Theora comments can be distinguished from Vorbis comments
    $$et{SET_GROUP1} = $type if $type eq 'Theora';
    my $byteOrder = $$subdir{ByteOrder};
    SetByteOrder($byteOrder) if $byteOrder;
    my $rtnVal = $et->ProcessDirectory(\%dirInfo, $subTable);
    # restore the little-endian order used for the Ogg page headers, and
    # remove the temporary group1 override
    SetByteOrder('II');
    delete $$et{SET_GROUP1};

    return $rtnVal;
}
70
#------------------------------------------------------------------------------
# Extract information from an Ogg file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference (uses RAF)
# Returns: 1 on success, 0 if this wasn't a valid Ogg file
# Notes: Packets are buffered per stream serial number in %val, and a packet
#        is only processed once the following page of that stream shows that
#        it is not continued.  At end of file any packets still buffered are
#        flushed one stream at a time.
sub ProcessOGG($$)
{
    my ($et, $dirInfo) = @_;

    # must first check for leading/trailing ID3 information
    unless ($$et{DoneID3}) {
        require Image::ExifTool::ID3;
        Image::ExifTool::ID3::ProcessID3($et, $dirInfo) and return 1;
    }
    my $raf = $$dirInfo{RAF};
    my $verbose = $et->Options('Verbose');
    my $out = $et->Options('TextOut');
    my ($success, $page, $packets, $streams, $stream) = (0,0,0,0,'');
    # %val holds the pending packet data for each stream, %streamPage holds
    # the next expected page sequence number for each stream, and $numFlac
    # (once defined) counts down the FLAC header packets still to be read
    my ($buff, $flag, %val, $numFlac, %streamPage);

    for (;;) {
        # must read ahead to next page to see if it is a continuation
        # (this code would be a lot simpler if the continuation flag
        # was on the leading instead of the trailing page!)
        if ($raf and $raf->Read($buff, 28) == 28) {
            # validate magic number
            unless ($buff =~ /^OggS/) {
                $success and $et->Warn('Lost synchronization');
                last;
            }
            unless ($success) {
                # set file type and initialize on first page
                $success = 1;
                $et->SetFileType();
                SetByteOrder('II');
            }
            $flag = Get8u(\$buff, 5);       # page flag
            $stream = Get32u(\$buff, 14);   # stream serial number
            if ($flag & 0x02) {
                ++$streams; # count start-of-stream pages
                $streamPage{$stream} = $page = 0;
            } else {
                # (undefined if this stream was never started or lost a page)
                $page = $streamPage{$stream};
            }
            ++$packets unless $flag & 0x01; # keep track of packet count
        } else {
            # all done unless we have to process our last packet
            last unless %val;
            ($stream) = sort keys %val;     # take a stream
            $flag = 0;                      # no continuation
            undef $raf;                     # flag for done reading
        }

        if (defined $numFlac) {
            # stop to process FLAC headers if we hit the end of file
            last unless $raf;
            --$numFlac; # one less header packet to read
        } else {
            # can finally process previous packet from this stream
            # unless this is a continuation page
            if (defined $val{$stream} and not $flag & 0x01) {
                ProcessPacket($et, \$val{$stream});
                delete $val{$stream};
                unless (defined $raf) {
                    # end of file: there is no page left to parse, so we
                    # must not fall through to the page reader below (it
                    # would call Read on the undefined $raf).  Instead,
                    # finish, or loop back to flush the next buffered stream
                    last unless %val;   # all done (success!)
                    next;               # process remaining stream(s)
                }
                # only read the first $MAX_PACKETS packets from each stream
                if ($packets > $MAX_PACKETS * $streams) {
                    last unless %val;   # all done (success!)
                }
            }
            # stop processing Ogg if we have scanned enough packets
            last if $packets > $MAX_PACKETS * $streams and not %val;
            # (defensive: the page reader below requires an open file)
            last unless $raf;
        }

        # continue processing the current page
        my $pageNum = Get32u(\$buff, 18);   # page sequence number
        my $nseg = Get8u(\$buff, 26);       # number of segments
        # calculate total data length
        # (the 28-byte header read above already includes the first entry of
        # the segment table, so only $nseg-1 more entries remain to be read)
        my $dataLen = Get8u(\$buff, 27);
        if ($nseg) {
            $raf->Read($buff, $nseg-1) == $nseg-1 or last;
            my @segs = unpack('C*', $buff);
            # could check that all these (but the last) are 255...
            foreach (@segs) { $dataLen += $_ }
        }
        if (defined $page) {
            if ($page == $pageNum) {
                $streamPage{$stream} = ++$page;
            } else {
                $et->Warn('Missing page(s) in Ogg file');
                undef $page;
                delete $streamPage{$stream};
            }
        }
        # read page data
        $raf->Read($buff, $dataLen) == $dataLen or last;
        if ($verbose > 1) {
            printf $out "Page %d, stream 0x%x, flag 0x%x (%d bytes)\n",
                   $pageNum, $stream, $flag, $dataLen;
            $et->VerboseDump(\$buff, DataPos => $raf->Tell() - $dataLen);
        }
        if (defined $val{$stream}) {
            $val{$stream} .= $buff;     # add this continuation page
        } elsif (not $flag & 0x01) {    # ignore remaining pages of a continued packet
            # ignore the first page of any packet we aren't parsing
            if ($buff =~ /^(.(vorbis|theora)|Opus(Head|Tags))/s) {
                $val{$stream} = $buff;  # save this page
            } elsif ($buff =~ /^\x7fFLAC..(..)/s) {
                # 16-bit big-endian value following the signature and the
                # next 2 bytes is used as the count of FLAC header packets
                $numFlac = unpack('n',$1);
                # keep the data that follows this 9-byte Ogg FLAC header
                $val{$stream} = substr($buff, 9);
            }
        }
        if (defined $numFlac) {
            # stop to process FLAC headers if we have them all
            last if $numFlac <= 0;
        } elsif (defined $val{$stream} and $flag & 0x04) {
            # process Ogg packet now if end-of-stream bit is set
            ProcessPacket($et, \$val{$stream});
            delete $val{$stream};
        }
    }
    if (defined $numFlac and defined $val{$stream}) {
        # process FLAC headers as if it was a complete FLAC file
        require Image::ExifTool::FLAC;
        my %dirInfo = ( RAF => File::RandomAccess->new(\$val{$stream}) );
        Image::ExifTool::FLAC::ProcessFLAC($et, \%dirInfo);
    }
    return $success;
}
197
1981; # end
199
200__END__
201
202=head1 NAME
203
204Image::ExifTool::Ogg - Read Ogg meta information
205
206=head1 SYNOPSIS
207
208This module is used by Image::ExifTool
209
210=head1 DESCRIPTION
211
212This module contains definitions required by Image::ExifTool to extract meta
213information from Ogg bitstream container files.
214
215=head1 AUTHOR
216
217Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
218
219This library is free software; you can redistribute it and/or modify it
220under the same terms as Perl itself.
221
222=head1 REFERENCES
223
224=over 4
225
226=item L<http://www.xiph.org/vorbis/doc/>
227
228=item L<http://flac.sourceforge.net/ogg_mapping.html>
229
230=item L<http://www.theora.org/doc/Theora.pdf>
231
232=back
233
234=head1 SEE ALSO
235
236L<Image::ExifTool::TagNames/Ogg Tags>,
237L<Image::ExifTool(3pm)|Image::ExifTool>
238
239=cut
240
Note: See TracBrowser for help on using the repository browser.