1 | #------------------------------------------------------------------------------
|
---|
2 | # File: DjVu.pm
|
---|
3 | #
|
---|
4 | # Description: Read DjVu archive meta information
|
---|
5 | #
|
---|
6 | # Revisions: 09/25/2008 - P. Harvey Created
|
---|
7 | #
|
---|
8 | # References: 1) http://djvu.sourceforge.net/ (DjVu v3 specification, Nov 2005)
|
---|
9 | # 2) http://www.djvu.org/
|
---|
10 | #
|
---|
11 | # Notes: DjVu files are recognized and the IFF structure is processed
|
---|
12 | # by Image::ExifTool::AIFF
|
---|
13 | #------------------------------------------------------------------------------
|
---|
14 |
|
---|
15 | package Image::ExifTool::DjVu;
|
---|
16 |
|
---|
17 | use strict;
|
---|
18 | use vars qw($VERSION);
|
---|
19 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
20 |
|
---|
21 | $VERSION = '1.03';
|
---|
22 |
|
---|
23 | sub ParseAnt($);
|
---|
24 | sub ProcessAnt($$$);
|
---|
25 | sub ProcessMeta($$$);
|
---|
26 | sub ProcessBZZ($$$);
|
---|
27 |
|
---|
# DjVu chunk IDs that are recognized (chunk definitions are given in the
# DjVu v3 specification referenced in the file header)
%Image::ExifTool::DjVu::Main = (
    GROUPS => { 2 => 'Image' },
    NOTES  => 'Information is extracted from the following chunks in DjVu images.',
    # image information, decoded via the Info table
    'INFO' => {
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Info',
        },
    },
    # container chunk: extract the chunk type only, then descend into the chunk
    'FORM' => {
        TypeOnly     => 1,
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Form',
        },
    },
    # plain-text annotation
    'ANTa' => {
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Ant',
        },
    },
    # BZZ-compressed annotation: decoded by ProcessBZZ(), then handled by the
    # same table as the plain-text annotation
    'ANTz' => {
        Name         => 'CompressedAnnotation',
        SubDirectory => {
            TagTable    => 'Image::ExifTool::DjVu::Ant',
            ProcessProc => \&ProcessBZZ,
        },
    },
    'INCL' => 'IncludedFileID',
);
51 |
|
---|
# layout of the DjVu INFO chunk (offsets are in bytes because the default
# FORMAT for this table is int8u)
%Image::ExifTool::DjVu::Info = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },
    FORMAT       => 'int8u',
    PRIORITY     => 0,  # first INFO block takes priority
    0 => { Name => 'ImageWidth',  Format => 'int16u' },
    2 => { Name => 'ImageHeight', Format => 'int16u' },
    4 => {
        Name        => 'DjVuVersion',
        Description => 'DjVu Version',
        Format      => 'int8u[2]',
        # two values are stored minor-first, so they are swapped to give
        # "major.minor" (there may be just one byte, as with version 0.16,
        # in which case the major version is taken to be 0)
        ValueConv   => '$val=~/(\d+) (\d+)/ ? "$2.$1" : "0.$val"',
    },
    6 => {
        Name      => 'SpatialResolution',
        Format    => 'int16u',
        # swap the two bytes because this one value is little-endian!
        ValueConv => '(($val & 0xff)<<8) + ($val>>8)',
    },
    8 => {
        Name      => 'Gamma',
        ValueConv => '$val / 10',
    },
    9 => {
        Name      => 'Orientation',
        Mask      => 0x07,  # (upper 5 bits reserved)
        PrintConv => {
            1 => 'Horizontal (normal)',
            2 => 'Rotate 180',
            5 => 'Rotate 90 CW',
            6 => 'Rotate 270 CW',
        },
    },
);
93 |
|
---|
# layout of the DjVu FORM chunk (only the leading 4-character type is read)
%Image::ExifTool::DjVu::Form = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },
    0 => {
        Name      => 'SubfileType',
        Format    => 'undef[4]',
        Priority  => 0,
        # readable descriptions of the known FORM types
        PrintConv => {
            'DJVU' => 'Single-page image',
            'DJVM' => 'Multi-page document',
            'PM44' => 'Color IW44',
            'BM44' => 'Grayscale IW44',
            'DJVI' => 'Shared component',
            'THUM' => 'Thumbnail image',
        },
    },
);
112 |
|
---|
# tag IDs recognized inside a DjVu annotation chunk (ANTz or ANTa)
%Image::ExifTool::DjVu::Ant = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessAnt,
    GROUPS       => { 2 => 'Image' },
    NOTES        => 'Information extracted from annotation chunks.',
    # Note: For speed, ProcessAnt() pre-scans the annotation for the tag IDs
    # listed below, so any tag added to this table must also be added to the
    # pre-scan pattern in ProcessAnt() or it will never be seen
    'metadata' => {
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Meta' },
    },
    'xmp' => {
        Name         => 'XMP',
        SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' },
    },
);
128 |
|
---|
# tag IDs found in the DjVu annotation metadata (IDs that are not listed
# here are added to this table on the fly by ProcessMeta())
%Image::ExifTool::DjVu::Meta = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessMeta,
    GROUPS       => { 1 => 'DjVu-Meta', 2 => 'Image' },
    NOTES => q{
        This table lists the standard DjVu metadata tags, but ExifTool will extract
        any tags that exist even if they don't appear here. The DjVu v3
        documentation endorses tags borrowed from two standards: 1) BibTeX
        bibliography system tags (all lowercase Tag ID's in the table below), and 2)
        PDF DocInfo tags (uppercase Tag ID's).
    },
    # --- BibTeX tags (ref http://en.wikipedia.org/wiki/BibTeX) ---
    'address'      => { Groups => { 2 => 'Location' } },
    'annote'       => { Name => 'Annotation' },
    'author'       => { Groups => { 2 => 'Author' } },
    'booktitle'    => { Name => 'BookTitle' },
    'chapter'      => {},
    'crossref'     => { Name => 'CrossRef' },
    'edition'      => {},
    'eprint'       => { Name => 'EPrint' },
    'howpublished' => { Name => 'HowPublished' },
    'institution'  => {},
    'journal'      => {},
    'key'          => {},
    'month'        => { Groups => { 2 => 'Time' } },
    'note'         => {},
    'number'       => {},
    'organization' => {},
    'pages'        => {},
    'publisher'    => {},
    'school'       => {},
    'series'       => {},
    'title'        => {},
    'type'         => {},
    'url'          => { Name => 'URL' },
    'volume'       => {},
    'year'         => { Groups => { 2 => 'Time' } },
    # --- PDF tags (same as Image::ExifTool::PDF::Info) ---
    'Title'        => {},
    'Author'       => { Groups => { 2 => 'Author' } },
    'Subject'      => {},
    'Keywords'     => {},
    'Creator'      => {},
    'Producer'     => {},
    'CreationDate' => {
        Name      => 'CreateDate',
        Groups    => { 2 => 'Time' },
        # value is in RFC 3339 date/time format, converted with the XMP date
        # conversion routine
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    'ModDate' => {
        Name      => 'ModifyDate',
        Groups    => { 2 => 'Time' },
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    'Trapped' => {
        # remove leading '/' from '/True' or '/False'
        ValueConv => '$val=~s{^/}{}; $val',
    },
);
191 |
|
---|
#------------------------------------------------------------------------------
# Parse DjVu annotation "s-expression" syntax (recursively)
# Inputs:  0) data ref (with pos($$dataPt) set to start of annotation)
# Returns: reference to list of tokens/references, or undef if no tokens,
#          and the position in $$dataPt is set to end of last token
# Notes:   - The DjVu annotation syntax is not well documented, so a number
#            of assumptions are made here
#          - Quoted strings come straight from the file, so their escape
#            sequences are decoded with a fixed table and never with a string
#            eval (an eval would run Perl code embedded in the file, and would
#            also mangle or drop any value containing '$' or '@')
#          - Recognized escapes: \a \b \f \n \r \t \v, \ooo (octal, reduced to
#            one byte) and \xHH; for any other escaped character (including
#            '"' and '\') the backslash is removed and the character is kept
sub ParseAnt($)
{
    my $dataPt = shift;
    my (@toks, $more);
    # (every single token must be parsed in order to properly scan through
    # the items, because nothing else marks where a list ends)
  Tok: for (;;) {
        # find the next token
        last unless $$dataPt =~ /(\S)/sg;   # get next non-space character
        my $ch = $1;    # copy the capture before any other pattern match
        my $tok;
        if ($ch eq '(') {           # start of list
            $tok = ParseAnt($dataPt);   # (undef for an empty list)
        } elsif ($ch eq ')') {      # end of list
            $more = 1;              # caller continues after this list
            last;
        } elsif ($ch eq '"') {      # quoted string
            $tok = '';
            for (;;) {
                # get string up to the next quotation mark
                # (done with pos/substr because matching /(.*?)"/sg here
                # doesn't work in perl 5.6.2)
                my $start = pos($$dataPt);
                last Tok unless $$dataPt =~ /"/sg;  # unterminated string
                $tok .= substr($$dataPt, $start, pos($$dataPt) - 1 - $start);
                # we're good unless quote was escaped by odd number of
                # backslashes (anchored with \z instead of $ so that a
                # backslash before a trailing newline isn't taken as
                # escaping the quote)
                last unless $tok =~ /(\\+)\z/ and length($1) & 0x01;
                $tok .= '"';        # quote is part of the string
            }
            # convert C escape sequences (allowed in quoted text) without
            # ever evaluating the text as Perl code
            my %esc = (
                a => "\a", b => "\b", f => "\f", n => "\n",
                r => "\r", t => "\t", v => "\x0b",
            );
            $tok =~ s{\\(?:([0-7]{1,3})|x([0-9a-fA-F]{1,2})|(.))}{
                defined $1 ? chr(oct($1) & 0xff) :
                defined $2 ? chr(hex($2)) :
                exists $esc{$3} ? $esc{$3} : $3
            }sge;
        } else {                    # key name
            # back up so the key includes the character just consumed
            pos($$dataPt) = pos($$dataPt) - 1;
            # allow anything in key but whitespace, braces and double quotes
            # (this is one of the assumptions mentioned above)
            $$dataPt =~ /([^\s()"]+)/sg;
            $tok = $1;
        }
        push @toks, $tok if defined $tok;
    }
    # prevent further parsing unless more after this
    pos($$dataPt) = length $$dataPt unless $more;
    return @toks ? \@toks : undef;
}
242 |
|
---|
#------------------------------------------------------------------------------
# Process DjVu annotation chunk (ANTa or decoded ANTz)
# Inputs:  0) ExifTool object reference, 1) DirInfo reference (DataPt refers
#          to the annotation text), 2) tag table ref
# Returns: 1 on success (including when the chunk contains none of the known
#          tag ID's), 0 if no tokens could be parsed from the annotation
sub ProcessAnt($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};

    # cheap test first: skip the full parse unless one of the known tag ID's
    # appears immediately after an opening bracket
    unless ($$dataPt =~ /\(\s*(metadata|xmp)[\s("]/s) {
        return 1;
    }

    # build the token tree, starting from the beginning of the data
    pos($$dataPt) = 0;
    my $tree = ParseAnt($dataPt);
    return 0 unless $tree;

    # each top-level list of the form (ID value...) is a candidate annotation
    foreach my $entry (@$tree) {
        next unless ref $entry eq 'ARRAY';
        next unless @$entry >= 2;
        my $id = shift @$entry;     # (leaves only the values in @$entry)
        next if ref $id;
        next unless defined $$tagTablePtr{$id};
        my $value;
        if ($id eq 'metadata') {
            # ProcessMeta() takes a reference to the whole list of entries
            $value = $entry;
        } else {
            # all other tags take a simple value only
            next if ref $$entry[0];
            $value = $$entry[0];
        }
        $exifTool->HandleTag($tagTablePtr, $id, $value);
    }
    return 1;
}
275 |
|
---|
#------------------------------------------------------------------------------
# Process DjVu metadata
# Inputs:  0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success, 0 if the data is not a list reference
# Notes:   input dirInfo DataPt is a reference to a list of pre-parsed
#          metadata entries, each expected to be a [tagID, value] list
sub ProcessMeta($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    my $listPt = $$dirInfo{DataPt};
    return 0 unless ref $$listPt eq 'ARRAY';
    $exifTool->VerboseDir('Metadata', scalar @$$listPt);
    my $bad;    # set if any entry had to be skipped
    foreach my $pair (@$$listPt) {
        # accept only simple tag/value pairs (both elements plain scalars)
        my $simple = (ref $pair eq 'ARRAY' and @$pair >= 2 and
                      not ref $$pair[0] and not ref $$pair[1]);
        unless ($simple) {
            $bad = 1;
            next;
        }
        my ($id, $value) = @$pair[0, 1];
        # unknown tag ID: add it to the table under a name made from the ID
        unless ($$tagTablePtr{$id}) {
            (my $name = $id) =~ tr/-_a-zA-Z0-9//dc;    # remove illegal characters
            unless (length $name) {
                $bad = 1;   # nothing usable left for a tag name
                next;
            }
            Image::ExifTool::AddTagToTable($tagTablePtr, $id, { Name => ucfirst($name) });
        }
        $exifTool->HandleTag($tagTablePtr, $id, $value);
    }
    # a single warning covers all of the skipped entries
    $exifTool->Warn('Ignored invalid metadata entry(s)') if $bad;
    return 1;
}
304 |
|
---|
#------------------------------------------------------------------------------
# Process BZZ-compressed data (in DjVu images)
# Inputs:  0) ExifTool object reference, 1) DirInfo reference (its DataPt,
#          DataLen and DirLen are replaced to describe the decoded data),
#          2) tag table ref
# Returns: 1 on success; 0 if the data can't be decoded or the table has no
#          PROCESS_PROC, otherwise whatever the table's PROCESS_PROC returns
sub ProcessBZZ($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    require Image::ExifTool::BZZ;   # (decoder is loaded only when needed)
    my $decoded = Image::ExifTool::BZZ::Decode($$dirInfo{DataPt});
    if (not defined $decoded) {
        $exifTool->Warn("Error decoding $$dirInfo{DirName}");
        return 0;
    }
    my $size = length $decoded;
    # dump the decoded data in very verbose mode
    if ($exifTool->Options('Verbose') >= 3) {
        $exifTool->VerboseDir("Decoded $$dirInfo{DirName}", 0, $size);
        $exifTool->VerboseDump(\$decoded);
    }
    # make the directory information describe the decoded data instead
    $$dirInfo{DataPt}  = \$decoded;
    $$dirInfo{DirLen}  = $size;
    $$dirInfo{DataLen} = $size;
    # hand the decoded data to the default process proc for this table
    my $proc = $$tagTablePtr{PROCESS_PROC};
    return 0 unless $proc;
    return $proc->($exifTool, $dirInfo, $tagTablePtr);
}
330 |
|
---|
331 | 1; # end
|
---|
332 |
|
---|
333 | __END__
|
---|
334 |
|
---|
335 | =head1 NAME
|
---|
336 |
|
---|
337 | Image::ExifTool::DjVu - Read DjVu meta information
|
---|
338 |
|
---|
339 | =head1 SYNOPSIS
|
---|
340 |
|
---|
341 | This module is used by Image::ExifTool
|
---|
342 |
|
---|
343 | =head1 DESCRIPTION
|
---|
344 |
|
---|
345 | This module contains definitions required by Image::ExifTool to extract meta
|
---|
346 | information from DjVu images. Parsing of the DjVu IFF structure is done by
|
---|
347 | Image::ExifTool::AIFF.
|
---|
348 |
|
---|
349 | =head1 AUTHOR
|
---|
350 |
|
---|
351 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
352 |
|
---|
353 | This library is free software; you can redistribute it and/or modify it
|
---|
354 | under the same terms as Perl itself.
|
---|
355 |
|
---|
356 | =head1 REFERENCES
|
---|
357 |
|
---|
358 | =over 4
|
---|
359 |
|
---|
360 | =item L<http://djvu.sourceforge.net/>
|
---|
361 |
|
---|
362 | =item L<http://www.djvu.org/>
|
---|
363 |
|
---|
364 | =back
|
---|
365 |
|
---|
366 | =head1 SEE ALSO
|
---|
367 |
|
---|
368 | L<Image::ExifTool::TagNames/DjVu Tags>,
|
---|
369 | L<Image::ExifTool::AIFF(3pm)|Image::ExifTool::AIFF>,
|
---|
370 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
371 |
|
---|
372 | =cut
|
---|
373 |
|
---|