1 | #------------------------------------------------------------------------------
|
---|
2 | # File: DjVu.pm
|
---|
3 | #
|
---|
4 | # Description: Read DjVu archive meta information
|
---|
5 | #
|
---|
6 | # Revisions: 09/25/2008 - P. Harvey Created
|
---|
7 | #
|
---|
8 | # References: 1) http://djvu.sourceforge.net/ (DjVu v3 specification, Nov 2005)
|
---|
9 | # 2) http://www.djvu.org/
|
---|
10 | #
|
---|
11 | # Notes: DjVu files are recognized and the IFF structure is processed
|
---|
12 | # by Image::ExifTool::AIFF
|
---|
13 | #------------------------------------------------------------------------------
|
---|
14 |
|
---|
15 | package Image::ExifTool::DjVu;
|
---|
16 |
|
---|
17 | use strict;
|
---|
18 | use vars qw($VERSION);
|
---|
19 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
20 |
|
---|
21 | $VERSION = '1.06';
|
---|
22 |
|
---|
23 | sub ParseAnt($);
|
---|
24 | sub ProcessAnt($$$);
|
---|
25 | sub ProcessMeta($$$);
|
---|
26 | sub ProcessBZZ($$$);
|
---|
27 |
|
---|
# DjVu chunks that we parse, keyed by 4-character chunk ID
# (see the References list in the file header)
%Image::ExifTool::DjVu::Main = (
    GROUPS => { 2 => 'Image' },
    NOTES  => q{
        Information is extracted from the following chunks in DjVu images. See
        L<http://www.djvu.org/> for the DjVu specification.
    },
    # INFO chunk: decoded as binary data via the Info table
    INFO => { SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Info' } },
    # FORM chunk: extract chunk type only, then descend into chunk
    FORM => {
        TypeOnly     => 1,
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Form' },
    },
    # ANTa chunk: annotation text handed directly to the Ant table
    ANTa => { SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Ant' } },
    # ANTz chunk: same annotation table, but routed through ProcessBZZ()
    # which decodes the data before the table's own process proc is run
    ANTz => {
        Name         => 'CompressedAnnotation',
        SubDirectory => {
            TagTable    => 'Image::ExifTool::DjVu::Ant',
            ProcessProc => \&ProcessBZZ,
        },
    },
    INCL => 'IncludedFileID',
);
|
---|
54 |
|
---|
# information in the DjVu INFO chunk
# (keys are byte offsets into the chunk; default element format is int8u)
%Image::ExifTool::DjVu::Info = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },
    FORMAT       => 'int8u',
    PRIORITY     => 0, # first INFO block takes priority
    0 => { Name => 'ImageWidth',  Format => 'int16u' },
    2 => { Name => 'ImageHeight', Format => 'int16u' },
    4 => {
        Name        => 'DjVuVersion',
        Description => 'DjVu Version',
        Format      => 'int8u[2]',
        # two bytes are swapped into "second.first" form
        # (this may be just one byte as with version 0.16)
        ValueConv   => '$val=~/(\d+) (\d+)/ ? "$2.$1" : "0.$val"',
    },
    6 => {
        Name      => 'SpatialResolution',
        Format    => 'int16u',
        # swap the two bytes of the value that was read (little-endian!)
        ValueConv => '(($val & 0xff)<<8) + ($val>>8)',
    },
    8 => {
        Name      => 'Gamma',
        ValueConv => '$val / 10',
    },
    9 => {
        Name      => 'Orientation',
        Mask      => 0x07, # (upper 5 bits reserved)
        PrintConv => {
            1 => 'Horizontal (normal)',
            2 => 'Rotate 180',
            5 => 'Rotate 90 CW',
            6 => 'Rotate 270 CW',
        },
    },
);
|
---|
96 |
|
---|
# information in the DjVu FORM chunk
# (only the leading 4-byte form type is decoded here)
%Image::ExifTool::DjVu::Form = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },
    0 => {
        Name      => 'SubfileType',
        Format    => 'undef[4]',
        Priority  => 0,
        # human-readable names for the known form types
        PrintConv => {
            DJVU => 'Single-page image',
            DJVM => 'Multi-page document',
            PM44 => 'Color IW44',
            BM44 => 'Grayscale IW44',
            DJVI => 'Shared component',
            THUM => 'Thumbnail image',
        },
    },
);
|
---|
115 |
|
---|
# tags found in the DjVu annotation chunk (ANTz or ANTa)
%Image::ExifTool::DjVu::Ant = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessAnt,
    GROUPS       => { 2 => 'Image' },
    NOTES        => 'Information extracted from annotation chunks.',
    # Note: For speed, ProcessAnt() pre-scans for known tag ID's, so if any
    # new tags are added here they must also be added to the pre-scan check
    metadata => { SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Meta' } },
    xmp => {
        Name         => 'XMP',
        SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' },
    },
);
|
---|
131 |
|
---|
# tags found in the DjVu annotation metadata
# (ProcessMeta() adds an entry on the fly for any tag ID not listed here)
%Image::ExifTool::DjVu::Meta = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessMeta,
    GROUPS       => { 1 => 'DjVu-Meta', 2 => 'Image' },
    NOTES        => q{
        This table lists the standard DjVu metadata tags, but ExifTool will extract
        any tags that exist even if they don't appear here. The DjVu v3
        documentation endorses tags borrowed from two standards: 1) BibTeX
        bibliography system tags (all lowercase Tag ID's in the table below), and 2)
        PDF DocInfo tags (capitalized Tag ID's).
    },
    # BibTeX tags (ref http://en.wikipedia.org/wiki/BibTeX)
    address      => { Groups => { 2 => 'Location' } },
    annote       => { Name => 'Annotation' },
    author       => { Groups => { 2 => 'Author' } },
    booktitle    => { Name => 'BookTitle' },
    chapter      => { },
    crossref     => { Name => 'CrossRef' },
    edition      => { },
    eprint       => { Name => 'EPrint' },
    howpublished => { Name => 'HowPublished' },
    institution  => { },
    journal      => { },
    key          => { },
    month        => { Groups => { 2 => 'Time' } },
    note         => { },
    number       => { },
    organization => { },
    pages        => { },
    publisher    => { },
    school       => { },
    series       => { },
    title        => { },
    type         => { },
    url          => { Name => 'URL' },
    volume       => { },
    year         => { Groups => { 2 => 'Time' } },
    # PDF tags (same as Image::ExifTool::PDF::Info)
    Title        => { },
    Author       => { Groups => { 2 => 'Author' } },
    Subject      => { },
    Keywords     => { },
    Creator      => { },
    Producer     => { },
    CreationDate => {
        Name      => 'CreateDate',
        Groups    => { 2 => 'Time' },
        # RFC 3339 date/time format
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    ModDate => {
        Name      => 'ModifyDate',
        Groups    => { 2 => 'Time' },
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    Trapped => {
        # remove leading '/' from '/True' or '/False'
        ValueConv => '$val=~s{^/}{}; $val',
    },
);
|
---|
194 |
|
---|
#------------------------------------------------------------------------------
# Parse DjVu annotation "s-expression" syntax (recursively)
# Inputs:  0) data ref (with pos($$dataPt) set to start of annotation)
# Returns: reference to list of tokens/references, or undef if no tokens,
#          and the position in $$dataPt is set to end of last token
# Notes:   - The DjVu annotation syntax is not well documented, so a number
#            of assumptions are made here!
#          - Quoted strings come straight from the file and are untrusted, so
#            their escape sequences are decoded with a lookup table.  They
#            must never be passed through a string eval (doing so allowed
#            arbitrary code execution from a crafted file).
sub ParseAnt($)
{
    my $dataPt = shift;
    my (@toks, $tok, $more);
    # single-character escapes recognized in quoted text
    # (any other escaped character simply stands for itself)
    my %escChar = (
        a => "\a", b => "\b", t => "\t", n => "\n",
        v => "\x0b", f => "\f", r => "\r",
    );
    # (the DjVu annotation syntax requires that every single token be parsed
    # in order to properly scan through the items)
Tok: for (;;) {
        # find the next token
        last unless $$dataPt =~ /(\S)/sg;   # get next non-space character
        my $ch = $1;
        if ($ch eq '(') {           # start of list
            $tok = ParseAnt($dataPt);
        } elsif ($ch eq ')') {      # end of list
            $more = 1;
            last;
        } elsif ($ch eq '"') {      # quoted string
            $tok = '';
            for (;;) {
                # get string up to the next quotation mark
                # (index arithmetic is used instead of /(.*?)"/sg because
                # that pattern doesn't work in perl 5.6.2)
                my $pos = pos($$dataPt);
                # unterminated string: abandon parsing altogether
                last Tok unless $$dataPt =~ /"/sg;
                $tok .= substr($$dataPt, $pos, pos($$dataPt)-1-$pos);
                # we're good unless quote was escaped by odd number of
                # backslashes (\z, not $, so that a backslash followed by a
                # trailing newline is not mistaken for an escaped quote)
                last unless $tok =~ /(\\+)\z/ and length($1) & 0x01;
                $tok .= '"';    # quote is part of the string
            }
            # convert C escape sequences (allowed in quoted text):
            # 1-3 digit octal codes, the letters in %escChar, and otherwise
            # the escaped character itself (covers \\ and \")
            $tok =~ s{\\(?:([0-7]{1,3})|(.))}{
                defined $1 ? chr(oct($1) & 0xff)
                           : (exists $escChar{$2} ? $escChar{$2} : $2)
            }sge;
        } else {                    # key name
            pos($$dataPt) = pos($$dataPt) - 1;
            # allow anything in key but whitespace, braces and double quotes
            # (this is one of the assumptions mentioned above)
            $tok = $$dataPt =~ /([^\s()"]+)/sg ? $1 : undef;
        }
        push @toks, $tok if defined $tok;
    }
    # prevent further parsing unless more after this
    pos($$dataPt) = length $$dataPt unless $more;
    return @toks ? \@toks : undef;
}
|
---|
246 |
|
---|
#------------------------------------------------------------------------------
# Process DjVu annotation chunk (ANTa or decoded ANTz)
# Inputs:  0) ExifTool object reference, 1) DirInfo reference (DataPt refers
#          to the annotation text), 2) tag table ref
# Returns: 1 on success (including when there is nothing of interest),
#          0 if the annotation yields no tokens
sub ProcessAnt($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};

    # quick pre-scan: nothing to do unless a metadata or xmp list is present
    unless ($$dataPt =~ /\(\s*(metadata|xmp)[\s("]/s) {
        return 1;
    }

    # parse the whole annotation into a tree of tokens and list references
    pos($$dataPt) = 0;
    my $tree = ParseAnt($dataPt);
    return 0 unless $tree;

    # each top-level list of the form (tag value ...) is one annotation
    foreach my $entry (@$tree) {
        next if ref($entry) ne 'ARRAY' or @$entry < 2;
        my $tag = shift @$entry;    # remaining elements are the value(s)
        next if ref $tag or not defined $$tagTablePtr{$tag};
        my $value;
        if ($tag eq 'metadata') {
            # ProcessMeta() takes array reference
            $value = $entry;
        } else {
            $value = $$entry[0];
            next if ref $value;     # only process simple values
        }
        $et->HandleTag($tagTablePtr, $tag, $value);
    }
    return 1;
}
|
---|
279 |
|
---|
#------------------------------------------------------------------------------
# Process DjVu metadata
# Inputs:  0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success, 0 if DataPt does not refer to a list reference
# Notes:   - input dirInfo DataPt is a reference to a list of pre-parsed
#            metadata entries, each expected to be a [tag, value] list
#          - tags missing from the table are added to it before extraction
sub ProcessMeta($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $listPt = $$dirInfo{DataPt};
    return 0 if ref($$listPt) ne 'ARRAY';
    my $entries = $$listPt;
    $et->VerboseDir('Metadata', scalar @$entries);
    my $ignored;    # set if any entry had to be skipped
    foreach my $entry (@$entries) {
        # make sure item is a simple tag/value pair
        unless (ref $entry eq 'ARRAY' and @$entry >= 2 and
                not ref $$entry[0] and not ref $$entry[1])
        {
            $ignored = 1;
            next;
        }
        my ($tag, $value) = @$entry[0, 1];
        # add any new tags to the table
        unless ($$tagTablePtr{$tag}) {
            (my $name = $tag) =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters
            unless (length $name) {
                $ignored = 1;   # nothing usable left for a tag name
                next;
            }
            AddTagToTable($tagTablePtr, $tag, { Name => ucfirst($name) });
        }
        $et->HandleTag($tagTablePtr, $tag, $value);
    }
    $et->Warn('Ignored invalid metadata entry(s)') if $ignored;
    return 1;
}
|
---|
308 |
|
---|
#------------------------------------------------------------------------------
# Process BZZ-compressed data (in DjVu images)
# Inputs:  0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 0 if the data can't be decoded or the table has no PROCESS_PROC,
#          otherwise the return value of the table's process proc
# Notes:   DataPt, DataLen and DirLen of the caller's dirInfo are replaced
#          to describe the decoded data
sub ProcessBZZ($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    require Image::ExifTool::BZZ;
    my $decoded = Image::ExifTool::BZZ::Decode($$dirInfo{DataPt});
    if (not defined $decoded) {
        $et->Warn("Error decoding $$dirInfo{DirName}");
        return 0;
    }
    my $size = length $decoded;
    if ($et->Options('Verbose') >= 3) {
        # dump the decoded data in very verbose mode
        $et->VerboseDir("Decoded $$dirInfo{DirName}", 0, $size);
        $et->VerboseDump(\$decoded);
    }
    # point the directory information at the decoded buffer
    $$dirInfo{DataPt}  = \$decoded;
    $$dirInfo{DirLen}  = $size;
    $$dirInfo{DataLen} = $size;
    # process the data using the default process proc for this table
    my $proc = $$tagTablePtr{PROCESS_PROC};
    return 0 unless $proc;
    return $proc->($et, $dirInfo, $tagTablePtr);
}
|
---|
334 |
|
---|
335 | 1; # end
|
---|
336 |
|
---|
337 | __END__
|
---|
338 |
|
---|
339 | =head1 NAME
|
---|
340 |
|
---|
341 | Image::ExifTool::DjVu - Read DjVu meta information
|
---|
342 |
|
---|
343 | =head1 SYNOPSIS
|
---|
344 |
|
---|
345 | This module is used by Image::ExifTool
|
---|
346 |
|
---|
347 | =head1 DESCRIPTION
|
---|
348 |
|
---|
349 | This module contains definitions required by Image::ExifTool to extract meta
|
---|
350 | information from DjVu images. Parsing of the DjVu IFF structure is done by
|
---|
351 | Image::ExifTool::AIFF.
|
---|
352 |
|
---|
353 | =head1 AUTHOR
|
---|
354 |
|
---|
355 | Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
|
---|
356 |
|
---|
357 | This library is free software; you can redistribute it and/or modify it
|
---|
358 | under the same terms as Perl itself.
|
---|
359 |
|
---|
360 | =head1 REFERENCES
|
---|
361 |
|
---|
362 | =over 4
|
---|
363 |
|
---|
364 | =item L<http://djvu.sourceforge.net/>
|
---|
365 |
|
---|
366 | =item L<http://www.djvu.org/>
|
---|
367 |
|
---|
368 | =back
|
---|
369 |
|
---|
370 | =head1 SEE ALSO
|
---|
371 |
|
---|
372 | L<Image::ExifTool::TagNames/DjVu Tags>,
|
---|
373 | L<Image::ExifTool::AIFF(3pm)|Image::ExifTool::AIFF>,
|
---|
374 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
375 |
|
---|
376 | =cut
|
---|
377 |
|
---|