source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/Image/ExifTool/DjVu.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl committed in the main trunk but not in the x64 branch)

  • Property svn:executable set to *
File size: 12.3 KB
Line 
1#------------------------------------------------------------------------------
2# File: DjVu.pm
3#
4# Description: Read DjVu archive meta information
5#
6# Revisions: 09/25/2008 - P. Harvey Created
7#
8# References: 1) http://djvu.sourceforge.net/ (DjVu v3 specification, Nov 2005)
9# 2) http://www.djvu.org/
10#
11# Notes: DjVu files are recognized and the IFF structure is processed
12# by Image::ExifTool::AIFF
13#------------------------------------------------------------------------------
14
15package Image::ExifTool::DjVu;
16
17use strict;
18use vars qw($VERSION);
19use Image::ExifTool qw(:DataAccess :Utils);
20
21$VERSION = '1.03';
22
23sub ParseAnt($);
24sub ProcessAnt($$$);
25sub ProcessMeta($$$);
26sub ProcessBZZ($$$);
27
# Top-level table: the DjVu chunk IDs from which information is extracted.
# Chunks with a SubDirectory are handed to the named tag table for decoding.
%Image::ExifTool::DjVu::Main = (
    GROUPS => { 2 => 'Image' },
    NOTES  => 'Information is extracted from the following chunks in DjVu images.',

    # image information chunk, decoded by the Info table
    INFO => {
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Info' },
    },

    # container chunk: only its type is extracted, then the chunk is descended into
    FORM => {
        TypeOnly     => 1,
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Form' },
    },

    # annotation chunk that is handed directly to the Ant table
    ANTa => {
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Ant' },
    },

    # annotation chunk that must first pass through ProcessBZZ() to be decoded
    ANTz => {
        Name         => 'CompressedAnnotation',
        SubDirectory => {
            TagTable    => 'Image::ExifTool::DjVu::Ant',
            ProcessProc => \&ProcessBZZ,
        },
    },

    INCL => 'IncludedFileID',
);
51
# Layout of the DjVu INFO chunk.  Keys are offsets into the chunk data
# (entries default to FORMAT 'int8u' unless they give their own Format).
%Image::ExifTool::DjVu::Info = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },
    FORMAT       => 'int8u',
    PRIORITY     => 0,  # the first INFO block encountered takes priority

    0 => { Name => 'ImageWidth',  Format => 'int16u' },
    2 => { Name => 'ImageHeight', Format => 'int16u' },
    4 => {
        Name        => 'DjVuVersion',
        Description => 'DjVu Version',
        Format      => 'int8u[2]',
        # two values are printed as "second.first"; a lone value (there may
        # be just one byte, as with version 0.16) is printed as "0.value"
        ValueConv   => '$val=~/(\d+) (\d+)/ ? "$2.$1" : "0.$val"',
    },
    6 => {
        Name      => 'SpatialResolution',
        Format    => 'int16u',
        # this field is little-endian, so swap the two bytes of the value read
        ValueConv => '(($val & 0xff)<<8) + ($val>>8)',
    },
    8 => {
        Name      => 'Gamma',
        ValueConv => '$val / 10',
    },
    9 => {
        Name      => 'Orientation',
        Mask      => 0x07,  # only the low 3 bits are used (upper 5 bits reserved)
        PrintConv => {
            1 => 'Horizontal (normal)',
            2 => 'Rotate 180',
            5 => 'Rotate 90 CW',
            6 => 'Rotate 270 CW',
        },
    },
);
93
# Layout of the DjVu FORM chunk: the 4-byte type code at offset 0
# identifies the kind of sub-file.
%Image::ExifTool::DjVu::Form = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS       => { 2 => 'Image' },

    0 => {
        Name      => 'SubfileType',
        Format    => 'undef[4]',
        Priority  => 0,
        # human-readable names for the known FORM type codes
        PrintConv => {
            DJVU => 'Single-page image',
            DJVM => 'Multi-page document',
            PM44 => 'Color IW44',
            BM44 => 'Grayscale IW44',
            DJVI => 'Shared component',
            THUM => 'Thumbnail image',
        },
    },
);
112
# Tags recognized inside a DjVu annotation chunk (ANTa, or ANTz once decoded).
#
# IMPORTANT: for speed, ProcessAnt() pre-scans the annotation text for the
# tag IDs listed here, so any tag added to this table must also be added to
# that pre-scan pattern.
%Image::ExifTool::DjVu::Ant = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessAnt,
    GROUPS       => { 2 => 'Image' },
    NOTES        => 'Information extracted from annotation chunks.',

    metadata => {
        SubDirectory => { TagTable => 'Image::ExifTool::DjVu::Meta' },
    },
    xmp => {
        Name         => 'XMP',
        SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' },
    },
);
128
# Standard tags of the DjVu annotation "metadata" entry.  ProcessMeta() adds
# any tag it meets that is not listed here, so this table is not exhaustive.
%Image::ExifTool::DjVu::Meta = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessMeta,
    GROUPS       => { 1 => 'DjVu-Meta', 2 => 'Image' },
    NOTES => q{
        This table lists the standard DjVu metadata tags, but ExifTool will extract
        any tags that exist even if they don't appear here.  The DjVu v3
        documentation endorses tags borrowed from two standards: 1) BibTeX
        bibliography system tags (all lowercase Tag ID's in the table below), and 2)
        PDF DocInfo tags (uppercase Tag ID's).
    },

    #
    # BibTeX tags (ref http://en.wikipedia.org/wiki/BibTeX) -- lowercase IDs
    #
    address      => { Groups => { 2 => 'Location' } },
    annote       => { Name => 'Annotation' },
    author       => { Groups => { 2 => 'Author' } },
    booktitle    => { Name => 'BookTitle' },
    chapter      => { },
    crossref     => { Name => 'CrossRef' },
    edition      => { },
    eprint       => { Name => 'EPrint' },
    howpublished => { Name => 'HowPublished' },
    institution  => { },
    journal      => { },
    key          => { },
    month        => { Groups => { 2 => 'Time' } },
    note         => { },
    number       => { },
    organization => { },
    pages        => { },
    publisher    => { },
    school       => { },
    series       => { },
    title        => { },
    type         => { },
    url          => { Name => 'URL' },
    volume       => { },
    year         => { Groups => { 2 => 'Time' } },

    #
    # PDF tags (same as Image::ExifTool::PDF::Info) -- uppercase IDs
    #
    Title        => { },
    Author       => { Groups => { 2 => 'Author' } },
    Subject      => { },
    Keywords     => { },
    Creator      => { },
    Producer     => { },
    CreationDate => {
        Name      => 'CreateDate',
        Groups    => { 2 => 'Time' },
        # value is in RFC 3339 date/time format; converted by the XMP module
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    ModDate => {
        Name      => 'ModifyDate',
        Groups    => { 2 => 'Time' },
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    Trapped => {
        # strip the leading '/' from '/True' or '/False'
        ValueConv => '$val=~s{^/}{}; $val',
    },
);
191
#------------------------------------------------------------------------------
# Parse DjVu annotation "s-expression" syntax (recursively)
# Inputs: 0) data ref (with pos($$dataPt) set to start of annotation)
# Returns: reference to list of tokens/references, or undef if no tokens,
#          and the position in $$dataPt is set to end of last token
# Notes: - The DjVu annotation syntax is not well documented, so a number of
#          assumptions are made here!
#        - Each token is either a plain string (key name or decoded quoted
#          string) or a reference to a nested token list
#        - SECURITY: the annotation text comes straight from the file being
#          read and must be treated as untrusted.  Escape sequences in quoted
#          strings are therefore decoded with a fixed lookup table and never
#          with a string eval (which would let a crafted file run Perl code)
sub ParseAnt($)
{
    my $dataPt = shift;
    my (@toks, $tok, $more);
    # C escape sequences recognized in quoted text (octal is handled separately)
    my %esc = (
        a => "\a", b => "\b", f => "\f", n => "\n", r => "\r", t => "\t",
        v => "\x0b", '"' => '"', '\\' => '\\',
    );
    # (the DjVu annotation syntax requires that every single token be
    #  parsed in order to properly scan through the items)
Tok: for (;;) {
        # find the next token
        last unless $$dataPt =~ /(\S)/sg;   # get next non-space character
        if ($1 eq '(') {            # start of list
            $tok = ParseAnt($dataPt);
        } elsif ($1 eq ')') {       # end of list
            $more = 1;              # (the caller continues parsing after this list)
            last;
        } elsif ($1 eq '"') {       # quoted string
            $tok = '';
            for (;;) {
                # get string up to the next quotation mark
                # (done with pos/substr because a /(.*?)"/sg match
                #  doesn't work in perl 5.6.2)
                my $pos = pos($$dataPt);
                # unterminated string: abandon it and stop parsing
                last Tok unless $$dataPt =~ /"/sg;
                $tok .= substr($$dataPt, $pos, pos($$dataPt)-1-$pos);
                # we're good unless quote was escaped by odd number of backslashes
                # (anchor with \z, not $, so that a backslash followed by a
                #  newline is not mistaken for a backslash escaping the quote)
                last unless $tok =~ /(\\+)\z/ and length($1) & 0x01;
                $tok .= '"';        # quote is part of the string
            }
            # convert C escape sequences (allowed in quoted text): octal \ooo
            # becomes the corresponding byte, known letters are mapped through
            # %esc, and any unrecognized sequence is left exactly as written
            $tok =~ s{\\(?:([0-7]{1,3})|(.))}
                     {defined $1 ? chr(oct($1) & 0xff) : exists $esc{$2} ? $esc{$2} : "\\$2"}egs;
        } else {                    # key name
            pos($$dataPt) = pos($$dataPt) - 1;  # back up to re-read the first character
            # allow anything in key but whitespace, braces and double quotes
            # (this is one of those assumptions mentioned above)
            $$dataPt =~ /([^\s()"]+)/sg;
            $tok = $1;
        }
        push @toks, $tok if defined $tok;
    }
    # prevent further parsing unless more after this
    pos($$dataPt) = length $$dataPt unless $more;
    return @toks ? \@toks : undef;
}
242
#------------------------------------------------------------------------------
# Process DjVu annotation chunk (ANTa or decoded ANTz)
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success (including when the chunk holds nothing of interest),
#          0 if the annotation yielded no tokens
# Notes: the pre-scan pattern below must list every tag ID in the tag table
sub ProcessAnt($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};

    # cheap test first: nothing to do unless a metadata or xmp entry exists
    unless ($$dataPt =~ /\(\s*(metadata|xmp)[\s("]/s) {
        return 1;
    }

    # build the token tree for the whole annotation, starting at the beginning
    pos($$dataPt) = 0;
    my $tree = ParseAnt($dataPt);
    return 0 unless $tree;

    # each top-level list is one annotation: ( tag value ... )
    foreach my $node (@$tree) {
        next unless ref $node eq 'ARRAY';
        next if @$node < 2;
        my $id = shift @$node;      # (leaves only the values in @$node)
        next if ref $id;
        next unless defined $$tagTablePtr{$id};
        if ($id ne 'metadata') {
            # other tags take a single simple value
            next if ref $$node[0];
            $exifTool->HandleTag($tagTablePtr, $id, $$node[0]);
            next;
        }
        # ProcessMeta() expects the array reference of remaining entries
        $exifTool->HandleTag($tagTablePtr, $id, $node);
    }
    return 1;
}
275
#------------------------------------------------------------------------------
# Process DjVu metadata
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success, 0 if the data is not a list reference
# Notes: input dirInfo DataPt is a reference to a list of pre-parsed metadata
#        entries (ie. a reference to an array reference), each of which
#        should be a [ tag, value ] pair
sub ProcessMeta($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;
    my $entries = ${$$dirInfo{DataPt}};
    return 0 unless ref $entries eq 'ARRAY';

    $exifTool->VerboseDir('Metadata', scalar @$entries);

    my $invalid;    # set if any entry had to be skipped
    foreach my $pair (@$entries) {
        # accept only simple tag/value pairs (neither element may be a list)
        my $simple = (ref $pair eq 'ARRAY' and @$pair >= 2 and
                      not ref $$pair[0] and not ref $$pair[1]);
        unless ($simple) {
            $invalid = 1;
            next;
        }
        my $id = $$pair[0];
        # tags not yet in the table are added on the fly
        unless ($$tagTablePtr{$id}) {
            # derive the tag name from the ID, keeping only legal characters
            (my $name = $id) =~ tr/-_a-zA-Z0-9//dc;
            unless (length $name) {
                $invalid = 1;
                next;
            }
            Image::ExifTool::AddTagToTable($tagTablePtr, $id, { Name => ucfirst($name) });
        }
        $exifTool->HandleTag($tagTablePtr, $id, $$pair[1]);
    }
    $exifTool->Warn('Ignored invalid metadata entry(s)') if $invalid;
    return 1;
}
304
#------------------------------------------------------------------------------
# Process BZZ-compressed data (in DjVu images)
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: result of the table's PROCESS_PROC (1 on success), or 0 if the data
#          could not be decoded or the table has no PROCESS_PROC
# Notes: on successful decoding the DirInfo is updated in place to describe
#        the decoded buffer (DataPt, DataLen and DirLen)
sub ProcessBZZ($$$)
{
    my ($exifTool, $dirInfo, $tagTablePtr) = @_;

    # the decoder is loaded only when a compressed chunk is actually met
    require Image::ExifTool::BZZ;
    my $decoded = Image::ExifTool::BZZ::Decode($$dirInfo{DataPt});
    if (not defined $decoded) {
        $exifTool->Warn("Error decoding $$dirInfo{DirName}");
        return 0;
    }

    # in very verbose mode (3 or higher), dump the decoded data
    if ($exifTool->Options('Verbose') >= 3) {
        $exifTool->VerboseDir("Decoded $$dirInfo{DirName}", 0, length $decoded);
        $exifTool->VerboseDump(\$decoded);
    }

    # point the directory information at the decoded buffer
    my $size = length $decoded;
    $$dirInfo{DataPt}  = \$decoded;
    $$dirInfo{DirLen}  = $size;
    $$dirInfo{DataLen} = $size;

    # hand over to the default process proc for this table
    my $handler = $$tagTablePtr{PROCESS_PROC};
    return 0 unless $handler;
    return $handler->($exifTool, $dirInfo, $tagTablePtr);
}
330
3311; # end
332
333__END__
334
335=head1 NAME
336
337Image::ExifTool::DjVu - Read DjVu meta information
338
339=head1 SYNOPSIS
340
341This module is used by Image::ExifTool
342
343=head1 DESCRIPTION
344
345This module contains definitions required by Image::ExifTool to extract meta
346information from DjVu images. Parsing of the DjVu IFF structure is done by
347Image::ExifTool::AIFF.
348
349=head1 AUTHOR
350
351Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
352
353This library is free software; you can redistribute it and/or modify it
354under the same terms as Perl itself.
355
356=head1 REFERENCES
357
358=over 4
359
360=item L<http://djvu.sourceforge.net/>
361
362=item L<http://www.djvu.org/>
363
364=back
365
366=head1 SEE ALSO
367
368L<Image::ExifTool::TagNames/DjVu Tags>,
369L<Image::ExifTool::AIFF(3pm)|Image::ExifTool::AIFF>,
370L<Image::ExifTool(3pm)|Image::ExifTool>
371
372=cut
373
Note: See TracBrowser for help on using the repository browser.