[24107] | 1 | #------------------------------------------------------------------------------
|
---|
| 2 | # File: DjVu.pm
|
---|
| 3 | #
|
---|
| 4 | # Description: Read DjVu archive meta information
|
---|
| 5 | #
|
---|
| 6 | # Revisions: 09/25/2008 - P. Harvey Created
|
---|
| 7 | #
|
---|
| 8 | # References: 1) http://djvu.sourceforge.net/ (DjVu v3 specification, Nov 2005)
|
---|
| 9 | # 2) http://www.djvu.org/
|
---|
| 10 | #
|
---|
| 11 | # Notes: DjVu files are recognized and the IFF structure is processed
|
---|
| 12 | # by Image::ExifTool::AIFF
|
---|
| 13 | #------------------------------------------------------------------------------
|
---|
| 14 |
|
---|
| 15 | package Image::ExifTool::DjVu;
|
---|
| 16 |
|
---|
| 17 | use strict;
|
---|
| 18 | use vars qw($VERSION);
|
---|
| 19 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
| 20 |
|
---|
[34921] | 21 | $VERSION = '1.06';
|
---|
[24107] | 22 |
|
---|
| 23 | sub ParseAnt($);
|
---|
| 24 | sub ProcessAnt($$$);
|
---|
| 25 | sub ProcessMeta($$$);
|
---|
| 26 | sub ProcessBZZ($$$);
|
---|
| 27 |
|
---|
# DjVu chunk types from which information is extracted
# NOTE(review): this comment originally cited "ref 4", but the file header
# lists only references 1 and 2 -- confirm which reference was intended
%Image::ExifTool::DjVu::Main = (
    NOTES => q{
        Information is extracted from the following chunks in DjVu images. See
        L<http://www.djvu.org/> for the DjVu specification.
    },
    GROUPS => { 2 => 'Image' },
    # chunks handled through their own tag tables
    FORM => {
        # (only the chunk type is extracted here, then the chunk is descended into)
        TypeOnly => 1,
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Form',
        },
    },
    INFO => {
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Info',
        },
    },
    # annotation chunks: ANTa is handed to the Ant table directly, while
    # ANTz goes through ProcessBZZ() first
    ANTa => {
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Ant',
        },
    },
    ANTz => {
        Name => 'CompressedAnnotation',
        SubDirectory => {
            ProcessProc => \&ProcessBZZ,
            TagTable => 'Image::ExifTool::DjVu::Ant',
        },
    },
    # simple tag (no subdirectory)
    INCL => 'IncludedFileID',
);
|
---|
| 54 |
|
---|
# layout of the DjVu INFO chunk (keys are byte offsets into the chunk)
%Image::ExifTool::DjVu::Info = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT => 'int8u',
    GROUPS => { 2 => 'Image' },
    PRIORITY => 0,  # (so the first INFO block takes priority)
    0 => { Name => 'ImageWidth',  Format => 'int16u' },
    2 => { Name => 'ImageHeight', Format => 'int16u' },
    4 => {
        Name => 'DjVuVersion',
        Description => 'DjVu Version',
        # read as two bytes, although there may be only one (as with
        # version 0.16), in which case the single value goes after "0."
        Format => 'int8u[2]',
        # (the second number is placed ahead of the first)
        ValueConv => '$val=~/(\d+) (\d+)/ ? "$2.$1" : "0.$val"',
    },
    6 => {
        Name => 'SpatialResolution',
        Format => 'int16u',
        # this value is little-endian, so exchange the two bytes
        ValueConv => '(($val & 0xff)<<8) + ($val>>8)',
    },
    8 => {
        Name => 'Gamma',
        ValueConv => '$val / 10',
    },
    9 => {
        Name => 'Orientation',
        # only the low 3 bits are used (the upper 5 bits are reserved)
        Mask => 0x07,
        PrintConv => {
            1 => 'Horizontal (normal)',
            2 => 'Rotate 180',
            5 => 'Rotate 90 CW',
            6 => 'Rotate 270 CW',
        },
    },
);
|
---|
| 96 |
|
---|
# layout of the DjVu FORM chunk (only the 4-byte type at offset 0 is extracted)
%Image::ExifTool::DjVu::Form = (
    GROUPS => { 2 => 'Image' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    0 => {
        Name => 'SubfileType',
        Priority => 0,
        Format => 'undef[4]',
        # descriptions of the known FORM types
        PrintConv => {
            BM44 => 'Grayscale IW44',
            DJVI => 'Shared component',
            DJVM => 'Multi-page document',
            DJVU => 'Single-page image',
            PM44 => 'Color IW44',
            THUM => 'Thumbnail image',
        },
    },
);
|
---|
| 115 |
|
---|
# tags in a DjVu annotation chunk (ANTa, or ANTz after decoding)
%Image::ExifTool::DjVu::Ant = (
    NOTES => 'Information extracted from annotation chunks.',
    GROUPS => { 2 => 'Image' },
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessAnt,
    # IMPORTANT: ProcessAnt() does a quick pre-scan for the tag ID's listed
    # here (for speed), so a tag added to this table must be added to that
    # pre-scan check too
    xmp => {
        Name => 'XMP',
        SubDirectory => {
            TagTable => 'Image::ExifTool::XMP::Main',
        },
    },
    metadata => {
        SubDirectory => {
            TagTable => 'Image::ExifTool::DjVu::Meta',
        },
    },
);
|
---|
| 131 |
|
---|
# tags in the "metadata" entry of a DjVu annotation
%Image::ExifTool::DjVu::Meta = (
    PROCESS_PROC => \&Image::ExifTool::DjVu::ProcessMeta,
    GROUPS => { 1 => 'DjVu-Meta', 2 => 'Image' },
    NOTES => q{
        This table lists the standard DjVu metadata tags, but ExifTool will extract
        any tags that exist even if they don't appear here. The DjVu v3
        documentation endorses tags borrowed from two standards: 1) BibTeX
        bibliography system tags (all lowercase Tag ID's in the table below), and 2)
        PDF DocInfo tags (capitalized Tag ID's).
    },
#
# BibTeX tags (ref http://en.wikipedia.org/wiki/BibTeX)
#
    address      => { Groups => { 2 => 'Location' } },
    annote       => { Name => 'Annotation' },
    author       => { Groups => { 2 => 'Author' } },
    booktitle    => { Name => 'BookTitle' },
    chapter      => { },
    crossref     => { Name => 'CrossRef' },
    edition      => { },
    eprint       => { Name => 'EPrint' },
    howpublished => { Name => 'HowPublished' },
    institution  => { },
    journal      => { },
    key          => { },
    month        => { Groups => { 2 => 'Time' } },
    note         => { },
    number       => { },
    organization => { },
    pages        => { },
    publisher    => { },
    school       => { },
    series       => { },
    title        => { },
    type         => { },
    url          => { Name => 'URL' },
    volume       => { },
    year         => { Groups => { 2 => 'Time' } },
#
# PDF tags (same as Image::ExifTool::PDF::Info)
#
    Title        => { },
    Author       => { Groups => { 2 => 'Author' } },
    Subject      => { },
    Keywords     => { },
    Creator      => { },
    Producer     => { },
    # the two date tags are stored in RFC 3339 date/time format, and are
    # converted with the XMP date conversion routine
    CreationDate => {
        Name => 'CreateDate',
        Groups => { 2 => 'Time' },
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    ModDate => {
        Name => 'ModifyDate',
        Groups => { 2 => 'Time' },
        ValueConv => 'require Image::ExifTool::XMP; Image::ExifTool::XMP::ConvertXMPDate($val)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    Trapped => {
        # the value is '/True' or '/False', so strip the leading '/'
        ValueConv => '$val=~s{^/}{}; $val',
    },
);
|
---|
| 194 |
|
---|
# C escape sequences recognized in quoted annotation text by ParseAnt()
# (octal "\ooo" sequences are handled separately)
my %antEscape = (
    a => "\a", b => "\b", f => "\f", n => "\n", r => "\r", t => "\t",
    v => "\x0b", '"' => '"', '\\' => '\\',
);

#------------------------------------------------------------------------------
# Parse DjVu annotation "s-expression" syntax (recursively)
# Inputs: 0) data ref (with pos($$dataPt) set to start of annotation)
# Returns: reference to list of tokens/references, or undef if no tokens,
#          and the position in $$dataPt is set to end of last token
# Notes: - The DjVu annotation syntax is not well documented, so I make
#          a number of assumptions here!
#        - Each list element is either a plain string (key name or decoded
#          quoted string) or a reference to a nested list
#        - Quoted text comes straight from the file and must be treated as
#          untrusted, so escape sequences are decoded with a fixed lookup
#          table.  The text is never passed to eval (doing so allowed
#          annotation text to be executed as Perl code)
sub ParseAnt($)
{
    my $dataPt = shift;
    my (@toks, $tok, $more);
    # (the DjVu annotation syntax requires that every single token be
    # parsed in order to properly scan through the items)
    Tok: for (;;) {
        # find the next token
        last unless $$dataPt =~ /(\S)/sg;   # get next non-space character
        if ($1 eq '(') {        # start of list
            $tok = ParseAnt($dataPt);
        } elsif ($1 eq ')') {   # end of list
            $more = 1;
            last;
        } elsif ($1 eq '"') {   # quoted string
            $tok = '';
            for (;;) {
                # get string up to the next quotation mark
                # (substr is used instead of capturing with /(.*?)"/sg
                # because that doesn't work in perl 5.6.2)
                my $pos = pos($$dataPt);
                last Tok unless $$dataPt =~ /"/sg;  # (unterminated string)
                $tok .= substr($$dataPt, $pos, pos($$dataPt)-1-$pos);
                # we're good unless quote was escaped by odd number of
                # backslashes (must anchor with \z, not $, because $ also
                # matches before a trailing newline, and a backslash followed
                # by a newline does not escape the quote)
                last unless $tok =~ /(\\+)\z/ and length($1) & 0x01;
                $tok .= '"';    # quote is part of the string
            }
            # convert C escape sequences (allowed in quoted text):
            # - "\ooo" (1-3 octal digits) becomes the byte with that value
            #   (masked to 8 bits -- NOTE(review): values above \377 are
            #   assumed not to occur)
            # - sequences in %antEscape become the corresponding character
            # - anything else is left unchanged, backslash included
            $tok =~ s{\\([0-7]{1,3}|.)}{
                my $c = $1;
                $c =~ /^[0-7]/        ? chr(oct($c) & 0xff) :
                exists $antEscape{$c} ? $antEscape{$c}      :
                                        "\\$c"
            }sge;
        } else {                # key name
            pos($$dataPt) = pos($$dataPt) - 1;
            # allow anything in key but whitespace, braces and double quotes
            # (this is one of those assumptions I mentioned)
            $tok = $$dataPt =~ /([^\s()"]+)/sg ? $1 : undef;
        }
        push @toks, $tok if defined $tok;
    }
    # prevent further parsing unless more after this
    pos($$dataPt) = length $$dataPt unless $more;
    return @toks ? \@toks : undef;
}
|
---|
| 246 |
|
---|
#------------------------------------------------------------------------------
# Extract information from a DjVu annotation chunk (ANTa or decoded ANTz)
# Inputs: 0) ExifTool object reference, 1) DirInfo reference (DataPt is a
#            reference to the annotation text), 2) tag table ref
# Returns: 1 on success (including when the chunk contains nothing of
#          interest), 0 if no tokens could be parsed from the annotation
sub ProcessAnt($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};

    # (for speed) don't bother parsing unless metadata or XMP is present
    $$dataPt =~ /\(\s*(metadata|xmp)[\s("]/s or return 1;

    # build a tree structure from the annotations, starting from the top
    pos($$dataPt) = 0;
    my $tree = ParseAnt($dataPt);
    return 0 unless $tree;

    # handle each top-level annotation in turn
    foreach my $entry (@$tree) {
        # want only lists containing a tag ID plus at least one value
        next unless ref $entry eq 'ARRAY';
        next if @$entry < 2;
        my $id = shift @$entry;     # (leaves only the values in the list)
        next if ref $id;
        next unless defined $$tagTablePtr{$id};
        my $val;
        if ($id eq 'metadata') {
            # ProcessMeta() takes a reference to the list of values
            $val = $entry;
        } else {
            # other tags are extracted only if the value is a simple string
            next if ref $$entry[0];
            $val = $$entry[0];
        }
        $et->HandleTag($tagTablePtr, $id, $val);
    }
    return 1;
}
|
---|
| 279 |
|
---|
#------------------------------------------------------------------------------
# Extract tags from DjVu metadata
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success, 0 if the data is not a list reference
# Notes: - DataPt of the input dirInfo is a reference to a reference to a list
#          of pre-parsed metadata entries, each a [tag ID, value] list
#        - a single warning is issued if any entries had to be ignored
sub ProcessMeta($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    ref $$dataPt eq 'ARRAY' or return 0;
    my $entries = $$dataPt;
    $et->VerboseDir('Metadata', scalar @$entries);
    my $ignored;
    foreach my $entry (@$entries) {
        # entry must be a list beginning with a simple tag ID and value
        my $isPair = (ref $entry eq 'ARRAY' and @$entry >= 2 and
                      not ref $$entry[0] and not ref $$entry[1]);
        unless ($isPair) {
            $ignored = 1;
            next;
        }
        my ($id, $val) = @$entry[0, 1];
        if (not $$tagTablePtr{$id}) {
            # unknown tag: generate a name from the ID by dropping all
            # illegal characters, and add it to the table
            (my $name = $id) =~ tr/-_a-zA-Z0-9//dc;
            unless (length $name) {
                $ignored = 1;   # (nothing left to use for a name)
                next;
            }
            AddTagToTable($tagTablePtr, $id, { Name => ucfirst($name) });
        }
        $et->HandleTag($tagTablePtr, $id, $val);
    }
    $et->Warn('Ignored invalid metadata entry(s)') if $ignored;
    return 1;
}
|
---|
| 308 |
|
---|
#------------------------------------------------------------------------------
# Decode BZZ-compressed data (in DjVu images) and process the result
# Inputs: 0) ExifTool object reference, 1) DirInfo reference, 2) tag table ref
# Returns: 1 on success (0 if the data can't be decoded or the table has no
#          PROCESS_PROC, otherwise the return value of the table's PROCESS_PROC)
# Notes: DataPt, DataLen and DirLen of the input dirInfo are changed to refer
#        to the decoded data
sub ProcessBZZ($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    require Image::ExifTool::BZZ;
    my $decoded = Image::ExifTool::BZZ::Decode($$dirInfo{DataPt});
    if (not defined $decoded) {
        $et->Warn("Error decoding $$dirInfo{DirName}");
        return 0;
    }
    my $len = length $decoded;
    if ($et->Options('Verbose') >= 3) {
        # (in very verbose mode the decoded data is dumped too)
        $et->VerboseDir("Decoded $$dirInfo{DirName}", 0, $len);
        $et->VerboseDump(\$decoded);
    }
    # substitute the decoded data for the compressed data in the directory
    $$dirInfo{DataPt} = \$decoded;
    $$dirInfo{DirLen} = $len;
    $$dirInfo{DataLen} = $len;
    # hand the directory on to the default process proc for this table
    my $proc = $$tagTablePtr{PROCESS_PROC};
    return 0 unless $proc;
    return $proc->($et, $dirInfo, $tagTablePtr);
}
|
---|
| 334 |
|
---|
| 335 | 1; # end
|
---|
| 336 |
|
---|
| 337 | __END__
|
---|
| 338 |
|
---|
| 339 | =head1 NAME
|
---|
| 340 |
|
---|
| 341 | Image::ExifTool::DjVu - Read DjVu meta information
|
---|
| 342 |
|
---|
| 343 | =head1 SYNOPSIS
|
---|
| 344 |
|
---|
| 345 | This module is used by Image::ExifTool
|
---|
| 346 |
|
---|
| 347 | =head1 DESCRIPTION
|
---|
| 348 |
|
---|
| 349 | This module contains definitions required by Image::ExifTool to extract meta
|
---|
| 350 | information from DjVu images. Parsing of the DjVu IFF structure is done by
|
---|
| 351 | Image::ExifTool::AIFF.
|
---|
| 352 |
|
---|
| 353 | =head1 AUTHOR
|
---|
| 354 |
|
---|
[34921] | 355 | Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
|
---|
[24107] | 356 |
|
---|
| 357 | This library is free software; you can redistribute it and/or modify it
|
---|
| 358 | under the same terms as Perl itself.
|
---|
| 359 |
|
---|
| 360 | =head1 REFERENCES
|
---|
| 361 |
|
---|
| 362 | =over 4
|
---|
| 363 |
|
---|
| 364 | =item L<http://djvu.sourceforge.net/>
|
---|
| 365 |
|
---|
| 366 | =item L<http://www.djvu.org/>
|
---|
| 367 |
|
---|
| 368 | =back
|
---|
| 369 |
|
---|
| 370 | =head1 SEE ALSO
|
---|
| 371 |
|
---|
| 372 | L<Image::ExifTool::TagNames/DjVu Tags>,
|
---|
| 373 | L<Image::ExifTool::AIFF(3pm)|Image::ExifTool::AIFF>,
|
---|
| 374 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
| 375 |
|
---|
| 376 | =cut
|
---|
| 377 |
|
---|