source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Rawzor.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields, which is a regex that can list the metadata fields to apply the join_before_split to, and which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 6.2 KB
Line 
1#------------------------------------------------------------------------------
2# File: Rawzor.pm
3#
4# Description: Read meta information from Rawzor compressed images
5#
6# Revisions: 09/09/2008 - P. Harvey Created
7#
8# References: 1) http://www.rawzor.com/
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::Rawzor;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16
17$VERSION = '1.05';
18
# Highest Rawzor version this module can decode, stored as an integer in
# hundredths (199 means version 1.99).  Files whose required version is
# greater than this are reported as unsupported.
my $implementedRawzorVersion = 199;
21
# Tag table for the Rawzor-specific tags
%Image::ExifTool::Rawzor::Main = (
    GROUPS => { 2 => 'Other' },
    VARS   => { NO_ID => 1 },
    NOTES  => q{
 Rawzor files store compressed images of other formats. As well as the
 information listed below, exiftool uncompresses and extracts the meta
 information from the original image.
 },

    # file type of the image stored inside the Rawzor container
    OriginalFileType => { },

    # size of the original image, printed with the standard FileSize conversion
    OriginalFileSize => {
        PrintConv => $Image::ExifTool::Extra{FileSize}{PrintConv},
    },

    # version numbers are stored as integers in hundredths (eg. 199 is 1.99)
    RawzorRequiredVersion => {
        ValueConv => '$val / 100',
        PrintConv => 'sprintf("%.2f", $val)',
    },
    RawzorCreatorVersion => {
        ValueConv => '$val / 100',
        PrintConv => 'sprintf("%.2f", $val)',
    },

    # compression factor is originalSize/compressedSize (and compression
    # ratio is the inverse - ref "Data Compression" by David Salomon)
    CompressionFactor => {
        PrintConv => 'sprintf("%.2f", $val)',
    },
);
47
#------------------------------------------------------------------------------
# Extract information from a Rawzor file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference (the file is read
#            through its RAF element)
# Returns: 1 on success, 0 if this wasn't a valid Rawzor file
# Notes: Once the "rawzor" signature has been recognized, all later problems
#        (unsupported version, bad offset, missing bzip2 module, corrupt
#        metadata) generate a warning and the function still returns 1.
sub ProcessRWZ($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $buf2);

    # read the Rawzor file header:
    #  0 string     - "rawzor" signature
    #  6 int16u     - Required SDK version
    #  8 int16u     - Creator SDK version
    # 10 int64u     - RWZ file size
    # 18 int64u     - original raw file size
    # 26 undef[12]  - reserved
    # 38 int64u     - metadata offset
    $raf->Read($buff, 46) == 46 and $buff =~ /^rawzor/ or return 0;

    SetByteOrder('II');
    my $reqVers = Get16u(\$buff, 6);
    my $creatorVers = Get16u(\$buff, 8);
    my $rwzSize = Get64u(\$buff, 10);
    my $origSize = Get64u(\$buff, 18);
    my $tagTablePtr = GetTagTable('Image::ExifTool::Rawzor::Main');
    $et->HandleTag($tagTablePtr, RawzorRequiredVersion => $reqVers);
    $et->HandleTag($tagTablePtr, RawzorCreatorVersion => $creatorVers);
    $et->HandleTag($tagTablePtr, OriginalFileSize => $origSize);
    # (skip the compression factor if the RWZ size is zero to avoid dividing by zero)
    $et->HandleTag($tagTablePtr, CompressionFactor => $origSize/$rwzSize) if $rwzSize;
    # check version numbers
    if ($reqVers > $implementedRawzorVersion) {
        $et->Warn("Version $reqVers Rawzor images not yet supported");
        return 1;
    }
    my $metaOffset = Get64u(\$buff, 38);
    if ($metaOffset > 0x7fffffff) {
        $et->Warn('Bad metadata offset');
        return 1;
    }
    # check for the ability to uncompress the information
    unless (eval { require IO::Uncompress::Bunzip2 }) {
        $et->Warn('Install IO::Compress::Bzip2 to decode Rawzor bzip2 compression');
        return 1;
    }
    # read the metadata header:
    #  0 int64u     - metadata section 0 end (offset in original file)
    #  8 int64u     - metadata section 1 start
    # 16 int64u     - metadata section 1 end
    # 24 int64u     - metadata section 2 start
    # 32 undef[4]   - reserved
    # 36 int32u     - original metadata size
    # 40 int32u     - compressed metadata size
    unless ($raf->Seek($metaOffset, 0) and $raf->Read($buff, 44) == 44) {
        $et->Warn('Error reading metadata header');
        return 1;
    }
    my $metaSize = Get32u(\$buff, 36);
    if ($metaSize) {
        # NOTE(review): presumably set because the re-assembled file built
        # below contains no image data - confirm against ExifTool core
        $$et{DontValidateImageData} = 1;
        # validate the metadata header and read the compressed metadata
        my $end0 = Get64u(\$buff, 0);
        my $pos1 = Get64u(\$buff, 8);
        my $end1 = Get64u(\$buff, 16);
        my $pos2 = Get64u(\$buff, 24);
        my $len = Get32u(\$buff, 40);
        # the three sections must be in order, must not run past the end of the
        # original file (otherwise the section 2 length below goes negative),
        # and their combined length must equal the uncompressed metadata size
        unless ($raf->Read($buff, $len) == $len and
                $end0 + ($end1 - $pos1) + ($origSize - $pos2) == $metaSize and
                $end0 <= $pos1 and $pos1 <= $end1 and $end1 <= $pos2 and
                $pos2 <= $origSize)
        {
            $et->Warn('Error reading image metadata');
            return 1;
        }
        # uncompress the metadata (sizes are compared numerically)
        unless (IO::Uncompress::Bunzip2::bunzip2(\$buff, \$buf2) and
                length($buf2) == $metaSize)
        {
            $et->Warn('Error uncompressing image metadata');
            return 1;
        }
        # re-assemble the original file (sans image data), padding the gaps
        # between the metadata sections with null bytes so that all offsets
        # are the same as in the original file
        undef $buff; # (can't hurt to free memory as soon as possible)
        $buff = substr($buf2, 0, $end0) . ("\0" x ($pos1 - $end0)) .
                substr($buf2, $end0, $end1 - $pos1) . ("\0" x ($pos2 - $end1)) .
                substr($buf2, $end0 + $end1 - $pos1, $origSize - $pos2);
        undef $buf2;

        # extract original information by calling ExtractInfo recursively
        $et->ExtractInfo(\$buff, { ReEntry => 1 });
        undef $buff;
    }
    # set OriginalFileType from FileType of original file
    # then change FileType and MIMEType to indicate a Rawzor image
    my $origFileType = $$et{VALUE}{FileType};
    if ($origFileType) {
        $et->HandleTag($tagTablePtr, OriginalFileType => $origFileType);
        $et->OverrideFileType('RWZ');
    } else {
        $et->HandleTag($tagTablePtr, OriginalFileType => 'Unknown');
        $et->SetFileType();
    }
    return 1;
}
151
1521; # end
153
154__END__
155
156=head1 NAME
157
158Image::ExifTool::Rawzor - Read meta information from Rawzor compressed images
159
160=head1 SYNOPSIS
161
162This module is used by Image::ExifTool
163
164=head1 DESCRIPTION
165
166This module contains definitions required by Image::ExifTool to extract meta
167information from Rawzor compressed images.
168
169=head1 AUTHOR
170
171Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
172
173This library is free software; you can redistribute it and/or modify it
174under the same terms as Perl itself.
175
176=head1 REFERENCES
177
178=over 4
179
180=item L<http://www.rawzor.com/>
181
182=back
183
184=head1 SEE ALSO
185
186L<Image::ExifTool::TagNames/Rawzor Tags>,
187L<Image::ExifTool(3pm)|Image::ExifTool>
188
189=cut
190
Note: See TracBrowser for help on using the repository browser.