#------------------------------------------------------------------------------
# File:         Palm.pm
#
# Description:  Read Palm Database files
#
# Revisions:    2014/05/28 - P. Harvey Created
#
# References:   1) http://wiki.mobileread.com/wiki/PDB
#               2) http://wiki.mobileread.com/wiki/MOBI
#------------------------------------------------------------------------------

package Image::ExifTool::Palm;

use strict;
use vars qw($VERSION);
use Image::ExifTool qw(:DataAccess :Utils);

$VERSION = '1.00';

# forward declaration: ProcessEXTH is referenced by the EXTH tag table
# (as its PROCESS_PROC) before the subroutine itself is defined
sub ProcessEXTH($$$);

# type/creator ID's for Palm database files
# (keys are the 8 bytes found at offset 60 of the file header, as looked up by
# ProcessPDB; values are the names of the applications that use each ID)
my %palmTypes = (
    '.pdfADBE' => 'Adobe Reader',
    'TEXtREAd' => 'PalmDOC',
    'BVokBDIC' => 'BDicty',
    'DB99DBOS' => 'DB (Database program)',
    'PNRdPPrs' => 'eReader',
    'DataPPrs' => 'eReader',
    'vIMGView' => 'FireViewer (ImageViewer)',
    'PmDBPmDB' => 'HanDBase',
    'InfoINDB' => 'InfoView',
    'ToGoToGo' => 'iSilo',
    'SDocSilX' => 'iSilo 3',
    'JbDbJBas' => 'JFile',
    'JfDbJFil' => 'JFile Pro',
    'DATALSdb' => 'LIST',
    'Mdb1Mdb1' => 'MobileDB',
    'BOOKMOBI' => 'Mobipocket',
    'DataPlkr' => 'Plucker',
    'DataSprd' => 'QuickSheet',
    'SM01SMem' => 'SuperMemo',
    'TEXtTlDc' => 'TealDoc',
    'InfoTlIf' => 'TealInfo',
    'DataTlMl' => 'TealMeal',
    'DataTlPt' => 'TealPaint',
    'dataTDBP' => 'ThinkDB',
    'TdatTide' => 'Tides',
    'ToRaTRPW' => 'TomeRaider',
    'zTXTGPlm' => 'Weasel',
    'BDOCWrdS' => 'WordSmith',
);

# conversions shared by all of the date/time tags in the Palm database header
my %dateTimeInfo = (
    # like QuickTime, the time zero should be Jan 1, 1904, but not all software writes this,
    # so assume a time zero of Jan 1, 1970 if the date is before this
    # ($offset is the number of seconds in the 66 years, 17 of them leap years,
    # between the two epochs)
    RawConv => q{
        my $offset = (66 * 365 + 17) * 24 * 3600;
        return $val - $offset if $val >= $offset;
        return $val;
    },
    ValueConv => 'ConvertUnixTime($val, 1)', # (UTC written by "EPUB Converter", ref PH)
    PrintConv => '$self->ConvertDateTime($val)',
);

# Palm Database header information
# (tag IDs are indices in units of the default int32u FORMAT, so the
# corresponding byte offset in the header is 4 times the tag ID)
%Image::ExifTool::Palm::Main = (
    GROUPS => { 0 => 'Palm', 1 => 'Palm', 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT => 'int32u',
    NOTES => q{
        Information extracted from Palm database files (PDB and PRC extensions),
        Mobipocket electronic books (MOBI), and Amazon Kindle KF7 and KF8 books (AZW
        and AZW3).
    },
    0 => { Name => 'DatabaseName', Format => 'string[32]' },
    # 8 - int16u: file attributes (not very useful)
    # 8.5 - int16u: version
    9 => {
        Name => 'CreateDate',
        Groups => { 2 => 'Time' },
        %dateTimeInfo,
    },
    10 => {
        Name => 'ModifyDate',
        Groups => { 2 => 'Time' },
        %dateTimeInfo,
    },
    11 => {
        Name => 'LastBackupDate',
        Groups => { 2 => 'Time' },
        %dateTimeInfo,
    },
    12 => 'ModificationNumber',
    # the 8-byte type/creator ID (byte offset 60, the same bytes that
    # ProcessPDB uses to recognize the file)
    15 => {
        Name => 'PalmFileType',
        Format => 'undef[8]',
        PrintConv => \%palmTypes,
    },
);

# MOBI header tags
# (tag IDs are indices in units of the default int32u FORMAT, counted from the
# start of the record that ProcessPDB reads, so byte offset = 4 * tag ID)
%Image::ExifTool::Palm::MOBI = (
    GROUPS => { 0 => 'Palm', 1 => 'MOBI', 2 => 'Document' },
    NOTES => q{
        Information extracted from the MOBI header of Mobipocket and Amazon Kindle
        KF7 and KF8 files.
    },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT => 'int32u',
    0 => {
        Name => 'Compression',
        Format => 'int16u',
        PrintConv => {
            1 => 'None',
            2 => 'PalmDOC',
            17480 => 'HUFF/CDIC',
        },
    },
    1 => {
        Name => 'UncompressedTextLength',
        PrintConv => \&Image::ExifTool::ConvertFileSize,
    },
    3 => {
        Name => 'Encryption',
        # the encryption type is a 16-bit field (ref 2); reading it with the
        # table's default int32u format would fold the following 2 bytes into
        # the value, and the PrintConv below would never match for encrypted books
        Format => 'int16u',
        PrintConv => {
            0 => 'None',
            1 => 'Old Mobipocket',
            2 => 'Mobipocket',
        },
    },
    6 => {
        Name => 'MobiType',
        PrintConv => {
            2 => 'Mobipocket Book',
            3 => 'PalmDoc Book',
            4 => 'Audio',
            232 => 'mobipocket? generated by kindlegen1.2',
            248 => 'KF8: generated by kindlegen2',
            257 => 'News',
            258 => 'News_Feed',
            259 => 'News_Magazine',
            513 => 'PICS',
            514 => 'WORD',
            515 => 'XLS',
            516 => 'PPT',
            517 => 'TEXT',
            518 => 'HTML',
        },
    },
    7 => {
        Name => 'CodePage',
        # save the code page so ProcessPDB can select the text encoding
        RawConv => '$$self{CodePage} = $val',
        PrintConv => {
            # just define commonly used code pages
            # (a much more complete list may be found in FlashPix.pm)
            1252 => 'Windows Latin 1 (Western European)',
            65001 => 'Unicode (UTF-8)',
        },
    },
    9 => 'MobiVersion',
    21 => 'BookName', # this is actually an offset, but replace it with the string later
    26 => 'MinimumVersion',
);

# MOBI extended header tags
# (tag IDs are the EXTH record types; values are text in the book's encoding
# unless a numerical Format is specified)
%Image::ExifTool::Palm::EXTH = (
    GROUPS => { 0 => 'Palm', 1 => 'MOBI', 2 => 'Document' },
    FORMAT => 'string',
    NOTES => 'Information extracted from the MOBI extended header.',
    PROCESS_PROC => \&ProcessEXTH,
    1 => 'DRMServerID',
    2 => 'DRMCommerceID',
    3 => 'DRM_E-BookBaseID',
    100 => { Name => 'Author', Groups => { 2 => 'Author' } },
    101 => 'Publisher',
    102 => 'Imprint',
    103 => 'Description',
    104 => 'ISBN',
    105 => { Name => 'Subject', List => 1 },
    106 => {
        Name => 'PublishDate',
        Groups => { 2 => 'Time' },
        ValueConv => q{
            require Image::ExifTool::XMP;
            Image::ExifTool::XMP::ConvertXMPDate($val, 1);
        },
        PrintConv => '$self->ConvertDateTime($val)',
    },
    107 => 'Review',
    108 => 'Contributor',
    109 => { Name => 'Rights', Groups => { 2 => 'Author' } },
    110 => 'SubjectCode',
    111 => 'BookType',
    112 => 'Source',
    113 => 'ASIN',
    114 => 'BookVersion',
    115 => { Name => 'SampleFlag', Format => 'int32u' },
    116 => { Name => 'StartReading', Format => 'int32u' },
    117 => 'Adult',
    118 => 'RetailPrice',
    119 => 'RetailPriceCurrency',
    # 121 => 'KF8BoundaryOffset',
    125 => { Name => 'ResourceCount', Format => 'int32u' },
    129 => 'KF8CoverURI',
    200 => 'DictionaryShortName',
    # 201 => { Name => 'CoverOffset', Format => 'int32u' },
    # 202 => { Name => 'ThumbOffset', Format => 'int32u' },
    # 203 => 'HasFakeCover',
    204 => {
        Name => 'CreatorSoftware',
        Format => 'int32u',
        PrintConv => {
            1 => 'Mobigen',
            2 => 'Mobipocket',
            200 => 'Kindlegen (Windows)',
            201 => 'Kindlegen (Linux)',
            202 => 'Kindlegen (Mac)',
        },
    },
    205 => { Name => 'CreatorMajorVersion', Format => 'int32u' },
    206 => { Name => 'CreatorMinorVersion', Format => 'int32u' },
    207 => { Name => 'CreatorBuildNumber', Format => 'int32u' },
    208 => 'Watermark',
    209 => 'Tamper-proofKeys',
    # 300 => 'FontSignature',
    401 => { Name => 'ClippingLimit', Format => 'int8u' },
    402 => 'PublisherLimit',
    404 => {
        Name => 'TextToSpeech',
        Format => 'int8u',
        PrintConv => { 0 => 'Enabled', 1 => 'Disabled' },
    },
    405 => { Name => 'RentalFlag', Format => 'int8u' }, #?
    406 => 'RentalExpirationDate',
    501 => { Name => 'CDEType', Format => 'int32u' },
    502 => 'LastUpdateTime',
    503 => 'UpdatedTitle',
    504 => 'ASIN2',
    524 => 'Language',
    525 => 'Alignment',
    535 => 'CreatorBuildNumber2',
);

#------------------------------------------------------------------------------
# Process the MOBI extended header
# Inputs: 0) ExifTool ref, 1) dirInfo ref, 2) tag table ref
# Returns: 1 (EXTH should have already been validated)
# Notes: - reads DataPt, DataPos, NumEntries and Encoding from dirInfo
#          (Encoding defaults to 'UTF8' if not specified)
#        - each record is parsed as an int32u tag ID, then an int32u record
#          length which includes this 8-byte record header, then the value
#        - parsing stops silently at the first record with an invalid length
#          or which extends past the end of the data
sub ProcessEXTH($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $enc = $$dirInfo{Encoding} || 'UTF8';
    my $dirLen = length $$dataPt;
    my ($index, $pos);

    $et->VerboseDir('EXTH', $$dirInfo{NumEntries}, $dirLen);

    # process the EXTH entries
    for ($index=0, $pos=0; ; ++$index) {
        last if $pos + 8 > $dirLen;     # need room for the record header
        my $tag = Get32u($dataPt, $pos);
        my $len = Get32u($dataPt, $pos + 4);
        last if $len < 8 or $pos + $len > $dirLen;
        my $key = $et->HandleTag($tagTablePtr, $tag, undef,
            DataPt  => $dataPt,
            DataPos => $dataPos,
            Start   => $pos + 8,
            Size    => $len - 8,
            Index   => $index,
        );
        # recode text if necessary
        if ($key) {
            my $val = $$et{VALUE}{$key};
            if (ref $val eq 'ARRAY') {
                # NOTE(review): assumes that repeated List-type tags (eg. Subject)
                # are accumulated by ExifTool into an ARRAY ref with the newest
                # value last, and that earlier elements were already recoded on
                # previous iterations -- confirm against FoundTag.  Passing the
                # reference itself to Decode would operate on its stringified
                # form instead of the text.
                $$val[-1] = $et->Decode($$val[-1], $enc) if @$val;
            } else {
                $$et{VALUE}{$key} = $et->Decode($val, $enc);
            }
        }
        $pos += $len;
    }
    return 1;
}

#------------------------------------------------------------------------------
# Extract information from a Palm DB file
# Inputs: 0) ExifTool ref, 1) dirInfo reference
# Returns: 1 if this was a recognized PDB file, 0 otherwise
# Notes: - the file is recognized by the 8-byte type/creator ID at offset 60
#        - for Mobipocket files the MOBI header and (if flagged) the EXTH
#          extended header are also processed; problems in these only generate
#          a warning, and the return value is still 1
sub ProcessPDB($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $buf2, $size, $enc);

    # verify this is a valid Palm DB file
    # (86 bytes is enough to include the offset of the first record at byte 78)
    return 0 unless $raf->Read($buff, 86) == 86;
    my $type = $palmTypes{substr($buff, 60, 8)};
    return 0 unless $type;
#
# Read and process the Palm DB file header
#
    $et->SetFileType($type eq 'Mobipocket' ? 'MOBI' : 'PDB');
    SetByteOrder('MM');

    my $tagTablePtr = GetTagTable('Image::ExifTool::Palm::Main');
    $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);

    # nothing more to do unless this is a Mobipocket file with a non-zero
    # int16u at byte 76 (presumably the number of records -- TODO confirm)
    return 1 unless $type eq 'Mobipocket' and Get16u(\$buff, 76);
#
# Read and process MOBI header (should be the first record)
#
    my $offset = Get32u(\$buff, 78); # get offset to first record
    unless ($raf->Seek($offset, 0) and $raf->Read($buff, 274) == 274) {
        $et->Warn('Truncated MOBI header');
        return 1;
    }
    unless (substr($buff, 16, 4) eq 'MOBI') {
        $et->Warn('Invalid MOBI header');
        return 1;
    }
    $tagTablePtr = GetTagTable('Image::ExifTool::Palm::MOBI');
    $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);

    # get text encoding
    # ($$et{CodePage} is set by the RawConv of the MOBI CodePage tag)
    $enc = $Image::ExifTool::charsetName{"cp$$et{CodePage}"} if $$et{CodePage};
    $enc = 'UTF8' unless $enc;

    # extract the BookName string
    # (its offset is relative to the start of the record, and the value stored
    # for the BookName tag by the MOBI table is only this offset, so it is
    # replaced here with the decoded text, or '<err>' if it can't be read)
    # NOTE(review): this overwrites VALUE{BookName} unconditionally -- confirm
    # that the key always exists at this point (eg. when tags are excluded)
    my $off = Get32u(\$buff, 84);
    my $len = Get32u(\$buff, 88);

    unless ($raf->Seek($offset+$off, 0) and $raf->Read($buf2, $len) == $len) {
        $buf2 = '<err>';
    }
    $$et{VALUE}{BookName} = $et->Decode($buf2, $enc);
#
# Process the MOBI extended header if it exists
#
    # first, check the flag bit to see if the EXTH record should exist
    my $flag = Get32u(\$buff, 128);
    return 1 unless $flag & 0x40;   # check extended header flag

    $len = Get32u(\$buff, 20) + 16; # MOBI header length (including PalmDOC header)

    # the EXTH block starts with 'EXTH', an int32u size which includes this
    # 12-byte header, and an int32u which is passed on as the number of entries
    unless ($raf->Seek($offset+$len, 0) and $raf->Read($buf2, 12) == 12 and
            substr($buf2,0,4) eq 'EXTH' and ($size = Get32u(\$buf2, 4)) > 12)
    {
        $et->Warn('Invalid MOBI extended header');
        return 1;
    }

    # read and process the MOBI extended header
    $size -= 12;
    unless ($raf->Read($buff, $size) == $size) {
        $et->Warn('Truncated MOBI extended header');
        return 1;
    }
    my %exthInfo = (
        DataPt     => \$buff,
        DataPos    => $offset + $len + 12,
        NumEntries => Get32u(\$buf2, 8),
        Encoding   => $enc,
    );
    $tagTablePtr = GetTagTable('Image::ExifTool::Palm::EXTH');
    $et->ProcessDirectory(\%exthInfo, $tagTablePtr);

    return 1;
}

1; # end

__END__

=head1 NAME

Image::ExifTool::Palm - Read Palm Database files

=head1 SYNOPSIS

This module is used by Image::ExifTool

=head1 DESCRIPTION

This module contains code to extract metadata from Palm database files (PDB
and PRC extensions), Mobipocket electronic books (MOBI), and Amazon Kindle
KF7 and KF8 books (AZW and AZW3).

=head1 AUTHOR

Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 REFERENCES

=over 4

=item L<http://wiki.mobileread.com/wiki/PDB>

=item L<http://wiki.mobileread.com/wiki/MOBI>

=back

=head1 SEE ALSO

L<Image::ExifTool::TagNames/Palm Tags>,
L<Image::ExifTool(3pm)|Image::ExifTool>

=cut
