[34921] | 1 | #------------------------------------------------------------------------------
|
---|
| 2 | # File: Palm.pm
|
---|
| 3 | #
|
---|
| 4 | # Description: Read Palm Database files
|
---|
| 5 | #
|
---|
| 6 | # Revisions: 2014/05/28 - P. Harvey Created
|
---|
| 7 | #
|
---|
| 8 | # References: 1) http://wiki.mobileread.com/wiki/PDB
|
---|
| 9 | # 2) http://wiki.mobileread.com/wiki/MOBI
|
---|
| 10 | #------------------------------------------------------------------------------
|
---|
| 11 |
|
---|
| 12 | package Image::ExifTool::Palm;
|
---|
| 13 |
|
---|
| 14 | use strict;
|
---|
| 15 | use vars qw($VERSION);
|
---|
| 16 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
| 17 |
|
---|
| 18 | $VERSION = '1.00';
|
---|
| 19 |
|
---|
| 20 | sub ProcessEXTH($$$);
|
---|
| 21 |
|
---|
# Known Palm database signatures.  The key is the 4-byte type immediately
# followed by the 4-byte creator ID, and the value is the name of the
# application associated with that signature.
my %palmTypes = (
    '.pdfADBE'  =>  'Adobe Reader',
    'TEXtREAd'  =>  'PalmDOC',
    'BVokBDIC'  =>  'BDicty',
    'DB99DBOS'  =>  'DB (Database program)',
    'PNRdPPrs'  =>  'eReader',
    'DataPPrs'  =>  'eReader',
    'vIMGView'  =>  'FireViewer (ImageViewer)',
    'PmDBPmDB'  =>  'HanDBase',
    'InfoINDB'  =>  'InfoView',
    'ToGoToGo'  =>  'iSilo',
    'SDocSilX'  =>  'iSilo 3',
    'JbDbJBas'  =>  'JFile',
    'JfDbJFil'  =>  'JFile Pro',
    'DATALSdb'  =>  'LIST',
    'Mdb1Mdb1'  =>  'MobileDB',
    'BOOKMOBI'  =>  'Mobipocket',
    'DataPlkr'  =>  'Plucker',
    'DataSprd'  =>  'QuickSheet',
    'SM01SMem'  =>  'SuperMemo',
    'TEXtTlDc'  =>  'TealDoc',
    'InfoTlIf'  =>  'TealInfo',
    'DataTlMl'  =>  'TealMeal',
    'DataTlPt'  =>  'TealPaint',
    'dataTDBP'  =>  'ThinkDB',
    'TdatTide'  =>  'Tides',
    'ToRaTRPW'  =>  'TomeRaider',
    'zTXTGPlm'  =>  'Weasel',
    'BDOCWrdS'  =>  'WordSmith',
);
|
---|
| 53 |
|
---|
# Conversions shared by every date/time tag in the Palm header
my %dateTimeInfo = (
    # Palm timestamps are nominally seconds since Jan 1, 1904 (as in
    # QuickTime), but some software writes seconds since Jan 1, 1970 instead.
    # A raw value of at least the 1904-to-1970 interval (66 years including
    # 17 leap days) is taken to use the 1904 epoch and is shifted to the Unix
    # epoch; any smaller value is assumed to be Unix time already.
    RawConv => q{
        my $offset = (66 * 365 + 17) * 24 * 3600;
        return $val - $offset if $val >= $offset;
        return $val;
    },
    # (UTC written by "EPUB Converter", ref PH)
    ValueConv => 'ConvertUnixTime($val, 1)',
    PrintConv => '$self->ConvertDateTime($val)',
);
|
---|
| 65 |
|
---|
# Tags extracted from the fixed-size Palm Database header.  Tag IDs are
# indices in units of the default format (int32u), so ID n lies at byte
# offset 4*n in the header.
%Image::ExifTool::Palm::Main = (
    GROUPS => { 0 => 'Palm', 1 => 'Palm', 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT => 'int32u',
    NOTES => q{
        Information extracted from Palm database files (PDB and PRC extensions),
        Mobipocket electronic books (MOBI), and Amazon Kindle KF7 and KF8 books (AZW
        and AZW3).
    },
    0 => {
        Name   => 'DatabaseName',
        Format => 'string[32]',
    },
    # 8   - int16u: file attributes (not very useful)
    # 8.5 - int16u: version
    9 => {
        %dateTimeInfo,
        Name   => 'CreateDate',
        Groups => { 2 => 'Time' },
    },
    10 => {
        %dateTimeInfo,
        Name   => 'ModifyDate',
        Groups => { 2 => 'Time' },
    },
    11 => {
        %dateTimeInfo,
        Name   => 'LastBackupDate',
        Groups => { 2 => 'Time' },
    },
    12 => 'ModificationNumber',
    15 => {
        # 4-byte type followed by 4-byte creator ID
        Name      => 'PalmFileType',
        Format    => 'undef[8]',
        PrintConv => \%palmTypes,
    },
);
|
---|
| 101 |
|
---|
| 102 |
|
---|
# MOBI header tags
# (the data block starts with the 16-byte PalmDOC header, followed by the MOBI
# header proper.  Tag IDs are indices in units of the default format (int32u),
# so ID n lies at byte offset 4*n from the start of the PalmDOC header.)
%Image::ExifTool::Palm::MOBI = (
    GROUPS => { 0 => 'Palm', 1 => 'MOBI', 2 => 'Document' },
    NOTES => q{
        Information extracted from the MOBI header of Mobipocket and Amazon Kindle
        KF7 and KF8 files.
    },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    FORMAT => 'int32u',
    0 => {
        Name => 'Compression',
        Format => 'int16u',
        PrintConv => {
            1 => 'None',
            2 => 'PalmDOC',
            17480 => 'HUFF/CDIC',
        },
    },
    1 => {
        Name => 'UncompressedTextLength',
        PrintConv => \&Image::ExifTool::ConvertFileSize,
    },
    3 => {
        Name => 'Encryption',
        # the encryption type is a 16-bit field at byte offset 12 (the next 2
        # bytes are a separate field), so it must not be read with the default
        # int32u format or non-zero types end up in the upper 16 bits and
        # never match the conversions below
        Format => 'int16u',
        PrintConv => {
            0 => 'None',
            1 => 'Old Mobipocket',
            2 => 'Mobipocket',
        },
    },
    6 => {
        Name => 'MobiType',
        PrintConv => {
            2 => 'Mobipocket Book',
            3 => 'PalmDoc Book',
            4 => 'Audio',
            232 => 'mobipocket? generated by kindlegen1.2',
            248 => 'KF8: generated by kindlegen2',
            257 => 'News',
            258 => 'News_Feed',
            259 => 'News_Magazine',
            513 => 'PICS',
            514 => 'WORD',
            515 => 'XLS',
            516 => 'PPT',
            517 => 'TEXT',
            518 => 'HTML',
        },
    },
    7 => {
        Name => 'CodePage',
        # remember the code page so ProcessPDB can pick the text encoding
        RawConv => '$$self{CodePage} = $val',
        PrintConv => {
            # just define commonly used code pages
            # (a much more complete list may be found in FlashPix.pm)
            1252 => 'Windows Latin 1 (Western European)',
            65001 => 'Unicode (UTF-8)',
        },
    },
    9 => 'MobiVersion',
    21 => 'BookName', # this is actually an offset, but replace it with the string later
    26 => 'MinimumVersion',
);
|
---|
| 166 |
|
---|
# MOBI extended header tags
# (tag IDs are the EXTH record types.  Values are read as strings unless a
# Format is given for the individual tag.)
%Image::ExifTool::Palm::EXTH = (
    GROUPS => { 0 => 'Palm', 1 => 'MOBI', 2 => 'Document' },
    FORMAT => 'string',
    NOTES => 'Information extracted from the MOBI extended header.',
    PROCESS_PROC => \&ProcessEXTH,
    1 => 'DRMServerID',
    2 => 'DRMCommerceID',
    3 => 'DRM_E-BookBaseID',
    100 => { Name => 'Author', Groups => { 2 => 'Author' } },
    101 => 'Publisher',
    102 => 'Imprint',
    103 => 'Description',
    104 => 'ISBN',
    105 => { Name => 'Subject', List => 1 },
    106 => {
        Name => 'PublishDate',
        Groups => { 2 => 'Time' },
        ValueConv => q{
            require Image::ExifTool::XMP;
            Image::ExifTool::XMP::ConvertXMPDate($val, 1);
        },
        PrintConv => '$self->ConvertDateTime($val)',
    },
    107 => 'Review',
    108 => 'Contributor',
    109 => { Name => 'Rights', Groups => { 2 => 'Author' } },
    110 => 'SubjectCode',
    111 => 'BookType',
    112 => 'Source',
    113 => 'ASIN',
    114 => 'BookVersion',
    115 => { Name => 'SampleFlag', Format => 'int32u' },
    116 => { Name => 'StartReading', Format => 'int32u' },
    117 => 'Adult',
    118 => 'RetailPrice',
    119 => 'RetailPriceCurrency',
  # 121 => 'KF8BoundaryOffset',
    125 => { Name => 'ResourceCount', Format => 'int32u' },
    129 => 'KF8CoverURI',
    200 => 'DictionaryShortName',
  # 201 => { Name => 'CoverOffset', Format => 'int32u' },
  # 202 => { Name => 'ThumbOffset', Format => 'int32u' },
  # 203 => 'HasFakeCover',
    204 => {
        Name => 'CreatorSoftware',
        Format => 'int32u',
        PrintConv => {
            1 => 'Mobigen',
            2 => 'Mobipocket',
            200 => 'Kindlegen (Windows)',
            201 => 'Kindlegen (Linux)',
            202 => 'Kindlegen (Mac)',
        },
    },
    205 => { Name => 'CreatorMajorVersion', Format => 'int32u' },
    206 => { Name => 'CreatorMinorVersion', Format => 'int32u' },
    207 => { Name => 'CreatorBuildNumber', Format => 'int32u' },
    208 => 'Watermark',
    209 => 'Tamper-proofKeys',
  # 300 => 'FontSignature',
    401 => { Name => 'ClippingLimit', Format => 'int8u' },
    402 => 'PublisherLimit',
    404 => {
        Name => 'TextToSpeech',
        Format => 'int8u',
        PrintConv => { 0 => 'Enabled', 1 => 'Disabled' },
    },
    405 => { Name => 'RentalFlag', Format => 'int8u' }, #?
    406 => 'RentalExpirationDate',
    501 => {
        Name => 'CDEType',
        # this record holds a 4-character code rather than a number, so it is
        # read with the default string format instead of int32u
        PrintConv => {
            PDOC => 'Personal Doc',
            EBOK => 'ebook',
            EBSP => 'ebook sample',
        },
    },
    502 => 'LastUpdateTime',
    503 => 'UpdatedTitle',
    504 => 'ASIN2',
    524 => 'Language',
    525 => 'Alignment',
    535 => 'CreatorBuildNumber2',
);
|
---|
| 245 |
|
---|
#------------------------------------------------------------------------------
# Process the MOBI extended header
# Inputs: 0) ExifTool ref, 1) dirInfo ref, 2) tag table ref
# Returns: 1 (EXTH should have already been validated)
# Notes: - dirInfo DataPt must reference only the EXTH records (ie. the data
#          following the 12-byte EXTH header); the optional Encoding entry
#          names the character set of the text values (default 'UTF8')
#        - each record is a 4-byte tag ID, then a 4-byte record length which
#          includes these 8 header bytes, then the value
#        - processing stops quietly at the first record that doesn't fit
sub ProcessEXTH($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $enc = $$dirInfo{Encoding} || 'UTF8';
    my $dirLen = length $$dataPt;
    my ($index, $pos);

    $et->VerboseDir('EXTH', $$dirInfo{NumEntries}, $dirLen);

    # process the EXTH entries
    # (NumEntries is used only for the verbose output; the loop is bounded by
    # the length of the data instead)
    for ($index=0, $pos=0; ; ++$index) {
        last if $pos + 8 > $dirLen;         # need room for the record header
        my $tag = Get32u($dataPt, $pos);
        my $len = Get32u($dataPt, $pos + 4);
        # a valid length covers at least the header and stays inside the data
        last if $len < 8 or $pos + $len > $dirLen;
        my $key = $et->HandleTag($tagTablePtr, $tag, undef,
            DataPt  => $dataPt,
            DataPos => $dataPos,
            Start   => $pos + 8,
            Size    => $len - 8,
            Index   => $index,
        );
        # recode text if necessary
        if ($key) {
            my $stored = $$et{VALUE}{$key};
            if (ref $stored eq 'ARRAY') {
                # NOTE(review): assumes that repeated List-type tags (eg.
                # Subject) are accumulated by ExifTool in an array ref under a
                # single key, with the new value appended last -- confirm.
                # Earlier elements were recoded when they were extracted, so
                # recode only the newest one.  (Passing the array ref itself
                # to Decode would stringify it when the encodings differ.)
                $$stored[-1] = $et->Decode($$stored[-1], $enc) if @$stored;
            } else {
                $$et{VALUE}{$key} = $et->Decode($stored, $enc);
            }
        }
        $pos += $len;
    }
    return 1;
}
|
---|
| 280 |
|
---|
#------------------------------------------------------------------------------
# Extract information from a Palm DB file
# Inputs: 0) ExifTool ref, 1) dirInfo reference
# Returns: 1 if this was a recognized PDB file, 0 otherwise
# Notes: for Mobipocket files with at least one record, the MOBI header and
#        (when flagged as present) the EXTH extended header are processed too.
#        Problems found after the file type is recognized generate a warning
#        but still return 1.
sub ProcessPDB($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $verbose = $et->Options('Verbose');  # (not referenced below)
    my ($data, $tmp, $exthSize, $charset);

    # the first 86 bytes hold everything needed from the PDB header, and the
    # type/creator ID at offset 60 must be one we know about
    $raf->Read($data, 86) == 86 or return 0;
    my $type = $palmTypes{substr($data, 60, 8)} or return 0;

    #
    # Palm DB file header
    #
    $et->SetFileType($type eq 'Mobipocket' ? 'MOBI' : 'PDB');
    SetByteOrder('MM');
    $et->ProcessDirectory({ DataPt => \$data },
                          GetTagTable('Image::ExifTool::Palm::Main'));

    # finished unless this is a Mobipocket file with a non-zero int16u at
    # offset 76 (taken here to be the record count)
    return 1 if $type ne 'Mobipocket' or not Get16u(\$data, 76);

    #
    # MOBI header (should be the first record)
    #
    my $offset = Get32u(\$data, 78);    # offset of first record in file
    my $haveHeader = ($raf->Seek($offset, 0) && $raf->Read($data, 274) == 274);
    if (not $haveHeader) {
        $et->Warn('Truncated MOBI header');
        return 1;
    }
    if (substr($data, 16, 4) ne 'MOBI') {
        $et->Warn('Invalid MOBI header');
        return 1;
    }
    $et->ProcessDirectory({ DataPt => \$data },
                          GetTagTable('Image::ExifTool::Palm::MOBI'));

    # determine the text encoding from the code page stored while processing
    # the MOBI table, falling back to UTF-8
    if ($$et{CodePage}) {
        $charset = $Image::ExifTool::charsetName{"cp$$et{CodePage}"};
    }
    $charset ||= 'UTF8';

    # the BookName tag currently holds an offset; replace it with the string
    # found at that position (relative to the start of the record)
    my $nameOff = Get32u(\$data, 84);
    my $nameLen = Get32u(\$data, 88);
    unless ($raf->Seek($offset + $nameOff, 0) and
            $raf->Read($tmp, $nameLen) == $nameLen)
    {
        $tmp = '<err>';
    }
    $$et{VALUE}{BookName} = $et->Decode($tmp, $charset);

    #
    # MOBI extended header (present only if flag bit 0x40 is set)
    #
    return 1 unless Get32u(\$data, 128) & 0x40;

    # length of MOBI header including the 16-byte PalmDOC header
    my $hdrLen = Get32u(\$data, 20) + 16;

    # the 12-byte EXTH header is: 'EXTH', total size, number of entries
    my $exthValid = 0;
    if ($raf->Seek($offset + $hdrLen, 0) and $raf->Read($tmp, 12) == 12 and
        substr($tmp, 0, 4) eq 'EXTH')
    {
        $exthSize = Get32u(\$tmp, 4);
        $exthValid = 1 if $exthSize > 12;
    }
    if (not $exthValid) {
        $et->Warn('Invalid MOBI extended header');
        return 1;
    }

    # read the EXTH records (everything after the 12-byte header) and process
    $exthSize -= 12;
    if ($raf->Read($data, $exthSize) != $exthSize) {
        $et->Warn('Truncated MOBI extended header');
        return 1;
    }
    my %exthDir = (
        DataPt     => \$data,
        DataPos    => $offset + $hdrLen + 12,
        NumEntries => Get32u(\$tmp, 8),
        Encoding   => $charset,
    );
    $et->ProcessDirectory(\%exthDir, GetTagTable('Image::ExifTool::Palm::EXTH'));

    return 1;
}
|
---|
| 361 |
|
---|
| 362 | 1; # end
|
---|
| 363 |
|
---|
| 364 | __END__
|
---|
| 365 |
|
---|
| 366 | =head1 NAME
|
---|
| 367 |
|
---|
| 368 | Image::ExifTool::Palm - Read Palm Database files
|
---|
| 369 |
|
---|
| 370 | =head1 SYNOPSIS
|
---|
| 371 |
|
---|
| 372 | This module is used by Image::ExifTool
|
---|
| 373 |
|
---|
| 374 | =head1 DESCRIPTION
|
---|
| 375 |
|
---|
| 376 | This module contains code to extract metadata from Palm database files (PDB
|
---|
| 377 | and PRC extensions), Mobipocket electronic books (MOBI), and Amazon Kindle
|
---|
| 378 | KF7 and KF8 books (AZW and AZW3).
|
---|
| 379 |
|
---|
| 380 | =head1 AUTHOR
|
---|
| 381 |
|
---|
| 382 | Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
|
---|
| 383 |
|
---|
| 384 | This library is free software; you can redistribute it and/or modify it
|
---|
| 385 | under the same terms as Perl itself.
|
---|
| 386 |
|
---|
| 387 | =head1 REFERENCES
|
---|
| 388 |
|
---|
| 389 | =over 4
|
---|
| 390 |
|
---|
| 391 | =item L<http://wiki.mobileread.com/wiki/PDB>
|
---|
| 392 |
|
---|
| 393 | =item L<http://wiki.mobileread.com/wiki/MOBI>
|
---|
| 394 |
|
---|
| 395 | =back
|
---|
| 396 |
|
---|
| 397 | =head1 SEE ALSO
|
---|
| 398 |
|
---|
| 399 | L<Image::ExifTool::TagNames/Palm Tags>,
|
---|
| 400 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
| 401 |
|
---|
| 402 | =cut
|
---|
| 403 |
|
---|