- Timestamp: 2021-02-26T19:39:51+13:00 (3 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/cpan/Image/ExifTool/OOXML.pm
r24107 r34921 15 15 use Image::ExifTool::ZIP; 16 16 17 $VERSION = '1.0 5';17 $VERSION = '1.08'; 18 18 19 19 # test for recognized OOXML document extensions … … 22 22 DOTX => 1, DOTM => 1, 23 23 POTX => 1, POTM => 1, 24 PPAX => 1, PPAM => 1, 24 25 PPSX => 1, PPSM => 1, 25 26 PPTX => 1, PPTM => 1, THMX => 1, … … 64 65 65 66 B<Tips:> 66 67 67 68 1) Structural ZIP tags may be ignored (if desired) with C<--ZIP:all> on the 68 69 command line. 69 70 70 71 2) Tags may be grouped by their document number in the ZIP archive with the 71 72 C<-g3> or C<-G3> option. … … 135 136 MMClips => { }, 136 137 modified => { 137 Name => 'ModifyDate', 138 Name => 'ModifyDate', 138 139 Groups => { 2 => 'Time' }, 139 140 Format => 'date', … … 209 210 sub FoundTag($$$$;$) 210 211 { 211 my ($e xifTool, $tagTablePtr, $props, $val, $attrs) = @_;212 my ($et, $tagTablePtr, $props, $val, $attrs) = @_; 212 213 return 0 unless @$props; 213 my $verbose = $e xifTool->Options('Verbose');214 my $verbose = $et->Options('Verbose'); 214 215 215 216 my $tag = $$props[-1]; 216 $e xifTool->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1;217 $et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1; 217 218 218 219 # un-escape XML character entities 219 220 $val = Image::ExifTool::XMP::UnescapeXML($val); 220 # convert OOXML-escaped characters ( ie. "_x0000d_" is a newline)221 # convert OOXML-escaped characters (eg. 
"_x0000d_" is a newline) 221 222 $val =~ s/_x([0-9a-f]{4})_/Image::ExifTool::PackUTF8(hex($1))/gie; 222 223 # convert from UTF8 to ExifTool Charset 223 $val = $e xifTool->Decode($val, 'UTF8');224 $val = $et->Decode($val, 'UTF8'); 224 225 # queue this attribute for later if necessary 225 226 if ($queueAttrs{$tag}) { … … 247 248 $tagInfo{PrintConv} = '$self->ConvertDateTime($val)'; 248 249 } 249 $e xifTool->VPrint(0, " | [adding $tag]\n") if $verbose;250 Image::ExifTool::AddTagToTable($tagTablePtr, $tag, \%tagInfo);250 $et->VPrint(0, " | [adding $tag]\n") if $verbose; 251 AddTagToTable($tagTablePtr, $tag, \%tagInfo); 251 252 } 252 253 } elsif ($tag eq 'xmlns') { … … 289 290 } 290 291 } else { 291 $e xifTool->VPrint(0, " [adding $tag]\n") if $verbose;292 Image::ExifTool::AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag });292 $et->VPrint(0, " [adding $tag]\n") if $verbose; 293 AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag }); 293 294 } 294 295 # save the tag 295 $e xifTool->HandleTag($tagTablePtr, $tag, $val);296 $et->HandleTag($tagTablePtr, $tag, $val); 296 297 297 298 # start fresh for next tag … … 312 313 sub ProcessDOCX($$) 313 314 { 314 my ($e xifTool, $dirInfo) = @_;315 my ($et, $dirInfo) = @_; 315 316 my $zip = $$dirInfo{ZIP}; 316 317 my $tagTablePtr = GetTagTable('Image::ExifTool::OOXML::Main'); … … 321 322 if ($fileType) { 322 323 # THMX is a special case because its contents.main MIME types is PPTX 323 if ($fileType eq 'PPTX' and $$e xifTool{FILE_EXT} and $$exifTool{FILE_EXT} eq 'THMX') {324 if ($fileType eq 'PPTX' and $$et{FILE_EXT} and $$et{FILE_EXT} eq 'THMX') { 324 325 $fileType = 'THMX'; 325 326 } 326 327 } else { 327 $e xifTool->VPrint(0, "Unrecognized MIME type: $mime\n");328 $et->VPrint(0, "Unrecognized MIME type: $mime\n"); 328 329 # get MIME type according to file extension 329 $fileType = $$e xifTool{FILE_EXT};330 $fileType = $$et{FILE_EXT}; 330 331 # default to 'DOCX' if this isn't a known OOXML extension 331 332 $fileType = 
'DOCX' unless $fileType and $isOOXML{$fileType}; 332 333 } 333 $e xifTool->SetFileType($fileType);334 $et->SetFileType($fileType); 334 335 335 336 # must catch all Archive::Zip warnings … … 343 344 my $file = $member->fileName(); 344 345 next unless defined $file; 345 $e xifTool->VPrint(0, "File: $file\n");346 $et->VPrint(0, "File: $file\n"); 346 347 # set the document number and extract ZIP tags 347 $$e xifTool{DOC_NUM} = ++$docNum;348 Image::ExifTool::ZIP::HandleMember($e xifTool, $member);349 # process only XML and JPEG files in "docProps" directory350 next unless $file =~ m{^docProps/ .*\.(xml|jpe?g)$}i;348 $$et{DOC_NUM} = ++$docNum; 349 Image::ExifTool::ZIP::HandleMember($et, $member); 350 # process only XML and JPEG/WMF thumbnail images in "docProps" directory 351 next unless $file =~ m{^docProps/(.*\.xml|(thumbnail\.(jpe?g|wmf)))$}i; 351 352 # get the file contents (CAREFUL! $buff MUST be local since we hand off a value ref) 352 353 my ($buff, $status) = $zip->contents($member); 353 $status and $exifTool->Warn("Error extracting $file"), next; 354 # extract JPEG as PreviewImage (should only be docProps/thumbnail.jpeg) 355 if ($file =~ /\.jpe?g/i) { 356 $exifTool->FoundTag('PreviewImage', \$buff); 354 $status and $et->Warn("Error extracting $file"), next; 355 # extract docProps/thumbnail.(jpg|mwf) as PreviewImage|PreviewMWF 356 if ($file =~ /\.(jpe?g|wmf)$/i) { 357 my $tag = $file =~ /\.wmf$/i ? 
'PreviewWMF' : 'PreviewImage'; 358 $et->FoundTag($tag, \$buff); 357 359 next; 358 360 } … … 366 368 }, 367 369 ); 368 $e xifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);370 $et->ProcessDirectory(\%dirInfo, $tagTablePtr); 369 371 undef $buff; # (free memory now) 370 372 } 371 delete $$e xifTool{DOC_NUM};373 delete $$et{DOC_NUM}; 372 374 return 1; 373 375 } … … 394 396 =head1 AUTHOR 395 397 396 Copyright 2003-20 11, Phil Harvey (phil at owl.phy.queensu.ca)398 Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com) 397 399 398 400 This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.