Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields, a regex that lists the metadata fields to apply join_before_split to; previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/CaptureOne.pm

    r24107 r34921  
    1818use Image::ExifTool::ZIP;
    1919
    20 $VERSION = '1.02';
     20$VERSION = '1.04';
    2121
    2222# CaptureOne COS XML tags
     
    3434# Inputs: 0) attribute list ref, 1) attr hash ref,
    3535#         2) property name ref, 3) property value ref
     36# Returns: true if value was changed
    3637sub HandleCOSAttrs($$$$)
    3738{
    3839    my ($attrList, $attrs, $prop, $valPt) = @_;
     40    my $changed;
    3941    if (not length $$valPt and defined $$attrs{K} and defined $$attrs{V}) {
    4042        $$prop = $$attrs{K};
     
    5153            }
    5254        }
    53     }
     55        $changed = 1;
     56    }
     57    return $changed;
    5458}
    5559
     
    6266sub FoundCOS($$$$;$)
    6367{
    64     my ($exifTool, $tagTablePtr, $props, $val, $attrs) = @_;
     68    my ($et, $tagTablePtr, $props, $val, $attrs) = @_;
    6569
    6670    my $tag = $$props[-1];
    6771    unless ($$tagTablePtr{$tag}) {
    68         $exifTool->VPrint(0, "  | [adding $tag]\n");
     72        $et->VPrint(0, "  | [adding $tag]\n");
    6973        my $name = ucfirst $tag;
    7074        $name =~ tr/-_a-zA-Z0-9//dc;
     
    7882            $tagInfo{PrintConv} = '$self->ConvertDateTime($val)';
    7983        }
    80         Image::ExifTool::AddTagToTable($tagTablePtr, $tag, \%tagInfo);
     84        AddTagToTable($tagTablePtr, $tag, \%tagInfo);
    8185    }
    8286    # convert from UTF8 to ExifTool Charset
    83     $val = $exifTool->Decode($val, "UTF8");
     87    $val = $et->Decode($val, "UTF8");
    8488    # un-escape XML character entities
    8589    $val = Image::ExifTool::XMP::UnescapeXML($val);
    86     $exifTool->HandleTag($tagTablePtr, $tag, $val);
     90    $et->HandleTag($tagTablePtr, $tag, $val);
    8791    return 0;
    8892}
     
    9498sub ProcessCOS($$)
    9599{
    96     my ($exifTool, $dirInfo) = @_;
     100    my ($et, $dirInfo) = @_;
    97101
    98102    # process using XMP module, but override handling of attributes and tags
     
    102106    };
    103107    my $tagTablePtr = GetTagTable('Image::ExifTool::CaptureOne::Main');
    104     my $success = $exifTool->ProcessDirectory($dirInfo, $tagTablePtr);
     108    my $success = $et->ProcessDirectory($dirInfo, $tagTablePtr);
    105109    delete $$dirInfo{XMLParseArgs};
    106110    return $success;
     
    116120sub ProcessEIP($$)
    117121{
    118     my ($exifTool, $dirInfo) = @_;
     122    my ($et, $dirInfo) = @_;
    119123    my $zip = $$dirInfo{ZIP};
    120124    my ($file, $buff, $status, $member, %parseFile);
    121125
    122     $exifTool->SetFileType('EIP');
     126    $et->SetFileType('EIP');
    123127
    124128    # must catch all Archive::Zip warnings
     
    156160        $file = $member->fileName();
    157161        next unless defined $file;
    158         $exifTool->VPrint(0, "File: $file\n");
     162        $et->VPrint(0, "File: $file\n");
    159163        # set the document number and extract ZIP tags
    160         $$exifTool{DOC_NUM} = ++$docNum;
    161         Image::ExifTool::ZIP::HandleMember($exifTool, $member);
     164        $$et{DOC_NUM} = ++$docNum;
     165        Image::ExifTool::ZIP::HandleMember($et, $member);
    162166        if (%parseFile) {
    163167            next unless $parseFile{$file};
     
    170174        # Note: this could use a LOT of memory here for RAW images...
    171175        ($buff, $status) = $zip->contents($member);
    172         $status and $exifTool->Warn("Error extracting $file"), next;
     176        $status and $et->Warn("Error extracting $file"), next;
    173177        if ($file =~ /\.cos$/i) {
    174178            # process Capture One Settings files
     
    178182                DataLen => length $buff,
    179183            );
    180             ProcessCOS($exifTool, \%dirInfo);
     184            ProcessCOS($et, \%dirInfo);
    181185        } else {
    182186            # set HtmlDump error if necessary because it doesn't work with embedded files
    183             if ($$exifTool{HTML_DUMP}) {
    184                 $$exifTool{HTML_DUMP}{Error} = "Sorry, can't dump images embedded in ZIP files";
     187            if ($$et{HTML_DUMP}) {
     188                $$et{HTML_DUMP}{Error} = "Sorry, can't dump images embedded in ZIP files";
    185189            }
    186190            # process IIQ, JPEG and TIFF images
    187             $exifTool->ExtractInfo(\$buff, { ReEntry => 1 });
     191            $et->ExtractInfo(\$buff, { ReEntry => 1 });
    188192        }
    189193        undef $buff;    # (free memory now)
    190194    }
    191     delete $$exifTool{DOC_NUM};
     195    delete $$et{DOC_NUM};
    192196    return 1;
    193197}
     
    218222=head1 AUTHOR
    219223
    220 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     224Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    221225
    222226This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.