Ignore:
Timestamp:
2021-02-26T19:39:51+13:00 (3 years ago)
Author:
anupama
Message:

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators were stored as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. Kathy and Dr Bainbridge also came up with a new option, which I added, called apply_join_before_split_to_metafields: a regex that lists the metadata fields to which join_before_split is applied. Previously join_before_split was always applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, since it stores values by word instead of by phrase. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it, and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cpan/Image/ExifTool/HtmlDump.pm

    r24107 r34921  
    1414use Image::ExifTool::HTML qw(EscapeHTML);
    1515
    16 $VERSION = '1.30';
    17 
    18 sub DumpTable($$$;$$$$$);
     16$VERSION = '1.39';
     17
     18sub DumpTable($$$;$$$$$$);
    1919sub Open($$$;@);
    2020sub Write($@);
     
    3333my $htmlHeader2 = <<_END_PART_2_;
    3434</title>
     35<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    3536<style type="text/css">
    3637<!--
     
    6162    position: absolute;
    6263    background: #ffffdd;
     64    zoom: 1;
     65    -moz-opacity: 0.8;
     66    -khtml-opacity: 0.8;
     67    -ms-filter: 'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)';
     68    filter: alpha(opacity=80);
    6369    opacity: 0.8;
    64     -moz-opacity: 0.8;
    65     filter: alpha(opacity=80);
    66     -ms-filter: 'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)';
    6770    z-index: 2;
    6871}
     
    9598var ie6 = navigator.userAgent.toLowerCase().indexOf('msie 6') >= 0;
    9699var mspan = new Array;
    97 var hlist, tt, tb;
     100var clicked = 0;
     101var hlist, tt, tb, firstOutEvt, lastInEvt;
    98102
    99103function GetElementsByClass(classname, tagname) {
     
    110114    }
    111115  }
    112   delete list;
    113116  return found;
     117}
     118
     119// click mouse
     120function doClick(e)
     121{
     122  if (!clicked) {
     123    firstOutEvt = lastInEvt = undefined;
     124    high(e, 2);
     125    if (hlist) clicked = 1;
     126  } else {
     127    clicked = 0;
     128    if (firstOutEvt) high(firstOutEvt, 0);
     129    if (lastInEvt) high(lastInEvt, 1);
     130  }
    114131}
    115132
     
    161178// highlight/unhighlight text
    162179function high(e,on) {
     180  if (on) {
     181    lastInEvt = e;
     182  } else {
     183    if (!firstOutEvt) firstOutEvt = e;
     184  }
     185  if (clicked) return;
    163186  var targ;
    164187  if (e.target) targ = e.target;
     
    170193    if (hlist) {
    171194      for (var i=0; i<hlist.length; ++i) {
    172         hlist[i].style.background = 'transparent';
     195        for (var j=0; j<hlist[i].length; ++j) {
     196          hlist[i][j].style.background = 'transparent';
     197        }
    173198      }
    174199      hlist = null;
     
    189214      }
    190215      // highlight anchor elements with the same name
    191       hlist = document.getElementsByName(targ.name);
     216      hlist = new Array;
     217      hlist.push(document.getElementsByName(targ.name));
     218      // is this an IFD pointer?
     219      var pos = targ.className.indexOf('Offset_');
     220      if (pos > 0) {
     221        // add elements from this IFD to our highlight list
     222        hlist.push(document.getElementsByClassName(targ.className.substr(pos+7)));
     223      }
    192224      // use class name to highlight span elements if necessary
    193225      for (var i=0; i<mspan.length; ++i) {
    194226        if (mspan[i] != targ.name) continue;
    195         var slist = GetElementsByClass(targ.name, 'span');
    196         // add elements from hlist collection to our array
    197         for (var j=0; j<hlist.length; ++j) {
    198             slist[slist.length] = hlist[j];
    199         }
    200         hlist = slist;
     227        // add these span elements to our highlight list
     228        hlist.push(GetElementsByClass(targ.name, 'span'));
    201229        break;
    202230      }
    203       for (var j=0; j<hlist.length; ++j) {
    204         hlist[j].style.background = '#ffcc99';
     231      for (var i=0; i<hlist.length; ++i) {
     232        for (var j=0; j<hlist[i].length; ++j) {
     233          hlist[i][j].style.background = on == 2 ? '#ffbbbb' : '#ffcc99';
     234        }
    205235      }
    206236    }
     
    218248<tr><td valign='top'><pre>];
    219249
    220 my $preMouse = q(<pre onmouseover="high(event,1)" onmouseout="high(event,0)" onmousemove="move(event)">);
     250my $preMouse = q(<pre onmouseover="high(event,1)" onmouseout="high(event,0)" onmousemove="move(event)" onmousedown="doClick(event)">);
    221251
    222252#------------------------------------------------------------------------------
     
    235265# Inputs: 0) HTML dump hash ref, 1) absolute offset in file, 2) data size,
    236266#         3) comment string, 4) tool tip (or SAME to use previous tip),
    237 #         5) bit flags (see below)
     267#         5) bit flags (see below), 6) IFD name
    238268# Bits: 0x01 - print at start of line
    239269#       0x02 - print red address
     
    243273#       0x100 - (reserved)
    244274# Notes: Block will be shown in 'unused' color if comment string begins with '['
    245 sub Add($$$$;$)
     275sub Add($$$$;$$)
    246276{
    247     my ($self, $start, $size, $msg, $tip, $flag) = @_;
     277    my ($self, $start, $size, $msg, $tip, $flag, $ifd) = @_;
    248278    my $block = $$self{Block};
    249279    $$block{$start} or $$block{$start} = [ ];
     
    262292        ++$self->{TipNum};
    263293    }
    264     push @{$$block{$start}}, [ $size, $msg, $htip, $flag, $self->{TipNum} ];
     294    push @{$$block{$start}}, [ $size, $msg, $htip, $flag, $self->{TipNum}, $ifd ];
    265295}
    266296
     
    313343    # only do dump if we didn't have a serious error
    314344    @starts = sort { $a <=> $b } keys %$block unless $$self{Error};
    315     for ($i=0; $i<@starts; ++$i) {
     345    for ($i=0; $i<=@starts; ++$i) {
    316346        my $start = $starts[$i];
    317         my $parmList = $$block{$start};
     347        my $parmList;
     348        if (defined $start) {
     349            $parmList = $$block{$start};
     350        } elsif ($bkgEnd and $pos < $bkgEnd and not defined $wasUnused) {
     351            $start = $bkgEnd;   # finish last bkg block
     352        } else {
     353            last;
     354        }
    318355        my $len = $start - $pos;
    319356        if ($len > 0 and not $wasUnused) {
    320             # we have an unused bytes before this data block
    321             --$i;           # dump the data block next time around
     357            # we have a unused bytes before this data block
     358            --$i;   # dump the data block next time around
    322359            # split unused data into 2 blocks if it spans end of a bkg block
    323360            my ($nextBkgEnd, $bkg);
     
    343380        my $parms;
    344381        foreach $parms (@$parmList) {
    345             my ($len, $msg, $tip, $flag, $tipNum) = @$parms;
     382            my ($len, $msg, $tip, $flag, $tipNum, $ifd) = @$parms;
    346383            next unless $len > 0;
    347384            $flag = 0 unless defined $flag;
     
    359396            }
    360397            if ($flag & 0x14) {
     398                my $class = $flag & 0x04 ? "$name M" : $name;
     399                $class .= " $ifd" if $ifd;
    361400                my %bkg = (
    362                     Class => $flag & 0x04 ? "$name M" : $name,
     401                    Class => $class,
    363402                    Start => $start - $dataPos,
    364403                    End   => $start - $dataPos + $len,
     
    375414                $end = $start + $len;
    376415                # only load as much of the block as we are going to dump
    377                 my $size = ($len > $limit) ? $limit / 2 : $len;
     416                # (read 32 more bytes than necessary just in case there
     417                # is only one skipped line that we decide to print)
     418                my $size = ($len > $limit + 32) ? $limit / 2 + 16 : $len;
    378419                if ($start >= $dataPos and $end <= $dataEnd) {
    379420                    $buff = substr($$dataPt, $start-$dataPos, $size);
     
    418459            }
    419460            $self->DumpTable($start-$dataPos, \$buff, $msg, $name,
    420                              $flag, $len, $pos-$dataPos);
     461                             $flag, $len, $pos-$dataPos, $ifd);
    421462            undef $buff;
    422463            $pos = $end if $pos < $end;
     
    548589# Inputs: 0) HtmlDump object ref, 1) data position, 2) block pointer,
    549590#         3) message, 4) object name, 5) flag, 6) full block length (actual
    550 #         data may be shorter), 7) data end position
    551 sub DumpTable($$$;$$$$$)
     591#         data may be shorter), 7) data end position, 8) IFD name
     592sub DumpTable($$$;$$$$$$)
    552593{
    553     my ($self, $pos, $blockPt, $msg, $name, $flag, $len, $endPos) = @_;
     594    my ($self, $pos, $blockPt, $msg, $name, $flag, $len, $endPos, $ifd) = @_;
    554595    $len = length $$blockPt unless defined $len;
    555596    $endPos = 0 unless $endPos;
     
    578619            ++$id unless $dblRef;
    579620        }
    580         $name = "<a name=$name class=$id>";
     621        my $class = $ifd ? "'$id $ifd'" : $id;
     622        $name = "<a name=$name class=$class>";
    581623        $msg and $msg = "$name$msg</a>";
    582624    } else {
     
    643685            $dblRef = 0;
    644686            ++$id;
    645             $name =~ s/class=\w\b/class=$id/;
     687            my $class = $ifd ? "'$id $ifd'" : $id;
     688            $name =~ s/class=\w\b/class=$class/;
    646689            $f0 = '';
    647690            $self->Open('fgd', $f0, 0);
     
    713756sub FinishTiffDump($$$)
    714757{
    715     my ($self, $exifTool, $size) = @_;
     758    my ($self, $et, $size) = @_;
    716759    my ($tag, $key, $start, $blockInfo, $i);
    717760
     
    729772        MPImageStart      => 'MPImageLength',
    730773        IDCPreviewStart   => 'IDCPreviewLength',
     774        SamsungRawPointersOffset => 'SamsungRawPointersLength',
    731775    );
    732776
    733777    # add TIFF data to html dump
    734778    foreach $tag (keys %offsetPair) {
    735         my $info = $exifTool->GetInfo($tag);
     779        my $info = $et->GetInfo($tag);
    736780        next unless %$info;
    737781        # Panasonic hack: StripOffsets is not valid for Panasonic RW2 files,
    738782        # and StripRowBytes is not valid for some RAW images
    739         if ($tag eq 'StripOffsets' and $exifTool->{TAG_INFO}{$tag}{PanasonicHack}) {
     783        if ($tag eq 'StripOffsets' and $$et{TAG_INFO}{$tag}{PanasonicHack}) {
    740784            # use RawDataOffset instead if available since it is valid in RW2
    741             my $info2 = $exifTool->GetInfo('RawDataOffset');
     785            my $info2 = $et->GetInfo('RawDataOffset');
    742786            $info2 = $info unless %$info2;
    743787            my @keys = keys %$info2;
    744788            my $offset = $$info2{$keys[0]};
    745             my $raf = $$exifTool{RAF};
     789            my $raf = $$et{RAF};
    746790            # ignore StripByteCounts and assume raw data runs to the end of file
    747791            if (@keys == 1 and $offset =~ /^\d+$/ and $raf) {
     
    759803        foreach $key (keys %$info) {
    760804            my $name = Image::ExifTool::GetTagName($key);
    761             my $grp1 = $exifTool->GetGroup($key, 1);
    762             my $info2 = $exifTool->GetInfo($offsetPair{$tag}, { Group1 => $grp1 });
     805            my $grp1 = $et->GetGroup($key, 1);
     806            my $info2 = $et->GetInfo($offsetPair{$tag}, { Group1 => $grp1 });
    763807            my $key2 = $offsetPair{$tag};
    764808            $key2 .= $1 if $key =~ /( .*)/; # use same instance number as $tag
     
    791835                    }
    792836                }
    793                 my $msg = $exifTool->GetGroup($key, 1) . ':' . $tag;
     837                my $msg = $et->GetGroup($key, 1) . ':' . $tag;
    794838                $msg =~ s/(Offsets?|Start)$/ /;
    795839                if ($num > 1) {
     
    815859    }
    816860    my $diff = $size - $last;
    817     if ($diff > 0 and ($last or $exifTool->Options('Unknown'))) {
     861    if ($diff > 0 and ($last or $et->Options('Unknown'))) {
    818862        if ($diff > 1 or $size & 0x01) {
    819863            $self->Add($last, $diff, "[unknown data]", "Size: $diff bytes", 0x08);
     
    874918=head1 AUTHOR
    875919
    876 Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
     920Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
    877921
    878922This library is free software; you can redistribute it and/or modify it
Note: See TracChangeset for help on using the changeset viewer.