source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/HtmlDump.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 32.8 KB
Line 
1#------------------------------------------------------------------------------
2# File: HtmlDump.pm
3#
4# Description: Dump information in hex to HTML page
5#
6# Revisions: 12/05/2005 - P. Harvey Created
7#------------------------------------------------------------------------------
8
9package Image::ExifTool::HtmlDump;
10
11use strict;
12use vars qw($VERSION);
13use Image::ExifTool; # only for FinishTiffDump()
14use Image::ExifTool::HTML qw(EscapeHTML);
15
16$VERSION = '1.39';
17
18sub DumpTable($$$;$$$$$$);
19sub Open($$$;@);
20sub Write($@);
21
22my ($bkgStart, $bkgEnd, @bkgSpan);
23
24my $htmlHeader1 = <<_END_PART_1_;
25<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
26 "http://www.w3.org/TR/1998/REC-html40-19980424/loose.dtd">
27<html>
28<head>
29<title>
30_END_PART_1_
31
32# Note: Don't change font-weight style because it can affect line height
33my $htmlHeader2 = <<_END_PART_2_;
34</title>
35<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
36<style type="text/css">
37<!--
38/* character style ID's */
39.D { color: #000000 } /* default color */
40.V { color: #ff0000 } /* duplicate block 1 */
41.W { color: #004400 } /* normal block 1 */
42.X { color: #ff4488 } /* duplicate block 2 */
43.Y { color: #448844 } /* normal block 2 */
44.U { color: #cc8844 } /* unused data block */
45.H { color: #0000ff } /* highlighted tag name */
46.F { color: #aa00dd } /* actual offset differs */
47.M { text-decoration: underline } /* maker notes data */
48.tt { /* tooltip text */
49 visibility: hidden;
50 position: absolute;
51 white-space: nowrap;
52 top: 0;
53 left: 0;
54 font-family: Verdana, sans-serif;
55 font-size: .7em;
56 padding: 2px 4px;
57 border: 1px solid gray;
58 z-index: 3;
59}
60.tb { /* tooltip background */
61 visibility: hidden;
62 position: absolute;
63 background: #ffffdd;
64 zoom: 1;
65 -moz-opacity: 0.8;
66 -khtml-opacity: 0.8;
67 -ms-filter: 'progid:DXImageTransform.Microsoft.Alpha(Opacity=80)';
68 filter: alpha(opacity=80);
69 opacity: 0.8;
70 z-index: 2;
71}
72/* table styles */
73table.dump {
74 border-top: 1px solid gray;
75 border-bottom: 1px solid gray;
76}
77table.dump td { padding: .2em .3em }
78td.c2 {
79 border-left: 1px solid gray;
80 border-right: 1px solid gray;
81}
82pre { margin: 0 }
83table { font-size: .9em }
84body { color: black; background: white }
85-->
86</style>
87<script language="JavaScript" type="text/JavaScript">
88<!-- Begin
89// tooltip positioning constants
90var TMAR = 4; // top/left margins
91var BMAR = 16; // bottom/right margins (scrollbars may overhang inner dimensions)
92var XOFF = 10; // x offset from cursor
93var YOFF = 40; // y offset
94var YMIN = 10; // minimum y offset
95var YTOP = 20; // y offset when above cursor
96// common variables
97var safari1 = navigator.userAgent.indexOf("Safari/312.6") >= 0;
98var ie6 = navigator.userAgent.toLowerCase().indexOf('msie 6') >= 0;
99var mspan = new Array;
100var clicked = 0;
101var hlist, tt, tb, firstOutEvt, lastInEvt;
102
103function GetElementsByClass(classname, tagname) {
104 var found = new Array();
105 var list = document.getElementsByTagName(tagname);
106 var len = list.length;
107 for (var i=0, j=0; i<len; ++i) {
108 var classes = list[i].className.split(' ');
109 for (var k=0; k<classes.length; ++k) {
110 if (classes[k] == classname) {
111 found[j++] = list[i];
112 break;
113 }
114 }
115 }
116 return found;
117}
118
119// click mouse
120function doClick(e)
121{
122 if (!clicked) {
123 firstOutEvt = lastInEvt = undefined;
124 high(e, 2);
125 if (hlist) clicked = 1;
126 } else {
127 clicked = 0;
128 if (firstOutEvt) high(firstOutEvt, 0);
129 if (lastInEvt) high(lastInEvt, 1);
130 }
131}
132
133// move tooltip
134function move(e)
135{
136 if (!tt) return;
137 if (ie6 && (tt.style.top == '' || tt.style.top == 0) &&
138 (tt.style.left == '' || tt.style.left == 0))
139 {
140 tt.style.width = tt.offsetWidth + 'px';
141 tt.style.height = tt.offsetHeight + 'px';
142 }
143 var w, h;
144 // browser inconsistencies make getting window size more complex than it should be,
145 // and even then we don't know if it is smaller due to scrollbar width
146 if (typeof(window.innerWidth) == 'number') {
147 w = window.innerWidth;
148 h = window.innerHeight;
149 } else if (document.documentElement && document.documentElement.clientWidth) {
150 w = document.documentElement.clientWidth;
151 h = document.documentElement.clientHeight;
152 } else {
153 w = document.body.clientWidth;
154 h = document.body.clientHeight;
155 }
156 var x = e.clientX + XOFF;
157 var y = e.clientY + YOFF;
158 if (safari1) { // patch for people still using OS X 10.3.9
159 x -= document.body.scrollLeft + document.documentElement.scrollLeft;
160 y -= document.body.scrollTop + document.documentElement.scrollTop;
161 }
162 var mx = w - BMAR - tt.offsetWidth;
163 var my = h - BMAR - tt.offsetHeight;
164 if (y > my + YOFF - YMIN) y = e.clientY - YTOP - tt.offsetHeight;
165 if (x > mx) x = mx;
166 if (y > my) y = my;
167 if (x < TMAR) x = TMAR;
168 if (y < TMAR) y = TMAR;
169 x += document.body.scrollLeft + document.documentElement.scrollLeft;
170 y += document.body.scrollTop + document.documentElement.scrollTop;
171 tb.style.width = tt.offsetWidth + 'px';
172 tb.style.height = tt.offsetHeight + 'px';
173 tt.style.top = tb.style.top = y + 'px';
174 tt.style.left = tb.style.left = x + 'px';
175 tt.style.visibility = tb.style.visibility = 'visible';
176}
177
178// highlight/unhighlight text
179function high(e,on) {
180 if (on) {
181 lastInEvt = e;
182 } else {
183 if (!firstOutEvt) firstOutEvt = e;
184 }
185 if (clicked) return;
186 var targ;
187 if (e.target) targ = e.target;
188 else if (e.srcElement) targ = e.srcElement;
189 if (targ.nodeType == 3) targ = targ.parentNode; // defeat Safari bug
190 if (!targ.name) targ = targ.parentNode; // go up another level if necessary
191 if (targ.name && document.getElementsByName) {
192 // un-highlight current objects
193 if (hlist) {
194 for (var i=0; i<hlist.length; ++i) {
195 for (var j=0; j<hlist[i].length; ++j) {
196 hlist[i][j].style.background = 'transparent';
197 }
198 }
199 hlist = null;
200 }
201 if (tt) {
202 // hide old tooltip
203 tt.style.visibility = tb.style.visibility = 'hidden';
204 tt = null;
205 }
206 if (on) {
207 if (targ.name.substring(0,1) == 't') {
208 // show our tooltip (ID is different than name to avoid confusing IE)
209 tt = document.getElementById('p' + targ.name.substring(1));
210 if (tt) {
211 tb = document.getElementById('tb');
212 move(e);
213 }
214 }
215 // highlight anchor elements with the same name
216 hlist = new Array;
217 hlist.push(document.getElementsByName(targ.name));
218 // is this an IFD pointer?
219 var pos = targ.className.indexOf('Offset_');
220 if (pos > 0) {
221 // add elements from this IFD to our highlight list
222 hlist.push(document.getElementsByClassName(targ.className.substr(pos+7)));
223 }
224 // use class name to highlight span elements if necessary
225 for (var i=0; i<mspan.length; ++i) {
226 if (mspan[i] != targ.name) continue;
227 // add these span elements to our highlight list
228 hlist.push(GetElementsByClass(targ.name, 'span'));
229 break;
230 }
231 for (var i=0; i<hlist.length; ++i) {
232 for (var j=0; j<hlist[i].length; ++j) {
233 hlist[i][j].style.background = on == 2 ? '#ffbbbb' : '#ffcc99';
234 }
235 }
236 }
237 }
238}
239_END_PART_2_
240
241my $htmlHeader3 = q[
242// End --->
243</script></head>
244<body><noscript><b class=V>--&gt;
245Enable JavaScript for active highlighting and information tool tips!
246</b></noscript>
247<table class=dump cellspacing=0 cellpadding=2>
248<tr><td valign='top'><pre>];
249
250my $preMouse = q(<pre onmouseover="high(event,1)" onmouseout="high(event,0)" onmousemove="move(event)" onmousedown="doClick(event)">);
251
252#------------------------------------------------------------------------------
253# New - create new HtmlDump object
254# Inputs: 0) reference to HtmlDump object or HtmlDump class name
255sub new
256{
257 local $_;
258 my $that = shift;
259 my $class = ref($that) || $that || 'Image::ExifTool::HtmlDump';
260 return bless { Block => {}, TipNum => 0 }, $class;
261}
262
263#------------------------------------------------------------------------------
264# Add information to dump
265# Inputs: 0) HTML dump hash ref, 1) absolute offset in file, 2) data size,
266# 3) comment string, 4) tool tip (or SAME to use previous tip),
267# 5) bit flags (see below), 6) IFD name
268# Bits: 0x01 - print at start of line
269# 0x02 - print red address
270# 0x04 - maker notes data ('M'-class span)
271# 0x08 - limit block length
272# 0x10 - allow double references
273# 0x100 - (reserved)
274# Notes: Block will be shown in 'unused' color if comment string begins with '['
275sub Add($$$$;$$)
276{
277 my ($self, $start, $size, $msg, $tip, $flag, $ifd) = @_;
278 my $block = $$self{Block};
279 $$block{$start} or $$block{$start} = [ ];
280 my $htip;
281 if ($tip and $tip eq 'SAME') {
282 $htip = '';
283 } else {
284 # use message as first line of tip, and make bold unless in brackets
285 $htip = ($msg =~ /^[[(]/) ? $msg : "<b>$msg</b>";
286 if (defined $tip) {
287 ($tip = EscapeHTML($tip)) =~ s/\n/<br>/g; # HTML-ize tooltip text
288 $htip .= '<br>' . $tip;
289 }
290 # add size if not already done
291 $htip .= "<br>($size bytes)" unless $htip =~ /<br>Size:/;
292 ++$self->{TipNum};
293 }
294 push @{$$block{$start}}, [ $size, $msg, $htip, $flag, $self->{TipNum}, $ifd ];
295}
296
297#------------------------------------------------------------------------------
298# Print dump information to HTML page
299# Inputs: 0) Dump information hash reference, 1) source file RAF reference,
300# 2) data pointer, 3) data position, 4) output file or scalar reference,
301# 5) limit level (1-3), 6) title
302# Returns: non-zero if useful output was generated,
303# or -1 on error loading data and "ERROR" is set to offending data name
304# Note: The "Error" member may be set externally to print a specific error
305# message instead of doing the dump.
306sub Print($$;$$$$$)
307{
308 local $_;
309 my ($self, $raf, $dataPt, $dataPos, $outfile, $level, $title) = @_;
310 my ($i, $buff, $rtnVal, $limit, $err);
311 my $block = $$self{Block};
312 $dataPos = 0 unless $dataPos;
313 $outfile = \*STDOUT unless ref $outfile;
314 $title = 'HtmlDump' unless $title;
315 $level or $level = 0;
316 my $tell = $raf->Tell();
317 my $pos = 0;
318 my $dataEnd = $dataPos + ($dataPt ? length($$dataPt) : 0);
319 # initialize member variables
320 $$self{Open} = [];
321 $$self{Closed} = [];
322 $$self{TipList} = [];
323 $$self{MSpanList} = [];
324 $$self{Cols} = [ '', '', '', '' ]; # text columns
325 # set dump size limits (limits are 4x smaller if bit 0x08 set in flags)
326 if ($level <= 1) {
327 $limit = 1024;
328 } elsif ($level <= 2) {
329 $limit = 16384;
330 } else {
331 $limit = 256 * 1024 * 1024; # never dump bigger than 256 MB
332 }
333 $$self{Limit} = $limit;
334 # pre-initialize open/closed hashes for all columns
335 for ($i=0; $i<4; ++$i) {
336 $self->{Open}->[$i] = { ID => [ ], Element => { } };
337 $self->{Closed}->[$i] = { ID => [ ], Element => { } };
338 }
339 $bkgStart = $bkgEnd = 0;
340 undef @bkgSpan;
341 my $index = 0; # initialize tooltip index
342 my (@names, $wasUnused, @starts);
343 # only do dump if we didn't have a serious error
344 @starts = sort { $a <=> $b } keys %$block unless $$self{Error};
345 for ($i=0; $i<=@starts; ++$i) {
346 my $start = $starts[$i];
347 my $parmList;
348 if (defined $start) {
349 $parmList = $$block{$start};
350 } elsif ($bkgEnd and $pos < $bkgEnd and not defined $wasUnused) {
351 $start = $bkgEnd; # finish last bkg block
352 } else {
353 last;
354 }
355 my $len = $start - $pos;
356 if ($len > 0 and not $wasUnused) {
357 # we have a unused bytes before this data block
358 --$i; # dump the data block next time around
359 # split unused data into 2 blocks if it spans end of a bkg block
360 my ($nextBkgEnd, $bkg);
361 if (not defined $wasUnused and $bkgEnd) {
362 foreach $bkg (@bkgSpan) {
363 next if $pos >= $$bkg{End} + $dataPos or $pos + $len <= $$bkg{End} + $dataPos;
364 $nextBkgEnd = $$bkg{End} unless $nextBkgEnd and $nextBkgEnd < $$bkg{End};
365 }
366 }
367 if ($nextBkgEnd) {
368 $start = $pos;
369 $len = $nextBkgEnd + $dataPos - $pos;
370 $wasUnused = 0;
371 } else {
372 $start = $pos; # dump the unused bytes now
373 $wasUnused = 1; # avoid re-dumping unused bytes if we get a read error
374 }
375 my $str = ($len > 1) ? "unused $len bytes" : 'pad byte';
376 $parmList = [ [ $len, "[$str]", undef, 0x108 ] ];
377 } else {
378 undef $wasUnused;
379 }
380 my $parms;
381 foreach $parms (@$parmList) {
382 my ($len, $msg, $tip, $flag, $tipNum, $ifd) = @$parms;
383 next unless $len > 0;
384 $flag = 0 unless defined $flag;
385 # generate same name for all blocks indexed by this tooltip
386 my $name;
387 $name = $names[$tipNum] if defined $tipNum;
388 my $idx = $index;
389 if ($name) {
390 # get index from existing ID
391 $idx = substr($name, 1);
392 } else {
393 $name = "t$index";
394 $names[$tipNum] = $name if defined $tipNum;
395 ++$index;
396 }
397 if ($flag & 0x14) {
398 my $class = $flag & 0x04 ? "$name M" : $name;
399 $class .= " $ifd" if $ifd;
400 my %bkg = (
401 Class => $class,
402 Start => $start - $dataPos,
403 End => $start - $dataPos + $len,
404 );
405 push @bkgSpan, \%bkg;
406 $bkgStart = $bkg{Start} unless $bkgStart and $bkgStart < $bkg{Start};
407 $bkgEnd = $bkg{End} unless $bkgEnd and $bkgEnd > $bkg{End};
408 push @{$self->{MSpanList}}, $name;
409 next;
410 }
411 # loop until we read the value properly
412 my ($end, $try);
413 for ($try=0; $try<2; ++$try) {
414 $end = $start + $len;
415 # only load as much of the block as we are going to dump
416 # (read 32 more bytes than necessary just in case there
417 # is only one skipped line that we decide to print)
418 my $size = ($len > $limit + 32) ? $limit / 2 + 16 : $len;
419 if ($start >= $dataPos and $end <= $dataEnd) {
420 $buff = substr($$dataPt, $start-$dataPos, $size);
421 if ($len != $size) {
422 $buff .= substr($$dataPt, $start-$dataPos+$len-$size, $size);
423 }
424 } else {
425 $buff = '';
426 if ($raf->Seek($start, 0) and $raf->Read($buff, $size) == $size) {
427 # read end of block
428 if ($len != $size) {
429 my $buf2 = '';
430 unless ($raf->Seek($start+$len-$size, 0) and
431 $raf->Read($buf2, $size) == $size)
432 {
433 $err = $msg;
434 # reset $len to the actual length of available data
435 $raf->Seek(0, 2);
436 $len = $raf->Tell() - $start;
437 $tip .= "<br>Error: Only $len bytes available!" if $tip;
438 next;
439 }
440 $buff .= $buf2;
441 undef $buf2;
442 }
443 } else {
444 $err = $msg;
445 $len = length $buff;
446 $tip .= "<br>Error: Only $len bytes available!" if $tip;
447 }
448 }
449 last;
450 }
451 $tip and $self->{TipList}->[$idx] = $tip;
452 next unless length $buff;
453 # set flag to continue this line if next block is contiguous
454 if ($i+1 < @starts and $parms eq $$parmList[-1] and
455 ($end == $starts[$i+1] or ($end < $starts[$i+1] and $end >= $pos)))
456 {
457 my $nextFlag = $block->{$starts[$i+1]}->[0]->[3] || 0;
458 $flag |= 0x100 unless $flag & 0x01 or $nextFlag & 0x01;
459 }
460 $self->DumpTable($start-$dataPos, \$buff, $msg, $name,
461 $flag, $len, $pos-$dataPos, $ifd);
462 undef $buff;
463 $pos = $end if $pos < $end;
464 }
465 }
466 $self->Open('',''); # close all open elements
467 $raf->Seek($tell,0);
468
469 # write output HTML file
470 Write($outfile, $htmlHeader1, $title);
471 if ($self->{Cols}->[0]) {
472 Write($outfile, $htmlHeader2);
473 my $mspan = \@{$$self{MSpanList}};
474 for ($i=0; $i<@$mspan; ++$i) {
475 Write($outfile, qq(mspan[$i] = "$$mspan[$i]";\n));
476 }
477 Write($outfile, $htmlHeader3, $self->{Cols}->[0]);
478 Write($outfile, '</pre></td><td valign="top">',
479 $preMouse, $self->{Cols}->[1]);
480 Write($outfile, '</pre></td><td class=c2 valign="top">',
481 $preMouse, $self->{Cols}->[2]);
482 Write($outfile, '</pre></td><td valign="top">',
483 $preMouse, $self->{Cols}->[3]);
484 Write($outfile, "</pre></td></tr></table>\n<div id=tb class=tb> </div>\n");
485 my $tips = \@{$$self{TipList}};
486 for ($i=0; $i<@$tips; ++$i) {
487 my $tip = $$tips[$i];
488 Write($outfile, "<div id=p$i class=tt>$tip</div>\n") if defined $tip;
489 }
490 delete $$self{TipList};
491 $rtnVal = 1;
492 } else {
493 my $err = $$self{Error} || 'No EXIF or TIFF information found in image';
494 Write($outfile, "$title</title></head><body>\n$err\n");
495 $rtnVal = 0;
496 }
497 Write($outfile, "</body></html>\n");
498 for ($i=0; $i<4; ++$i) {
499 $self->{Cols}->[$i] = ''; # free memory
500 }
501 if ($err) {
502 $err =~ tr/()//d;
503 $$self{ERROR} = $err;
504 return -1;
505 }
506 return $rtnVal;
507}
508
509#------------------------------------------------------------------------------
510# Open or close a specified html element
511# Inputs: 0) HtmlDump object ref, 1) element id, 2) element string,
512# 3-N) list of column numbers (empty for all columns)
513# - element id may be '' to close all elements
514# - element string may be '' to close element by ID (or 0 to close without reopening)
515# - element id and string may both be 1 to reopen temporarily closed elements
516sub Open($$$;@)
517{
518 my ($self, $id, $element, @colNums) = @_;
519
520 # loop through specified columns
521 @colNums or @colNums = (0 .. $#{$self->{Open}});
522 my $col;
523 foreach $col (@colNums) {
524 # get information about open elements in this column
525 my $opHash = $self->{Open}->[$col];
526 my $opElem = $$opHash{Element};
527 if ($element) {
528 # next if already open
529 next if $$opElem{$id} and $$opElem{$id} eq $element;
530 } elsif ($id and not $$opElem{$id}) {
531 # next if already closed and nothing to reopen
532 next unless $element eq '' and @{$self->{Closed}->[$col]->{ID}};
533 }
534 my $opID = $$opHash{ID};
535 my $clHash = $self->{Closed}->[$col];
536 my $clID = $$clHash{ID};
537 my $clElem = $$clHash{Element};
538 # get reference to output column list (use temp list if available)
539 my $cols = $$self{TmpCols} || $$self{Cols};
540 # close everything down to this element if necessary
541 if ($$opElem{$id} or not $id) {
542 while (@$opID) {
543 my $tid = pop @$opID;
544 my $e = $$opElem{$tid};
545 $e =~ s/^<(\S+).*/<\/$1>/s;
546 $$cols[$col] .= $e;
547 if ($id eq $tid or not $id) {
548 delete $$opElem{$tid};
549 last if $id;
550 next;
551 }
552 # add this to the temporarily closed list
553 # (because we really didn't want to close it)
554 push @$clID, $tid;
555 $$clElem{$tid} = $$opElem{$tid};
556 delete $$opElem{$tid};
557 }
558 unless ($id) {
559 # forget all temporarily closed elements
560 $clID = $$clHash{ID} = [ ];
561 $clElem = $$clHash{Element} = { };
562 }
563 } elsif ($$clElem{$id}) {
564 # delete from the list of temporarily closed elements
565 delete $$clElem{$id};
566 @$clID = grep !/^$id$/, @$clID;
567 }
568 next if $element eq '0'; # 0 = don't reopen temporarily closed elements
569
570 # re-open temporarily closed elements
571 while (@$clID) {
572 my $tid = pop @$clID;
573 $$cols[$col] .= $$clElem{$tid};
574 push @$opID, $tid;
575 $$opElem{$tid} = $$clElem{$tid};
576 delete $$clElem{$tid};
577 }
578 # open specified element
579 if ($element and $element ne '1') {
580 $$cols[$col] .= $element;
581 push @$opID, $id;
582 $$opElem{$id} = $element;
583 }
584 }
585}
586
587#------------------------------------------------------------------------------
588# Dump a block of data in HTML table form
589# Inputs: 0) HtmlDump object ref, 1) data position, 2) block pointer,
590# 3) message, 4) object name, 5) flag, 6) full block length (actual
591# data may be shorter), 7) data end position, 8) IFD name
592sub DumpTable($$$;$$$$$$)
593{
594 my ($self, $pos, $blockPt, $msg, $name, $flag, $len, $endPos, $ifd) = @_;
595 $len = length $$blockPt unless defined $len;
596 $endPos = 0 unless $endPos;
597 my ($f0, $dblRef, $id);
598 my $skipped = 0;
599 if (($endPos and $pos < $endPos) or $flag & 0x02) {
600 # display double-reference addresses in red
601 $f0 = "<span class=V>";
602 $dblRef = 1 if $endPos and $pos < $endPos;
603 } else {
604 $f0 = '';
605 }
606 my @c = ('','','','');
607 $$self{TmpCols} = \@c;
608 if ($name) {
609 if ($msg and $msg =~ /^\[/) {
610 $id = 'U';
611 } else {
612 if ($$self{A}) {
613 $id = 'X';
614 $$self{A} = 0;
615 } else {
616 $id = 'V';
617 $$self{A} = 1;
618 }
619 ++$id unless $dblRef;
620 }
621 my $class = $ifd ? "'$id $ifd'" : $id;
622 $name = "<a name=$name class=$class>";
623 $msg and $msg = "$name$msg</a>";
624 } else {
625 $name = '';
626 }
627 # use base-relative offsets from now on
628 my $cols = 0;
629 my $p = $pos;
630 if ($$self{Cont}) {
631 $cols = $pos & 0x0f;
632 $c[1] .= ($cols == 8) ? ' ' : ' ';
633 } else {
634 my $addr = $pos < 0 ? sprintf("-%.4x",-$pos) : sprintf("%5.4x",$pos);
635 $self->Open('fgd', $f0, 0);
636 $self->Open('fgd', '', 3);
637 $c[0] .= "$addr";
638 $p -= $pos & 0x0f unless $flag & 0x01;
639 if ($p < $pos) {
640 $self->Open('bkg', '', 1, 2); # don't underline white space
641 $cols = $pos - $p;
642 my $n = 3 * $cols;
643 ++$n if $cols > 7;
644 $c[1] .= ' ' x $n;
645 $c[2] .= ' ' x $cols;
646 $p = $pos;
647 }
648 }
649 # loop through each column of hex numbers
650 for (;;) {
651 my (@spanClass, @spanCont, $spanClose, $bkg);
652 if ($p >= $bkgStart and $p < $bkgEnd) {
653 foreach $bkg (@bkgSpan) {
654 next unless $p >= $$bkg{Start} and $p < $$bkg{End};
655 push @spanClass, $$bkg{Class};
656 if ($p + 1 == $$bkg{End}) {
657 $spanClose = 1;
658 } else {
659 push @spanCont, $$bkg{Class}; # this span continues
660 }
661 }
662 $self->Open('bkg', @spanClass ? "<span class='@spanClass'>" : '', 1, 2);
663 } else {
664 $self->Open('bkg', '', 1, 2);
665 }
666 $self->Open('a', $name, 1, 2);
667 my $ch = substr($$blockPt,$p-$pos-$skipped,1);
668 $c[1] .= sprintf("%.2x", ord($ch));
669 # make the character HTML-friendly
670 $ch =~ tr/\x00-\x1f\x7f-\xff/./;
671 $ch =~ s/&/&amp;/g;
672 $ch =~ s/>/&gt;/g;
673 $ch =~ s/</&lt;/g;
674 $c[2] .= $ch;
675 ++$p;
676 ++$cols;
677 # close necessary elements
678 if ($spanClose) {
679 my $spanCont = @spanCont ? "<span class='@spanCont'>" : '';
680 # close without reopening if closing anchor later
681 my $arg = ($p - $pos >= $len) ? 0 : $spanCont;
682 $self->Open('bkg', $arg, 1, 2);
683 }
684 if ($dblRef and $p >= $endPos) {
685 $dblRef = 0;
686 ++$id;
687 my $class = $ifd ? "'$id $ifd'" : $id;
688 $name =~ s/class=\w\b/class=$class/;
689 $f0 = '';
690 $self->Open('fgd', $f0, 0);
691 }
692 if ($p - $pos >= $len) {
693 $self->Open('a', '', 1, 2); # close our anchor
694 last;
695 }
696 if ($cols < 16) {
697 $c[1] .= ($cols == 8 ? ' ' : ' ');
698 next;
699 } elsif ($flag & 0x01 and $cols < $len) {
700 $c[1] .= ' ';
701 next; # put it all on one line
702 }
703 unless ($$self{Msg}) {
704 $c[3] .= $msg;
705 $msg = '';
706 }
707 $_ .= "\n" foreach @c; # add CR to all lines
708 $$self{Msg} = 0;
709 # limit data length if specified
710 if ($$self{Limit}) {
711 my $div = ($flag & 0x08) ? 4 : 1;
712 my $lim = $$self{Limit} / (2 * $div) - 16;
713 if ($p - $pos > $lim and $len - $p + $pos > $lim) {
714 my $n = ($len - $p + $pos - $lim) & ~0x0f;
715 if ($n > 16) { # (no use just cutting out one line)
716 $self->Open('bkg', '', 1, 2); # no underline
717 my $note = sprintf "[snip %d lines]", $n / 16;
718 $note = (' ' x (24-length($note)/2)) . $note;
719 $c[0] .= " ...\n";
720 $c[1] .= $note . (' ' x (48-length($note))) . "\n";
721 $c[2] .= " [snip] \n";
722 $c[3] .= "\n";
723 $p += $n;
724 $skipped += $len - length $$blockPt;
725 }
726 }
727 }
728 $c[0] .= ($p < 0 ? sprintf("-%.4x",-$p) : sprintf("%5.4x",$p));
729 $cols = 0;
730 }
731 if ($msg) {
732 $msg = " $msg" if $$self{Msg};
733 $c[3] .= $msg;
734 }
735 if ($flag & 0x100 and $cols < 16) { # continue on same line?
736 $$self{Cont} = 1;
737 $$self{Msg} = 1 if $msg;
738 } else {
739 $_ .= "\n" foreach @c;
740 $$self{Msg} = 0;
741 $$self{Cont} = 0;
742 }
743 # add temporary column data to our real columns
744 my $i;
745 for ($i=0; $i<4; ++$i) {
746 $self->{Cols}->[$i] .= $c[$i];
747 }
748 delete $$self{TmpCols};
749}
750
751#------------------------------------------------------------------------------
752# Finish dumping of TIFF image data
753# Inputs: 0) HtmlDump object ref, 1) ExifTool object ref, 2) length of file
754# (this really belongs in Image::ExifTool::Exif, but is placed here so it
755# is only compiled when needed)
756sub FinishTiffDump($$$)
757{
758 my ($self, $et, $size) = @_;
759 my ($tag, $key, $start, $blockInfo, $i);
760
761 # list of all indirectly referenced TIFF data tags
762 my %offsetPair = (
763 StripOffsets => 'StripByteCounts',
764 TileOffsets => 'TileByteCounts',
765 FreeOffsets => 'FreeByteCounts',
766 ThumbnailOffset => 'ThumbnailLength',
767 PreviewImageStart => 'PreviewImageLength',
768 JpgFromRawStart => 'JpgFromRawLength',
769 OtherImageStart => 'OtherImageLength',
770 ImageOffset => 'ImageByteCount',
771 AlphaOffset => 'AlphaByteCount',
772 MPImageStart => 'MPImageLength',
773 IDCPreviewStart => 'IDCPreviewLength',
774 SamsungRawPointersOffset => 'SamsungRawPointersLength',
775 );
776
777 # add TIFF data to html dump
778 foreach $tag (keys %offsetPair) {
779 my $info = $et->GetInfo($tag);
780 next unless %$info;
781 # Panasonic hack: StripOffsets is not valid for Panasonic RW2 files,
782 # and StripRowBytes is not valid for some RAW images
783 if ($tag eq 'StripOffsets' and $$et{TAG_INFO}{$tag}{PanasonicHack}) {
784 # use RawDataOffset instead if available since it is valid in RW2
785 my $info2 = $et->GetInfo('RawDataOffset');
786 $info2 = $info unless %$info2;
787 my @keys = keys %$info2;
788 my $offset = $$info2{$keys[0]};
789 my $raf = $$et{RAF};
790 # ignore StripByteCounts and assume raw data runs to the end of file
791 if (@keys == 1 and $offset =~ /^\d+$/ and $raf) {
792 my $pos = $raf->Tell();
793 $raf->Seek(0, 2); # seek to end
794 my $len = $raf->Tell() - $offset;
795 $raf->Seek($pos, 0);
796 if ($len > 0) {
797 $self->Add($offset, $len, "(Panasonic raw data)", "Size: $len bytes", 0x08);
798 next;
799 }
800 }
801 }
802 # loop through all offsets tags
803 foreach $key (keys %$info) {
804 my $name = Image::ExifTool::GetTagName($key);
805 my $grp1 = $et->GetGroup($key, 1);
806 my $info2 = $et->GetInfo($offsetPair{$tag}, { Group1 => $grp1 });
807 my $key2 = $offsetPair{$tag};
808 $key2 .= $1 if $key =~ /( .*)/; # use same instance number as $tag
809 next unless $$info2{$key2};
810 my $offsets = $$info{$key};
811 my $byteCounts = $$info2{$key2};
812 # ignore primary MPImage (this is the whole JPEG)
813 next if $tag eq 'MPImageStart' and $offsets eq '0';
814 # (long lists may be SCALAR references)
815 my @offsets = split ' ', (ref $offsets ? $$offsets : $offsets);
816 my @byteCounts = split ' ', (ref $byteCounts ? $$byteCounts : $byteCounts);
817 my $num = scalar @offsets;
818 my $li = 0;
819 my $padBytes = 0;
820 for ($i=0; @offsets and @byteCounts; ++$i) {
821 my $offset = shift @offsets;
822 my $byteCount = shift @byteCounts;
823 my $end = $offset + $byteCount;
824 if (@offsets and @byteCounts) {
825 # show data as contiguous if only normal pad bytes between blocks
826 if ($end & 0x01 and $end + 1 == $offsets[0]) {
827 $end += 1;
828 ++$padBytes; # count them
829 }
830 if ($end == $offsets[0]) {
831 # combine these two blocks
832 $byteCounts[0] += $offsets[0] - $offset;
833 $offsets[0] = $offset;
834 next;
835 }
836 }
837 my $msg = $et->GetGroup($key, 1) . ':' . $tag;
838 $msg =~ s/(Offsets?|Start)$/ /;
839 if ($num > 1) {
840 $msg .= "$li-" if $li != $i;
841 $msg .= "$i ";
842 $li = $i + 1;
843 }
844 $msg .= "data";
845 my $tip = "Size: $byteCount bytes";
846 $tip .= ", incl. $padBytes pad bytes" if $padBytes;
847 $self->Add($offset, $byteCount, "($msg)", $tip, 0x08);
848 }
849 }
850 }
851 # find offset of last dumped information, and dump any unknown trailer
852 my $last = 0;
853 my $block = $$self{Block};
854 foreach $start (keys %$block) {
855 foreach $blockInfo (@{$$block{$start}}) {
856 my $end = $start + $$blockInfo[0];
857 $last = $end if $last < $end;
858 }
859 }
860 my $diff = $size - $last;
861 if ($diff > 0 and ($last or $et->Options('Unknown'))) {
862 if ($diff > 1 or $size & 0x01) {
863 $self->Add($last, $diff, "[unknown data]", "Size: $diff bytes", 0x08);
864 } else {
865 $self->Add($last, $diff, "[trailing pad byte]", undef, 0x08);
866 }
867 }
868}
869
870#------------------------------------------------------------------------------
871# utility routine to write to file or memory
872# Inputs: 0) file or scalar reference, 1-N) list of stuff to write
873# Returns: true on success
874sub Write($@)
875{
876 my $outfile = shift;
877 if (UNIVERSAL::isa($outfile,'GLOB')) {
878 return print $outfile @_;
879 } elsif (ref $outfile eq 'SCALAR') {
880 $$outfile .= join('', @_);
881 return 1;
882 }
883 return 0;
884}
885
8861; # end
887
888__END__
889
890=head1 NAME
891
892Image::ExifTool::HtmlDump - Dump information in hex to HTML page
893
894=head1 SYNOPSIS
895
896 use Image::ExifTool::HtmlDump;
897 my $dump = new Image::ExifTool::HtmlDump;
898 $dump->Add($start, $size, $comment);
899 $dump->Print($dumpInfo, $raf, $dataPt, $dataPos, $outfile);
900
901=head1 DESCRIPTION
902
903This module contains code used to generate an HTML-based hex dump of
904information for debugging purposes. This is code is called when the
905ExifTool 'HtmlDump' option is used.
906
907Currently, only EXIF/TIFF and JPEG information is dumped.
908
909=head1 BUGS
910
911Due to a memory allocation bug in ActivePerl 5.8.x for Windows, this code
912may run extremely slowly when processing large files with this version of
913Perl.
914
915An HTML 4 compliant browser is needed to properly display the generated HTML
916page.
917
918=head1 AUTHOR
919
920Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
921
922This library is free software; you can redistribute it and/or modify it
923under the same terms as Perl itself.
924
925=head1 SEE ALSO
926
927L<Image::ExifTool(3pm)|Image::ExifTool>
928
929=cut
930
Note: See TracBrowser for help on using the repository browser.