source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/XMPStruct.pl@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 36.0 KB
Line 
1#------------------------------------------------------------------------------
2# File: XMPStruct.pl
3#
4# Description: XMP structure support
5#
6# Revisions: 01/01/2011 - P. Harvey Created
7#------------------------------------------------------------------------------
8
9package Image::ExifTool::XMP;
10
11use strict;
12use vars qw(%specialStruct %stdXlatNS);
13
14use Image::ExifTool qw(:Utils);
15use Image::ExifTool::XMP;
16
17sub SerializeStruct($;$);
18sub InflateStruct($;$);
19sub DumpStruct($;$);
20sub CheckStruct($$$);
21sub AddNewStruct($$$$$$);
22sub ConvertStruct($$$$;$);
23
24#------------------------------------------------------------------------------
25# Serialize a structure (or other object) into a simple string
26# Inputs: 0) HASH ref, ARRAY ref, or SCALAR, 1) closing bracket (or undef)
27# Returns: serialized structure string
28# eg) "{field=text with {braces|}|, and a comma, field2=val2,field3={field4=[a,b]}}"
29sub SerializeStruct($;$)
30{
31 my ($obj, $ket) = @_;
32 my ($key, $val, @vals, $rtnVal);
33
34 if (ref $obj eq 'HASH') {
35 foreach $key (sort keys %$obj) {
36 push @vals, $key . '=' . SerializeStruct($$obj{$key}, '}');
37 }
38 $rtnVal = '{' . join(',', @vals) . '}';
39 } elsif (ref $obj eq 'ARRAY') {
40 foreach $val (@$obj) {
41 push @vals, SerializeStruct($val, ']');
42 }
43 $rtnVal = '[' . join(',', @vals) . ']';
44 } elsif (defined $obj) {
45 $obj = $$obj if ref $obj eq 'SCALAR';
46 # escape necessary characters in string (closing bracket plus "," and "|")
47 my $pat = $ket ? "\\$ket|,|\\|" : ',|\\|';
48 ($rtnVal = $obj) =~ s/($pat)/|$1/g;
49 # also must escape opening bracket or whitespace at start of string
50 $rtnVal =~ s/^([\s\[\{])/|$1/;
51 } else {
52 $rtnVal = ''; # allow undefined list items
53 }
54 return $rtnVal;
55}
56
57#------------------------------------------------------------------------------
58# Inflate structure (or other object) from a serialized string
59# Inputs: 0) reference to object in string form (serialized using the '|' escape)
60# 1) extra delimiter for scalar values delimiters
61# Returns: 0) object as a SCALAR, HASH ref, or ARRAY ref (or undef on error),
62# 1) warning string (or undef)
63# Notes: modifies input string to remove parsed objects
64sub InflateStruct($;$)
65{
66 my ($obj, $delim) = @_;
67 my ($val, $warn, $part);
68
69 if ($$obj =~ s/^\s*\{//) {
70 my %struct;
71 while ($$obj =~ s/^\s*([-\w:]+#?)\s*=//s) {
72 my $tag = $1;
73 my ($v, $w) = InflateStruct($obj, '}');
74 $warn = $w if $w and not $warn;
75 return(undef, $warn) unless defined $v;
76 $struct{$tag} = $v;
77 # eat comma separator, or all done if there wasn't one
78 last unless $$obj =~ s/^\s*,//s;
79 }
80 # eat closing brace and warn if we didn't find one
81 unless ($$obj =~ s/^\s*\}//s or $warn) {
82 if (length $$obj) {
83 ($part = $$obj) =~ s/^\s*//s;
84 $part =~ s/[\x0d\x0a].*//s;
85 $part = substr($part,0,27) . '...' if length($part) > 30;
86 $warn = "Invalid structure field at '${part}'";
87 } else {
88 $warn = 'Missing closing brace for structure';
89 }
90 }
91 $val = \%struct;
92 } elsif ($$obj =~ s/^\s*\[//) {
93 my @list;
94 for (;;) {
95 my ($v, $w) = InflateStruct($obj, ']');
96 $warn = $w if $w and not $warn;
97 return(undef, $warn) unless defined $v;
98 push @list, $v;
99 last unless $$obj =~ s/^\s*,//s;
100 }
101 # eat closing bracket and warn if we didn't find one
102 $$obj =~ s/^\s*\]//s or $warn or $warn = 'Missing closing bracket for list';
103 $val = \@list;
104 } else {
105 $$obj =~ s/^\s+//s; # remove leading whitespace
106 # read scalar up to specified delimiter (or "," if not defined)
107 $val = '';
108 $delim = $delim ? "\\$delim|,|\\||\$" : ',|\\||$';
109 for (;;) {
110 $$obj =~ s/^(.*?)($delim)//s or last;
111 $val .= $1;
112 last unless $2;
113 $2 eq '|' or $$obj = $2 . $$obj, last;
114 $$obj =~ s/^(.)//s and $val .= $1; # add escaped character
115 }
116 }
117 return($val, $warn);
118}
119
120#------------------------------------------------------------------------------
121# Get XMP language code from tag name string
122# Inputs: 0) tag name string
123# Returns: 0) separated tag name, 1) language code (in standard case), or '' if
124# language code was 'x-default', or undef if the tag had no language code
125sub GetLangCode($)
126{
127 my $tag = shift;
128 if ($tag =~ /^(\w+)[-_]([a-z]{2,3}|[xi])([-_][a-z\d]{2,8}([-_][a-z\d]{1,8})*)?$/i) {
129 # normalize case of language codes
130 my ($tg, $langCode) = ($1, lc($2));
131 $langCode .= (length($3) == 3 ? uc($3) : lc($3)) if $3;
132 $langCode =~ tr/_/-/; # RFC 3066 specifies '-' as a separator
133 $langCode = '' if lc($langCode) eq 'x-default';
134 return($tg, $langCode);
135 } else {
136 return($tag, undef);
137 }
138}
139
140#------------------------------------------------------------------------------
141# Debugging routine to dump a structure, list or scalar
142# Inputs: 0) scalar, ARRAY ref or HASH ref, 1) indent (or undef)
143sub DumpStruct($;$)
144{
145 local $_;
146 my ($obj, $indent) = @_;
147
148 $indent or $indent = '';
149 if (ref $obj eq 'HASH') {
150 print "{\n";
151 foreach (sort keys %$obj) {
152 print "$indent $_ = ";
153 DumpStruct($$obj{$_}, "$indent ");
154 }
155 print $indent, "},\n";
156 } elsif (ref $obj eq 'ARRAY') {
157 print "[\n";
158 foreach (@$obj) {
159 print "$indent ";
160 DumpStruct($_, "$indent ");
161 }
162 print $indent, "],\n",
163 } else {
164 print "\"$obj\",\n";
165 }
166}
167
168#------------------------------------------------------------------------------
169# Recursively validate structure fields (tags)
170# Inputs: 0) ExifTool ref, 1) Structure ref, 2) structure table definition ref
171# Returns: 0) validated structure ref, 1) error string, or undef on success
172# Notes:
173# - fixes field names in structure and applies inverse conversions to values
174# - copies structure to avoid interdependencies with calling code on referenced values
175# - handles lang-alt tags, and '#' on field names
176# - resets UTF-8 flag of SCALAR values
177# - un-escapes for XML or HTML as per Escape option setting
178sub CheckStruct($$$)
179{
180 my ($et, $struct, $strTable) = @_;
181
182 my $strName = $$strTable{STRUCT_NAME} || ('XMP ' . RegisterNamespace($strTable));
183 ref $struct eq 'HASH' or return wantarray ? (undef, "Expecting $strName structure") : undef;
184
185 my ($key, $err, $warn, %copy, $rtnVal, $val);
186Key:
187 foreach $key (keys %$struct) {
188 my $tag = $key;
189 # allow trailing '#' to disable print conversion on a per-field basis
190 my ($type, $fieldInfo);
191 $type = 'ValueConv' if $tag =~ s/#$//;
192 $fieldInfo = $$strTable{$tag} unless $specialStruct{$tag};
193 # fix case of field name if necessary
194 unless ($fieldInfo) {
195 # (sort in reverse to get lower case (not special) tags first)
196 my ($fix) = reverse sort grep /^$tag$/i, keys %$strTable;
197 $fieldInfo = $$strTable{$tag = $fix} if $fix and not $specialStruct{$fix};
198 }
199 until (ref $fieldInfo eq 'HASH') {
200 # generate wildcard fields on the fly (eg. mwg-rs:Extensions)
201 unless ($$strTable{NAMESPACE}) {
202 my ($grp, $tg, $langCode);
203 ($grp, $tg) = $tag =~ /^(.+):(.+)/ ? (lc $1, $2) : ('', $tag);
204 undef $grp if $grp eq 'XMP'; # (a group of 'XMP' is implied)
205 require Image::ExifTool::TagLookup;
206 my @matches = Image::ExifTool::TagLookup::FindTagInfo($tg);
207 # also look for lang-alt tags
208 unless (@matches) {
209 ($tg, $langCode) = GetLangCode($tg);
210 @matches = Image::ExifTool::TagLookup::FindTagInfo($tg) if defined $langCode;
211 }
212 my ($tagInfo, $priority, $ti, $g1);
213 # find best matching tag
214 foreach $ti (@matches) {
215 my @grps = $et->GetGroup($ti);
216 next unless $grps[0] eq 'XMP';
217 next if $grp and $grp ne lc $grps[1];
218 # must be lang-alt tag if we are writing an alternate language
219 next if defined $langCode and not ($$ti{Writable} and $$ti{Writable} eq 'lang-alt');
220 my $pri = $$ti{Priority} || 1;
221 $pri -= 10 if $$ti{Avoid};
222 next if defined $priority and $priority >= $pri;
223 $priority = $pri;
224 $tagInfo = $ti;
225 $g1 = $grps[1];
226 }
227 $tagInfo or $warn = "'${tag}' is not a writable XMP tag", next Key;
228 GetPropertyPath($tagInfo); # make sure property path is generated for this tag
229 $tag = $$tagInfo{Name};
230 $tag = "$g1:$tag" if $grp;
231 $tag .= "-$langCode" if $langCode;
232 $fieldInfo = $$strTable{$tag};
233 # create new structure field if necessary
234 $fieldInfo or $fieldInfo = $$strTable{$tag} = {
235 %$tagInfo, # (also copies the necessary TagID and PropertyPath)
236 Namespace => $$tagInfo{Namespace} || $$tagInfo{Table}{NAMESPACE},
237 LangCode => $langCode,
238 };
239 # delete stuff we don't need (shouldn't cause harm, but better safe than sorry)
240 # - need to keep StructType and Table in case we need to call AddStructType later
241 delete $$fieldInfo{Description};
242 delete $$fieldInfo{Groups};
243 last; # write this dynamically-generated field
244 }
245 # generate lang-alt fields on the fly (eg. Iptc4xmpExt:AOTitle)
246 my ($tg, $langCode) = GetLangCode($tag);
247 if (defined $langCode) {
248 $fieldInfo = $$strTable{$tg} unless $specialStruct{$tg};
249 unless ($fieldInfo) {
250 my ($fix) = reverse sort grep /^$tg$/i, keys %$strTable;
251 $fieldInfo = $$strTable{$tg = $fix} if $fix and not $specialStruct{$fix};
252 }
253 if (ref $fieldInfo eq 'HASH' and $$fieldInfo{Writable} and
254 $$fieldInfo{Writable} eq 'lang-alt')
255 {
256 my $srcInfo = $fieldInfo;
257 $tag = $tg . '-' . $langCode if $langCode;
258 $fieldInfo = $$strTable{$tag};
259 # create new structure field if necessary
260 $fieldInfo or $fieldInfo = $$strTable{$tag} = {
261 %$srcInfo,
262 TagID => $tg,
263 LangCode => $langCode,
264 };
265 last; # write this lang-alt field
266 }
267 }
268 $warn = "'${tag}' is not a field of $strName";
269 next Key;
270 }
271 if (ref $$struct{$key} eq 'HASH') {
272 $$fieldInfo{Struct} or $warn = "$tag is not a structure in $strName", next Key;
273 # recursively check this structure
274 ($val, $err) = CheckStruct($et, $$struct{$key}, $$fieldInfo{Struct});
275 $err and $warn = $err, next Key;
276 $copy{$tag} = $val;
277 } elsif (ref $$struct{$key} eq 'ARRAY') {
278 $$fieldInfo{List} or $warn = "$tag is not a list in $strName", next Key;
279 # check all items in the list
280 my ($item, @copy);
281 my $i = 0;
282 foreach $item (@{$$struct{$key}}) {
283 if (not ref $item) {
284 $item = '' unless defined $item; # use empty string for missing items
285 if ($$fieldInfo{Struct}) {
286 # (allow empty structures)
287 $item =~ /^\s*$/ or $warn = "$tag items are not valid structures", next Key;
288 $copy[$i] = { }; # create hash for empty structure
289 } else {
290 $et->Sanitize(\$item);
291 ($copy[$i],$err) = $et->ConvInv($item,$fieldInfo,$tag,$strName,$type,'');
292 $copy[$i] = '' unless defined $copy[$i]; # avoid undefined item
293 $err and $warn = $err, next Key;
294 $err = CheckXMP($et, $fieldInfo, \$copy[$i]);
295 $err and $warn = "$err in $strName $tag", next Key;
296 }
297 } elsif (ref $item eq 'HASH') {
298 $$fieldInfo{Struct} or $warn = "$tag is not a structure in $strName", next Key;
299 ($copy[$i], $err) = CheckStruct($et, $item, $$fieldInfo{Struct});
300 $err and $warn = $err, next Key;
301 } else {
302 $warn = "Invalid value for $tag in $strName";
303 next Key;
304 }
305 ++$i;
306 }
307 $copy{$tag} = \@copy;
308 } elsif ($$fieldInfo{Struct}) {
309 $warn = "Improperly formed structure in $strName $tag";
310 } else {
311 $et->Sanitize(\$$struct{$key});
312 ($val,$err) = $et->ConvInv($$struct{$key},$fieldInfo,$tag,$strName,$type,'');
313 $err and $warn = $err, next Key;
314 next Key unless defined $val; # check for undefined
315 $err = CheckXMP($et, $fieldInfo, \$val);
316 $err and $warn = "$err in $strName $tag", next Key;
317 # turn this into a list if necessary
318 $copy{$tag} = $$fieldInfo{List} ? [ $val ] : $val;
319 }
320 }
321 if (%copy or not $warn) {
322 $rtnVal = \%copy;
323 undef $err;
324 $$et{CHECK_WARN} = $warn if $warn;
325 } else {
326 $err = $warn;
327 }
328 return wantarray ? ($rtnVal, $err) : $rtnVal;
329}
330
331#------------------------------------------------------------------------------
332# Delete matching structures from existing linearized XMP
333# Inputs: 0) ExifTool ref, 1) capture hash ref, 2) structure path ref,
334# 3) new value hash ref, 4) reference to change counter
335# Returns: 0) delete flag, 1) list index of deleted structure if adding to list
336# 2) flag set if structure existed
337# Notes: updates path to new base path for structure to be added
338sub DeleteStruct($$$$$)
339{
340 my ($et, $capture, $pathPt, $nvHash, $changed) = @_;
341 my ($deleted, $added, $existed, $p, $pp, $val, $delPath);
342 my (@structPaths, @matchingPaths, @delPaths);
343
344 # find all existing elements belonging to this structure
345 ($pp = $$pathPt) =~ s/ \d+/ \\d\+/g;
346 @structPaths = sort grep(/^$pp(\/|$)/, keys %$capture);
347 $existed = 1 if @structPaths;
348 # delete only structures with matching fields if necessary
349 if ($$nvHash{DelValue}) {
350 if (@{$$nvHash{DelValue}}) {
351 my $strTable = $$nvHash{TagInfo}{Struct};
352 # all fields must match corresponding elements in the same
353 # root structure for it to be deleted
354 foreach $val (@{$$nvHash{DelValue}}) {
355 next unless ref $val eq 'HASH';
356 my (%cap, $p2, %match);
357 next unless AddNewStruct(undef, undef, \%cap, $$pathPt, $val, $strTable);
358 foreach $p (keys %cap) {
359 if ($p =~ / /) {
360 ($p2 = $p) =~ s/ \d+/ \\d\+/g;
361 @matchingPaths = sort grep(/^$p2$/, @structPaths);
362 } else {
363 push @matchingPaths, $p;
364 }
365 foreach $p2 (@matchingPaths) {
366 $p2 =~ /^($pp)/ or next;
367 # language attribute must also match if it exists
368 my $attr = $cap{$p}[1];
369 if ($$attr{'xml:lang'}) {
370 my $a2 = $$capture{$p2}[1];
371 next unless $$a2{'xml:lang'} and $$a2{'xml:lang'} eq $$attr{'xml:lang'};
372 }
373 if ($$capture{$p2} and $$capture{$p2}[0] eq $cap{$p}[0]) {
374 # ($1 contains root path for this structure)
375 $match{$1} = ($match{$1} || 0) + 1;
376 }
377 }
378 }
379 my $num = scalar(keys %cap);
380 foreach $p (keys %match) {
381 # do nothing unless all fields matched the same structure
382 next unless $match{$p} == $num;
383 # delete all elements of this structure
384 foreach $p2 (@structPaths) {
385 push @delPaths, $p2 if $p2 =~ /^$p/;
386 }
387 # remember path of first deleted structure
388 $delPath = $p if not $delPath or $delPath gt $p;
389 }
390 }
391 } # (else don't delete anything)
392 } elsif (@structPaths) {
393 @delPaths = @structPaths; # delete all
394 $structPaths[0] =~ /^($pp)/;
395 $delPath = $1;
396 }
397 if (@delPaths) {
398 my $verbose = $et->Options('Verbose');
399 @delPaths = sort @delPaths if $verbose > 1;
400 foreach $p (@delPaths) {
401 if ($verbose > 1) {
402 my $p2 = $p;
403 $p2 =~ s/^(\w+)/$stdXlatNS{$1} || $1/e;
404 $et->VerboseValue("- XMP-$p2", $$capture{$p}[0]);
405 }
406 delete $$capture{$p};
407 $deleted = 1;
408 ++$$changed;
409 }
410 $delPath or warn("Internal error 1 in DeleteStruct\n"), return(undef,undef,$existed);
411 $$pathPt = $delPath; # return path of first element deleted
412 } elsif ($$nvHash{TagInfo}{List}) {
413 # NOTE: we don't yet properly handle lang-alt elements!!!!
414 if (@structPaths) {
415 $structPaths[-1] =~ /^($pp)/ or warn("Internal error 2 in DeleteStruct\n"), return(undef,undef,$existed);
416 my $path = $1;
417 # delete any improperly formatted xmp
418 if ($$capture{$path}) {
419 my $cap = $$capture{$path};
420 # an error unless this was an empty structure
421 $et->Error("Improperly structured XMP ($path)",1) if ref $cap ne 'ARRAY' or $$cap[0];
422 delete $$capture{$path};
423 }
424 # (match last index to put in same lang-alt list for Bag of lang-alt items)
425 $path =~ m/.* (\d+)/g or warn("Internal error 3 in DeleteStruct\n"), return(undef,undef,$existed);
426 $added = $1;
427 # add after last item in list
428 my $len = length $added;
429 my $pos = pos($path) - $len;
430 my $nxt = substr($added, 1) + 1;
431 substr($path, $pos, $len) = length($nxt) . $nxt;
432 $$pathPt = $path;
433 } else {
434 $added = '10';
435 }
436 }
437 return($deleted, $added, $existed);
438}
439
440#------------------------------------------------------------------------------
441# Add new element to XMP capture hash
442# Inputs: 0) ExifTool ref, 1) TagInfo ref, 2) capture hash ref,
443# 3) resource path, 4) value ref, 5) hash ref for last used index numbers
444sub AddNewTag($$$$$$)
445{
446 my ($et, $tagInfo, $capture, $path, $valPtr, $langIdx) = @_;
447 my $val = EscapeXML($$valPtr);
448 my %attrs;
449 # support writing RDF "resource" values
450 if ($$tagInfo{Resource}) {
451 $attrs{'rdf:resource'} = $val;
452 $val = '';
453 }
454 if ($$tagInfo{Writable} and $$tagInfo{Writable} eq 'lang-alt') {
455 # write the lang-alt tag
456 my $langCode = $$tagInfo{LangCode};
457 # add indexed lang-alt list properties
458 my $i = $$langIdx{$path} || 0;
459 $$langIdx{$path} = $i + 1; # save next list index
460 if ($i) {
461 my $idx = length($i) . $i;
462 $path =~ s/(.*) \d+/$1 $idx/; # set list index
463 }
464 $attrs{'xml:lang'} = $langCode || 'x-default';
465 }
466 $$capture{$path} = [ $val, \%attrs ];
467 # print verbose message
468 if ($et and $et->Options('Verbose') > 1) {
469 my $p = $path;
470 $p =~ s/^(\w+)/$stdXlatNS{$1} || $1/e;
471 $et->VerboseValue("+ XMP-$p", $val);
472 }
473}
474
475#------------------------------------------------------------------------------
476# Add new structure to capture hash for writing
477# Inputs: 0) ExifTool object ref (or undef for no warnings),
478# 1) tagInfo ref (or undef if no ExifTool), 2) capture hash ref,
479# 3) base path, 4) struct ref, 5) struct hash ref
480# Returns: number of tags changed
481# Notes: Escapes values for XML
482sub AddNewStruct($$$$$$)
483{
484 my ($et, $tagInfo, $capture, $basePath, $struct, $strTable) = @_;
485 my $verbose = $et ? $et->Options('Verbose') : 0;
486 my ($tag, %langIdx);
487
488 my $ns = $$strTable{NAMESPACE} || '';
489 my $changed = 0;
490
491 # add dummy field to allow empty structures (name starts with '~' so it will come
492 # after all valid structure fields, which is necessary when serializing the XMP later)
493 %$struct or $$struct{'~dummy~'} = '';
494
495 foreach $tag (sort keys %$struct) {
496 my $fieldInfo = $$strTable{$tag};
497 unless ($fieldInfo) {
498 next unless $tag eq '~dummy~'; # check for dummy field
499 $fieldInfo = { }; # create dummy field info for dummy structure
500 }
501 my $val = $$struct{$tag};
502 my $propPath = $$fieldInfo{PropertyPath};
503 unless ($propPath) {
504 $propPath = ($$fieldInfo{Namespace} || $ns) . ':' . ($$fieldInfo{TagID} || $tag);
505 if ($$fieldInfo{List}) {
506 $propPath .= "/rdf:$$fieldInfo{List}/rdf:li 10";
507 }
508 if ($$fieldInfo{Writable} and $$fieldInfo{Writable} eq 'lang-alt') {
509 $propPath .= "/rdf:Alt/rdf:li 10";
510 }
511 $$fieldInfo{PropertyPath} = $propPath; # save for next time
512 }
513 my $path = $basePath . '/' . ConformPathToNamespace($et, $propPath);
514 my $addedTag;
515 if (ref $val eq 'HASH') {
516 my $subStruct = $$fieldInfo{Struct} or next;
517 $changed += AddNewStruct($et, $tagInfo, $capture, $path, $val, $subStruct);
518 } elsif (ref $val eq 'ARRAY') {
519 next unless $$fieldInfo{List};
520 my $i = 0;
521 my ($item, $p);
522 my $level = scalar(() = ($propPath =~ / \d+/g));
523 # loop through all list items (note: can't yet write multi-dimensional lists)
524 foreach $item (@{$val}) {
525 if ($i) {
526 # update first index in field property (may be list of lang-alt lists)
527 $p = ConformPathToNamespace($et, $propPath);
528 my $idx = length($i) . $i;
529 $p =~ s/ \d+/ $idx/;
530 $p = "$basePath/$p";
531 } else {
532 $p = $path;
533 }
534 if (ref $item eq 'HASH') {
535 my $subStruct = $$fieldInfo{Struct} or next;
536 AddNewStruct($et, $tagInfo, $capture, $p, $item, $subStruct) or next;
537 # don't write empty items in upper-level list
538 } elsif (length $item or (defined $item and $level == 1)) {
539 AddNewTag($et, $fieldInfo, $capture, $p, \$item, \%langIdx);
540 $addedTag = 1;
541 }
542 ++$changed;
543 ++$i;
544 }
545 } else {
546 AddNewTag($et, $fieldInfo, $capture, $path, \$val, \%langIdx);
547 $addedTag = 1;
548 ++$changed;
549 }
550 # this is tricky, but we must add the rdf:type for contained structures
551 # in the case that a whole hierarchy was added at once by writing a
552 # flattened tag inside a variable-namespace structure
553 if ($addedTag and $$fieldInfo{StructType} and $$fieldInfo{Table}) {
554 AddStructType($et, $$fieldInfo{Table}, $capture, $propPath, $basePath);
555 }
556 }
557 # add 'rdf:type' property if necessary
558 if ($$strTable{TYPE} and $changed) {
559 my $path = $basePath . '/' . ConformPathToNamespace($et, "rdf:type");
560 unless ($$capture{$path}) {
561 $$capture{$path} = [ '', { 'rdf:resource' => $$strTable{TYPE} } ];
562 if ($verbose > 1) {
563 my $p = $path;
564 $p =~ s/^(\w+)/$stdXlatNS{$1} || $1/e;
565 $et->VerboseValue("+ XMP-$p", $$strTable{TYPE});
566 }
567 }
568 }
569 return $changed;
570}
571
572#------------------------------------------------------------------------------
573# Convert structure field values for printing
574# Inputs: 0) ExifTool ref, 1) tagInfo ref for structure tag, 2) value,
575# 3) conversion type: PrintConv, ValueConv or Raw (Both not allowed)
576# 4) tagID of parent structure (needed only if there was no flattened tag)
577# Notes: Makes a copy of the hash so any applied escapes won't affect raw values
578sub ConvertStruct($$$$;$)
579{
580 my ($et, $tagInfo, $value, $type, $parentID) = @_;
581 if (ref $value eq 'HASH') {
582 my (%struct, $key);
583 my $table = $$tagInfo{Table};
584 $parentID = $$tagInfo{TagID} unless $parentID;
585 foreach $key (keys %$value) {
586 my $tagID = $parentID . ucfirst($key);
587 my $flatInfo = $$table{$tagID};
588 unless ($flatInfo) {
589 # handle variable-namespace structures
590 if ($key =~ /^XMP-(.*?:)(.*)/) {
591 $tagID = $1 . $parentID . ucfirst($2);
592 $flatInfo = $$table{$tagID};
593 }
594 $flatInfo or $flatInfo = $tagInfo;
595 }
596 my $v = $$value{$key};
597 if (ref $v) {
598 $v = ConvertStruct($et, $flatInfo, $v, $type, $tagID);
599 } else {
600 $v = $et->GetValue($flatInfo, $type, $v);
601 }
602 $struct{$key} = $v if defined $v; # save the converted value
603 }
604 return \%struct;
605 } elsif (ref $value eq 'ARRAY') {
606 if (defined $$et{OPTIONS}{ListItem}) {
607 my $li = $$et{OPTIONS}{ListItem};
608 return undef unless defined $$value[$li];
609 undef $$et{OPTIONS}{ListItem}; # only do top-level list
610 my $val = ConvertStruct($et, $tagInfo, $$value[$li], $type, $parentID);
611 $$et{OPTIONS}{ListItem} = $li;
612 return $val;
613 } else {
614 my (@list, $val);
615 foreach $val (@$value) {
616 my $v = ConvertStruct($et, $tagInfo, $val, $type, $parentID);
617 push @list, $v if defined $v;
618 }
619 return \@list;
620 }
621 } else {
622 return $et->GetValue($tagInfo, $type, $value);
623 }
624}
625
626#------------------------------------------------------------------------------
627# Restore XMP structures in extracted information
628# Inputs: 0) ExifTool object ref, 1) flag to keep original flattened tags
629# Notes: also restores lists (including multi-dimensional)
630sub RestoreStruct($;$)
631{
632 local $_;
633 my ($et, $keepFlat) = @_;
634 my ($key, %structs, %var, %lists, $si, %listKeys, @siList);
635 my $ex = $$et{TAG_EXTRA};
636 my $valueHash = $$et{VALUE};
637 my $fileOrder = $$et{FILE_ORDER};
638 my $tagExtra = $$et{TAG_EXTRA};
639 foreach $key (keys %{$$et{TAG_INFO}}) {
640 $$ex{$key} or next;
641 my $structProps = $$ex{$key}{Struct} or next;
642 delete $$ex{$key}{Struct}; # (don't re-use)
643 my $tagInfo = $$et{TAG_INFO}{$key}; # tagInfo for flattened tag
644 my $table = $$tagInfo{Table};
645 my $prop = shift @$structProps;
646 my $tag = $$prop[0];
647 # get reference to structure tag (or normal list tag if not a structure)
648 my $strInfo = @$structProps ? $$table{$tag} : $tagInfo;
649 if ($strInfo) {
650 ref $strInfo eq 'HASH' or next; # (just to be safe)
651 if (@$structProps and not $$strInfo{Struct}) {
652 # this could happen for invalid XMP containing mixed lists
653 # (or for something like this -- what should we do here?:
654 # <meta:user-defined meta:name="License">test</meta:user-defined>)
655 $et->Warn("$$strInfo{Name} is not a structure!") unless $$et{NO_STRUCT_WARN};
656 next;
657 }
658 } else {
659 # create new entry in tag table for this structure
660 my $g1 = $$table{GROUPS}{0} || 'XMP';
661 my $name = $tag;
662 # tag keys will have a group 1 prefix when coming from import of XML from -X option
663 if ($tag =~ /(.+):(.+)/) {
664 my $ns;
665 ($ns, $name) = ($1, $2);
666 $ns =~ s/^XMP-//; # remove leading "XMP-" if it exists because we add it later
667 $ns = $stdXlatNS{$ns} if $stdXlatNS{$ns};
668 $g1 .= "-$ns";
669 }
670 $strInfo = {
671 Name => ucfirst $name,
672 Groups => { 1 => $g1 },
673 Struct => 'Unknown',
674 };
675 # add Struct entry if this is a structure
676 if (@$structProps) {
677 # this is a structure
678 $$strInfo{Struct} = { STRUCT_NAME => 'XMP Unknown' } if @$structProps;
679 } elsif ($$tagInfo{LangCode}) {
680 # this is lang-alt list
681 $tag = $tag . '-' . $$tagInfo{LangCode};
682 $$strInfo{LangCode} = $$tagInfo{LangCode};
683 }
684 AddTagToTable($table, $tag, $strInfo);
685 }
686 # use strInfo ref for base key to avoid collisions
687 $tag = $strInfo;
688 my $struct = \%structs;
689 my $oldStruct = $structs{$strInfo};
690 # (fyi: 'lang-alt' Writable type will be valid even if tag is not pre-defined)
691 my $writable = $$tagInfo{Writable} || '';
692 # walk through the stored structure property information
693 # to rebuild this structure
694 my ($err, $i);
695 for (;;) {
696 my $index = $$prop[1];
697 if ($index and not @$structProps) {
698 # ignore this list if it is a simple lang-alt tag
699 if ($writable eq 'lang-alt') {
700 pop @$prop; # remove lang-alt index
701 undef $index if @$prop < 2;
702 }
703 # add language code if necessary
704 if ($$tagInfo{LangCode} and not ref $tag) {
705 $tag = $tag . '-' . $$tagInfo{LangCode};
706 }
707 }
708 my $nextStruct = $$struct{$tag};
709 if (defined $index) {
710 # the field is a list
711 $index = substr $index, 1; # remove digit count
712 if ($nextStruct) {
713 ref $nextStruct eq 'ARRAY' or $err = 2, last;
714 $struct = $nextStruct;
715 } else {
716 $struct = $$struct{$tag} = [ ];
717 }
718 $nextStruct = $$struct[$index];
719 # descend into multi-dimensional lists
720 for ($i=2; $$prop[$i]; ++$i) {
721 if ($nextStruct) {
722 ref $nextStruct eq 'ARRAY' or last;
723 $struct = $nextStruct;
724 } else {
725 $lists{$struct} = $struct;
726 $struct = $$struct[$index] = [ ];
727 }
728 $nextStruct = $$struct[$index];
729 $index = substr $$prop[$i], 1;
730 }
731 if (ref $nextStruct eq 'HASH') {
732 $struct = $nextStruct; # continue building sub-structure
733 } elsif (@$structProps) {
734 $lists{$struct} = $struct;
735 $struct = $$struct[$index] = { };
736 } else {
737 $lists{$struct} = $struct;
738 $$struct[$index] = $$valueHash{$key};
739 last;
740 }
741 } else {
742 if ($nextStruct) {
743 ref $nextStruct eq 'HASH' or $err = 3, last;
744 $struct = $nextStruct;
745 } elsif (@$structProps) {
746 $struct = $$struct{$tag} = { };
747 } else {
748 $$struct{$tag} = $$valueHash{$key};
749 last;
750 }
751 }
752 $prop = shift @$structProps or last;
753 $tag = $$prop[0];
754 if ($tag =~ /(.+):(.+)/) {
755 # tag in variable-namespace tables will have a leading
756 # XMP namespace on the tag name. In this case, add
757 # the corresponding group1 name to the tag ID.
758 my ($ns, $name) = ($1, $2);
759 $ns = $stdXlatNS{$ns} if $stdXlatNS{$ns};
760 $tag = "XMP-$ns:" . ucfirst $name;
761 } else {
762 $tag = ucfirst $tag;
763 }
764 }
765 if ($err) {
766 # this may happen if we have a structural error in the XMP
767 # (like an improperly contained list for example)
768 unless ($$et{NO_STRUCT_WARN}) {
769 my $ns = $$tagInfo{Namespace} || $$tagInfo{Table}{NAMESPACE} || '';
770 $et->Warn("Error $err placing $ns:$$tagInfo{TagID} in structure or list", 1);
771 }
772 delete $structs{$strInfo} unless $oldStruct;
773 } elsif ($tagInfo eq $strInfo) {
774 # just a regular list tag (or an empty structure)
775 if ($oldStruct) {
776 # keep tag with lowest numbered key (well, not exactly, since
777 # "Tag (10)" is lt "Tag (2)", but at least "Tag" is lt
778 # everything else, and this is really what we care about)
779 my $k = $listKeys{$oldStruct};
780 if ($k) { # ($k will be undef for an empty structure)
781 if ($k lt $key) {
782 # keep lowest file order
783 $$fileOrder{$k} = $$fileOrder{$key} if $$fileOrder{$k} > $$fileOrder{$key};
784 $et->DeleteTag($key);
785 next;
786 }
787 $$fileOrder{$key} = $$fileOrder{$k} if $$fileOrder{$key} > $$fileOrder{$k};
788 $et->DeleteTag($k); # remove tag with greater copy number
789 }
790 }
791 # replace existing value with new list
792 $$valueHash{$key} = $structs{$strInfo};
793 $listKeys{$structs{$strInfo}} = $key; # save key for this list tag
794 } else {
795 # save strInfo ref and file order
796 if ($var{$strInfo}) {
797 # set file order to just before the first associated flattened tag
798 if ($var{$strInfo}[1] > $$fileOrder{$key}) {
799 $var{$strInfo}[1] = $$fileOrder{$key} - 0.5;
800 }
801 } else {
802 $var{$strInfo} = [ $strInfo, $$fileOrder{$key} - 0.5 ];
803 }
804 # preserve original flattened tags if requested
805 if ($keepFlat) {
806 my $extra = $$tagExtra{$key} or next;
807 # restore list behaviour of this flattened tag
808 if ($$extra{NoList}) {
809 $$valueHash{$key} = $$extra{NoList};
810 delete $$extra{NoList};
811 } elsif ($$extra{NoListDel}) {
812 # delete this tag since its value was included another list
813 $et->DeleteTag($key);
814 }
815 } else {
816 $et->DeleteTag($key); # delete the flattened tag
817 }
818 }
819 }
820 # fill in undefined items in lists. In theory, undefined list items should
821 # be fine, but in practice the calling code may not check for this (and
822 # historically this wasn't necessary, so do this for backward compatibility)
823 foreach $si (keys %lists) {
824 defined $_ or $_ = '' foreach @{$lists{$si}};
825 }
826 # make a list of all new structures we generated
827 $var{$_} and push @siList, $_ foreach keys %structs;
828 # save new structures in the same order they were read from file
829 foreach $si (sort { $var{$a}[1] <=> $var{$b}[1] } @siList) {
830 $key = $et->FoundTag($var{$si}[0], '');
831 $$valueHash{$key} = $structs{$si};
832 $$fileOrder{$key} = $var{$si}[1];
833 }
834}
835
836
8371; #end
838
839__END__
840
841=head1 NAME
842
843Image::ExifTool::XMPStruct.pl - XMP structure support
844
845=head1 SYNOPSIS
846
847This module is loaded automatically by Image::ExifTool when required.
848
849=head1 DESCRIPTION
850
851This file contains routines to provide read/write support of structured XMP
852information.
853
854=head1 AUTHOR
855
856Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
857
858This library is free software; you can redistribute it and/or modify it
859under the same terms as Perl itself.
860
861=head1 SEE ALSO
862
863L<Image::ExifTool::TagNames/XMP Tags>,
864L<Image::ExifTool(3pm)|Image::ExifTool>
865
866=cut
Note: See TracBrowser for help on using the repository browser.