source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/WTV.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 13.0 KB
Line 
1#------------------------------------------------------------------------------
2# File: WTV.pm
3#
4# Description: Read WTV meta information
5#
6# Revisions: 2018-05-30 - P. Harvey Created
7#
8# References: 1) https://wiki.multimedia.cx/index.php?title=WTV
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::WTV;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16
17$VERSION = '1.00';
18
19sub ProcessMetadata($$$);
20
21my %timeInfo = (
22 # time looks like 100 ns intervals since 0:00 UTC Jan 1, 0001 (ref PH)
23 ValueConv => q{ # (719162 days from 0001:01:01 to 1970:01:01)
24 my $t = $val / 1e7 - 719162*24*3600;
25 return Image::ExifTool::ConvertUnixTime($t) . 'Z';
26 },
27 PrintConv => '$self->ConvertDateTime($val)',
28);
29
30my %bool = ( PrintConv => { 0 => 'No', 1 => 'Yes' }, PrintConvColumns => 2 );
31
32# WTV chunks
33%Image::ExifTool::WTV::Main = (
34 GROUPS => { 0 => 'WTV', 1 => 'WTV', 2 => 'Video' },
35 NOTES => 'Tags found in Windows recorded TV (WTV) videos.',
36 # 'timeline.table.0.header.Events' (not decoded)
37 # 'timeline.table.0.entries.Events' (not decoded)
38 # 'timeline' (not decoded)
39 # 'table.0.header.legacy_attrib' (not decoded)
40 'table.0.entries.legacy_attrib' => {
41 Name => 'Metdata',
42 SubDirectory => { TagTable => 'Image::ExifTool::WTV::Metadata' },
43 },
44 # 'table.0.redirector.legacy_attrib' (not decoded)
45 # 'table.0.header.time' (not decoded)
46 # 'table.0.entries.time' (not decoded)
47);
48
49# Note: Many of these tags are similar to those found in Image::ExifTool::Microsoft::Xtra
50# and Image::ExifTool::ASF::ExtendedDescr
51%Image::ExifTool::WTV::Metadata = (
52 GROUPS => { 0 => 'WTV', 1 => 'WTV', 2 => 'Video' },
53 PROCESS_PROC => \&ProcessMetadata,
54 NOTES => 'ExifTool will extract any tag found, even if not in this table.',
55 VARS => { NO_ID => 1 },
56 'Duration' => {
57 Name => 'Duration',
58 ValueConv => '$val/1e7',
59 PrintConv => 'ConvertDuration($val)',
60 },
61 'Title' => { },
62 'WM/Genre' => 'Genre',
63 'WM/Language' => 'Language',
64 'WM/MediaClassPrimaryID' => 'MediaClassPrimaryID',
65 'WM/MediaClassSecondaryID' => 'MediaClassSecondaryID',
66 'WM/MediaCredits' => 'MediaCredits',
67 'WM/MediaIsDelay' => { Name => 'MediaIsDelay', %bool },
68 'WM/MediaIsFinale' => { Name => 'MediaIsFinale', %bool },
69 'WM/MediaIsLive' => { Name => 'MediaIsLive', %bool },
70 'WM/MediaIsMovie' => { Name => 'MediaIsMovie', %bool },
71 'WM/MediaIsPremiere' => { Name => 'MediaIsPremiere', %bool },
72 'WM/MediaIsRepeat' => { Name => 'MediaIsRepeat', %bool },
73 'WM/MediaIsSAP' => { Name => 'MediaIsSAP', %bool },
74 'WM/MediaIsSport' => { Name => 'MediaIsSport', %bool },
75 'WM/MediaIsStereo' => { Name => 'MediaIsStereo', %bool, Groups => { 2 => 'Audio' } },
76 'WM/MediaIsSubtitled' => { Name => 'MediaIsSubtitled',%bool },
77 'WM/MediaIsTape' => { Name => 'MediaIsTape', %bool },
78 'WM/MediaNetworkAffiliation'=> 'MediaNetworkAffiliation',
79 'WM/MediaOriginalBroadcastDateTime' => {
80 Name => 'MediaOriginalBroadcastDateTime',
81 Groups => { 2 => 'Time' },
82 ValueConv => '$val =~ tr/-T/: /; $val',
83 PrintConv => '$self->ConvertDateTime($val)',
84 },
85 'WM/MediaOriginalChannel' => { Name => 'MediaOriginalChannel' },
86 'WM/MediaOriginalChannelSubNumber' => { Name => 'MediaOriginalChannelSubNumber' },
87 'WM/MediaOriginalRunTime' => {
88 Name => 'MediaOriginalRunTime',
89 ValueConv => '$val / 1e7',
90 PrintConv => 'ConvertDuration($val)',
91 },
92 'WM/MediaStationCallSign' => 'MediaStationCallSign',
93 'WM/MediaStationName' => 'MediaStationName',
94 'WM/MediaThumbAspectRatioX' => 'MediaThumbAspectRatioX',
95 'WM/MediaThumbAspectRatioY' => 'MediaThumbAspectRatioY',
96 'WM/MediaThumbHeight' => 'MediaThumbHeight',
97 'WM/MediaThumbRatingAttributes' => { Name => 'MediaThumbRatingAttributes' },
98 'WM/MediaThumbRatingLevel' => 'MediaThumbRatingLevel',
99 'WM/MediaThumbRatingSystem' => 'MediaThumbRatingSystem',
100 'WM/MediaThumbRet' => 'MediaThumbRet',
101 'WM/MediaThumbStride' => 'MediaThumbStride',
102 'WM/MediaThumbTimeStamp' => { Name => 'MediaThumbTimeStamp', Notes => 'unknown units', Unknown => 1 },
103 'WM/MediaThumbWidth' => 'MediaThumbWidth',
104 'WM/OriginalReleaseTime' => {
105 Name => 'OriginalReleaseTime',
106 Groups => { 2 => 'Time' },
107 ValueConv => '$val=~tr/-T/: /; $val',
108 PrintConv => '$self->ConvertDateTime($val)',
109 },
110 'WM/ParentalRating' => 'ParentalRating',
111 'WM/ParentalRatingReason' => 'ParentalRatingReason',
112 'WM/Provider' => 'Provider',
113 'WM/ProviderCopyright' => 'ProviderCopyright',
114 'WM/ProviderRating' => 'ProviderRating',
115 'WM/SubTitle' => 'Subtitle',
116 'WM/SubTitleDescription' => 'SubtitleDescription',
117 'WM/VideoClosedCaptioning' => { Name => 'VideoClosedCaptioning', %bool },
118 'WM/WMRVATSCContent' => { Name => 'ATSCContent', %bool },
119 'WM/WMRVActualSoftPostPadding' => 'ActualSoftPostPadding',
120 'WM/WMRVActualSoftPrePadding' => 'ActualSoftPrePadding',
121 'WM/WMRVBitrate' => { Name => 'Bitrate', Notes => 'unknown units', Unknown => 1 },
122 'WM/WMRVBrandingImageID' => 'BrandingImageID',
123 'WM/WMRVBrandingName' => 'BrandingName',
124 'WM/WMRVContentProtected' => { Name => 'ContentProtected', %bool },
125 'WM/WMRVContentProtectedPercent' => 'ContentProtectedPercent',
126 'WM/WMRVDTVContent' => { Name => 'DTVContent', %bool },
127 'WM/WMRVEncodeTime' => { Name => 'EncodeTime', Groups => { 2 => 'Time' }, %timeInfo },
128 'WM/WMRVEndTime' => { Name => 'EndTime', Groups => { 2 => 'Time' }, %timeInfo },
129 'WM/WMRVExpirationDate' => { Name => 'ExpirationDate', Groups => { 2 => 'Time' }, %timeInfo, Unknown => 1 },
130 'WM/WMRVExpirationSpan' => { Name => 'ExpirationSpan', Notes => 'unknown units', Unknown => 1 },
131 'WM/WMRVHDContent' => { Name => 'HDContent', %bool },
132 'WM/WMRVHardPostPadding' => 'HardPostPadding',
133 'WM/WMRVHardPrePadding' => 'HardPrePadding',
134 'WM/WMRVInBandRatingAttributes' => 'InBandRatingAttributes',
135 'WM/WMRVInBandRatingLevel' => 'InBandRatingLevel',
136 'WM/WMRVInBandRatingSystem' => 'InBandRatingSystem',
137 'WM/WMRVKeepUntil' => 'KeepUntil',
138 'WM/WMRVOriginalSoftPostPadding'=> 'OriginalSoftPostPadding',
139 'WM/WMRVOriginalSoftPrePadding' => 'OriginalSoftPrePadding',
140 'WM/WMRVProgramID' => 'ProgramID',
141 'WM/WMRVQuality' => 'Quality',
142 'WM/WMRVRequestID' => 'RequestID',
143 'WM/WMRVScheduleItemID' => 'ScheduleItemID',
144 'WM/WMRVSeriesUID' => 'SeriesUID',
145 'WM/WMRVServiceID' => 'ServiceID',
146 'WM/WMRVWatched' => { Name => 'Watched', %bool },
147);
148
149#------------------------------------------------------------------------------
150# Read specified sectors from the file
151# Inputs: 0) raf ref, 1) sector table ref, 2) offset in sector table, 3) sector size
152# Returns: Data or undef on error
153sub ReadSectors($$$$)
154{
155 my ($raf, $secPt, $pos, $secSize) = @_;
156 my ($data, $buff);
157 while ($pos <= length($$secPt) - 4) {
158 my $sec = Get32u($secPt, $pos);
159 return undef if $sec == 0xffff; # (just in case)
160 last unless $sec; # a null marks the end of the sector table
161 defined($data) ? ($data .= $buff) : ($data = $buff);
162 return undef unless $raf->Seek($sec*$secSize,0) and $raf->Read($buff,$secSize) == $secSize;
163 $pos += 4;
164 }
165 return defined($data) ? $data . $buff : $buff;
166}
167
168#------------------------------------------------------------------------------
169# Process WTV metadata
170# Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) tag table ref
171# Returns: 1 on success
172sub ProcessMetadata($$$)
173{
174 my ($et, $dirInfo, $tagTablePtr) = @_;
175 my $dataPt = $$dirInfo{DataPt};
176 my $pos = 0;
177 my $end = length $$dataPt;
178 $et->VerboseDir('WTV Metadata', undef, $end);
179 while ($pos + 0x18 < $end) {
180 last unless substr($$dataPt,$pos,16) eq "\x5a\xfe\xd7\x6d\xc8\x1d\x8f\x4a\x99\x22\xfa\xb1\x1c\x38\x14\x53";
181 my $fmt = Get32u($dataPt, $pos + 0x10);
182 my $len = Get32u($dataPt, $pos + 0x14);
183 my $str = '';
184 $pos += 0x18;
185 for (;;) {
186 $pos + 2 > $end and $et->Warn('Corrupt metadata directory'), last;
187 my $ch = substr($$dataPt, $pos, 2);
188 $pos += 2;
189 last if $ch eq "\0\0";
190 $str .= $ch;
191 }
192 last if $pos + $len > $end;
193 my $tag = $et->Decode($str, 'UCS2', undef, 'UTF8');
194 my $dat = substr($$dataPt, $pos, $len);
195 # add tag if not already there
196 unless ($$tagTablePtr{$tag}) {
197 my $name = $tag;
198 $name =~ s{^(WTV_Metadata_)?WM/(WMRV)?}{};
199 AddTagToTable($tagTablePtr, $tag, $name);
200 $et->VPrint(0, $$et{INDENT}, "[adding WTV:$name]\n");
201 }
202 my $val;
203 if ($fmt==0 or $fmt==3) { # int32u or boolean32
204 $val = Get32s(\$dat, 0);
205 } elsif ($fmt == 1) { # string
206 $val = $et->Decode($dat, 'UCS2');
207 } elsif ($fmt == 6) { # GUID
208 $val = unpack('H*', $dat);
209 } elsif ($fmt == 4) { # int64u (date/time values use this)
210 $val = Get64u(\$dat, 0);
211 } else {
212 $val = $dat;
213 $fmt = "Unknown($fmt)";
214 }
215 $et->HandleTag($tagTablePtr, $tag, $val,
216 Format => "format $fmt",
217 Size => length $dat,
218 );
219 $et->VerboseDump(\$dat);
220 $pos += $len;
221 }
222}
223
224#------------------------------------------------------------------------------
225# Extract information from a WTV video
226# Inputs: 0) ExifTool object reference, 1) dirInfo reference
227# Returns: 1 on success, 0 if this wasn't a valid WTV file
228sub ProcessWTV($$)
229{
230 my ($et, $dirInfo) = @_;
231 my $raf = $$dirInfo{RAF};
232 my $verbose = $et->Options('Verbose');
233 my ($buff, $tagTablePtr, $pos, $len);
234
235 # verify this is a valid WTV file
236 return 0 unless $raf->Read($buff, 0x60) == 0x60;
237 return 0 unless $buff =~ /^\xb7\xd8\x00\x20\x37\x49\xda\x11\xa6\x4e\x00\x07\xe9\x5e\xad\x8d/;
238 $et->SetFileType();
239 SetByteOrder('II');
240 # 0x28 - int32u: sector size? (=0x1000) (PH NC)
241 # 0x38 - int32u: sector number for main WTV directory (PH assume this is a sector table, NC)
242 # 0x58 - int32u: total number of sectors in file
243 my $secSize = Get32u(\$buff, 0x28);
244 # in case I'm wrong about this, constrain sector size to
245 # either 0x1000 (standard) or 0x100 (ExifTool test file) - PH
246 $secSize = 0x1000 unless $secSize == 0x1000 or $secSize == 0x100;
247 $buff = ReadSectors($raf, \$buff, 0x38, $secSize); # read the WTV directory
248 return 0 unless defined $buff;
249 $tagTablePtr = GetTagTable('Image::ExifTool::WTV::Main');
250 # parse the WTV directory
251 $et->VerboseDir('WTV');
252 for ($pos=0; $pos<length($buff)-0x28; $pos+=$len) {
253 unless (substr($buff,$pos,0x10) eq "\x92\xb7\x74\x91\x59\x70\x70\x44\x88\xdf\x06\x3b\x82\xcc\x21\x3d") {
254 $et->Warn("WTV directory wasn't at expected location") unless $pos;
255 last;
256 }
257 $len = Get32u(\$buff, $pos+0x10);
258 last if $pos + $len > length($buff);
259 my $n = Get32u(\$buff, $pos + 0x20);
260 0x28 + $n*2 + 8 > $len and $et->Warn('WTV directory error'), last;
261 my $tag = $et->Decode(substr($buff,$pos+0x28,$n*2), 'UCS2', undef, 'UTF8');
262 my $ptr = $pos + 0x28 + $n * 2;
263 my $flg = Get32u(\$buff, $ptr + 4);
264 if ($verbose) {
265 my $s = Get32s(\$buff, $ptr);
266 $s = sprintf('0x%x', $s) unless $s < 0;
267 $et->VPrint(1,"- Tag '${tag}' (sector=$s, flag=$flg)");
268 }
269 next unless $$tagTablePtr{$tag} and ($flg == 0 or $flg == 1);
270 my $sec = substr($buff, $ptr, 4);
271 my $data = ReadSectors($raf, \$sec, 0, $secSize);
272 last unless defined $data;
273 # read sectors from table if necessary (flag=1 indicates a sector table)
274 $data = ReadSectors($raf, \$data, 0, $secSize) if $flg == 1;
275 defined $data or $et->Warn("Error fetching data for $tag"), next;
276 $et->HandleTag($tagTablePtr, $tag, $data);
277 }
278 return 1;
279}
280
2811; # end
282
283__END__
284
285=head1 NAME
286
287Image::ExifTool::WTV - Read WTV meta information
288
289=head1 SYNOPSIS
290
291This module is used by Image::ExifTool
292
293=head1 DESCRIPTION
294
295This module contains definitions required by Image::ExifTool to read WTV
296(Windows recorded TV show) videos.
297
298=head1 AUTHOR
299
300Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
301
302This library is free software; you can redistribute it and/or modify it
303under the same terms as Perl itself.
304
305=head1 REFERENCES
306
307=over 4
308
309=item L<https://wiki.multimedia.cx/index.php?title=WTV>
310
311=back
312
313=head1 SEE ALSO
314
315L<Image::ExifTool::TagNames/WTV Tags>,
316L<Image::ExifTool(3pm)|Image::ExifTool>
317
318=cut
319
Note: See TracBrowser for help on using the repository browser.