source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/ISO.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

File size: 6.8 KB
Line 
1#------------------------------------------------------------------------------
2# File: ISO.pm
3#
4# Description: Read information from ISO 9660 disk images
5#
6# Revisions: 2016-04-07 - P. Harvey created
7#
8# References: 1) http://wiki.osdev.org/ISO_9660
9#------------------------------------------------------------------------------
10
11package Image::ExifTool::ISO;
12
13use strict;
14use vars qw($VERSION);
15use Image::ExifTool qw(:DataAccess :Utils);
16
17$VERSION = '1.01';
18
# Shared tag properties for space-padded text fields: the RawConv strips
# the trailing spaces and returns undef (tag is ignored) if nothing is left
my %rawStr = (
    RawConv => sub {
        my ($text) = @_;
        $text =~ s/ +$//;
        return undef unless length $text;
        return $text;
    },
);
27
# Shared tag properties for the 17-byte volume date/time fields.
# ValueConv expects 16 digits (YYYYMMDDhhmmss + 2-digit fraction) followed by
# one byte that is unpacked as a signed char and scaled by 15 before being
# handed to TimeZoneString (presumably minutes -- ISO 9660 stores the zone
# offset in 15-minute intervals).  Values made up only of "0", NUL and space
# characters are treated as empty; values that don't match the digit pattern
# are returned unchanged.
my %dateInfo = (
    Groups    => { 2 => 'Time' },
    Format    => 'undef[17]',
    PrintConv => '$self->ConvertDateTime($val)',
    ValueConv => q{
        return undef if $val !~ /[^0\0 ]/; # ignore if empty
        if ($val =~ s/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(.)/$1:$2:$3 $4:$5:$6.$7/s) {
            $val .= TimeZoneString(unpack('c', $8) * 15);
        }
        return $val;
    },
);
41
# Names for the volume descriptor type codes (the first byte of each
# descriptor), used for the verbose message printed by ProcessISO
my %volumeDescriptorType = (
    0x00 => 'Boot Record',
    0x01 => 'Primary Volume',
    0x02 => 'Supplementary Volume',
    0x03 => 'Volume Partition',
    0xff => 'Terminator',
);
50
# Main ISO table: keyed by volume descriptor type code (ProcessISO looks up
# the type byte here), each entry naming the table that decodes that
# descriptor
%Image::ExifTool::ISO::Main = (
    GROUPS => { 2 => 'Other' },
    NOTES  => 'Tags extracted from ISO 9660 disk images.',
    # boot record descriptor
    0 => {
        Name         => 'BootRecord',
        SubDirectory => {
            TagTable => 'Image::ExifTool::ISO::BootRecord',
        },
    },
    # primary volume descriptor
    1 => {
        Name         => 'PrimaryVolume',
        SubDirectory => {
            TagTable => 'Image::ExifTool::ISO::PrimaryVolume',
        },
    },
);
64
# Tags of the Boot Record volume descriptor.  Keys are byte offsets into the
# descriptor, which is decoded by ProcessBinaryData.
%Image::ExifTool::ISO::BootRecord = (
    GROUPS       => { 2 => 'Other' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    # (descriptor header fields, not extracted:)
    # 0 => { Name => 'VolumeType', PrintConv => \%volumeDescriptorType }, # (0 for boot record)
    # 1 => { Name => 'Identifier', Format => 'undef[5]' }, # (always "CD001")
    # 6 => 'VolumeDescriptorVersion', # (always 1)
    7 => {
        # always extract BootSystem, even if empty, as an indication that
        # this is bootable (so trailing spaces are trimmed in ValueConv
        # instead of using %rawStr, which would drop an empty value)
        Name      => 'BootSystem',
        Format    => 'string[32]',
        ValueConv => '$val=~s/ +$//; $val',
    },
    39 => {
        Name   => 'BootIdentifier',
        Format => 'string[32]',
        %rawStr,
    },
);
75
# Tags of the Primary Volume Descriptor.  Keys are 0-based byte offsets into
# the descriptor, which is decoded by ProcessBinaryData.
#
# Notes:
# - Numeric fields are read with the byte order selected in ProcessISO
#   (little-endian, 'II').
# - Text fields are space padded; %rawStr trims the padding and drops tags
#   that end up empty.
# - The three file-identifier fields are 37 bytes each: ECMA-119 puts them at
#   byte positions 703-739, 740-776 and 777-813 (1-based), i.e. 0-based
#   offsets 702, 739 and 776.  Earlier versions of this table used 702[38]
#   and 740[36], which let CopyrightFileName run into the abstract file
#   identifier and cut the first character off AbstractFileName.
%Image::ExifTool::ISO::PrimaryVolume = (
    GROUPS       => { 2 => 'Other' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    # (descriptor header fields, not extracted:)
    # 0 => { Name => 'VolumeType', PrintConv => \%volumeDescriptorType }, # (1 for primary volume)
    # 1 => { Name => 'Identifier', Format => 'undef[5]' }, # (always "CD001")
    # 6 => 'VolumeDescriptorVersion', # (always 1)
    8   => { Name => 'System',               Format => 'string[32]', %rawStr },
    40  => { Name => 'VolumeName',           Format => 'string[32]', %rawStr },
    80  => { Name => 'VolumeBlockCount',     Format => 'int32u' },
    120 => { Name => 'VolumeSetDiskCount',   Format => 'int16u', Unknown => 1 },
    124 => { Name => 'VolumeSetDiskNumber',  Format => 'int16u', Unknown => 1 },
    128 => { Name => 'VolumeBlockSize',      Format => 'int16u' },
    132 => { Name => 'PathTableSize',        Format => 'int32u', Unknown => 1 },
    140 => { Name => 'PathTableLocation',    Format => 'int32u', Unknown => 1 },
    174 => {
        Name => 'RootDirectoryCreateDate',
        Format => 'undef[7]',
        Groups => { 2 => 'Time' },
        # 6 unsigned bytes (years since 1900, month, day, hour, minute,
        # second) plus a signed time-zone byte that is scaled by 15 before
        # being passed to TimeZoneString
        ValueConv => q{
            my @a = unpack('C6c', $val);
            $a[0] += 1900;
            $a[6] = TimeZoneString($a[6] * 15);
            return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d%s', @a);
        },
        PrintConv => '$self->ConvertDateTime($val)',
    },
    190 => { Name => 'VolumeSetName',        Format => 'string[128]', %rawStr },
    318 => { Name => 'Publisher',            Format => 'string[128]', %rawStr },
    446 => { Name => 'DataPreparer',         Format => 'string[128]', %rawStr },
    574 => { Name => 'Software',             Format => 'string[128]', %rawStr },
    702 => { Name => 'CopyrightFileName',    Format => 'string[37]',  %rawStr },
    739 => { Name => 'AbstractFileName',     Format => 'string[37]',  %rawStr },
    # (the tag name below is misspelled, but it is the published name, so it
    # is kept as-is for compatibility)
    776 => { Name => 'BibligraphicFileName', Format => 'string[37]',  %rawStr },
    813 => { Name => 'VolumeCreateDate',     %dateInfo },
    830 => { Name => 'VolumeModifyDate',     %dateInfo },
    847 => { Name => 'VolumeExpirationDate', %dateInfo },
    864 => { Name => 'VolumeEffectiveDate',  %dateInfo },
    #881 => 'FileStructureVersion', # (always 1)
);
115
# ISO Composite tags
%Image::ExifTool::ISO::Composite = (
    GROUPS => { 2 => 'Other' },
    # total volume size: block count multiplied by block size, both of
    # which must be present
    VolumeSize => {
        Require   => { 0 => 'ISO:VolumeBlockCount', 1 => 'ISO:VolumeBlockSize' },
        ValueConv => '$val[0] * $val[1]',
        PrintConv => \&Image::ExifTool::ConvertFileSize,
    },
);

# register the composite tags defined above with ExifTool
Image::ExifTool::AddCompositeTags('Image::ExifTool::ISO');
131
#------------------------------------------------------------------------------
# Read the volume descriptors of an ISO 9660 disk image and extract their tags
# Inputs: 0) ExifTool object reference, 1) dirInfo reference (RAF is used)
# Returns: 1 if at least one volume descriptor was found, 0 otherwise
#          (ie. this wasn't a valid ISO 9660 image)
sub ProcessISO($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $sectorLen = 2048;   # descriptors are read one 2048-byte sector at a time
    my ($sector, $mainTable);

    # the volume descriptors are expected at offset 32768
    $raf->Seek(32768, 0) or return 0;

    while (1) {
        # stop at a short read, or at the first sector that doesn't start
        # with a descriptor type byte (0-3 or 255) followed by "CD001"
        last if $raf->Read($sector, $sectorLen) != $sectorLen;
        last if $sector !~ /^[\0-\x03\xff]CD001/;

        # one-time setup when the first valid descriptor is seen
        if (not $mainTable) {
            $et->SetFileType();     # set the FileType tag
            SetByteOrder('II');     # read little-endian values only
            $mainTable = GetTagTable('Image::ExifTool::ISO::Main');
        }

        my $type = unpack('C', $sector);
        $et->VPrint(0, "Volume descriptor type $type ($volumeDescriptorType{$type})\n");
        last if $type == 255;       # stop at terminator

        # skip descriptor types that have no entry in the Main table
        my $tagInfo = $$mainTable{$type};
        next unless $tagInfo;

        # decode this descriptor with the table named by its SubDirectory
        my $subTable = GetTagTable($$tagInfo{SubDirectory}{TagTable});
        $et->ProcessDirectory({
            DataPt   => \$sector,
            DataPos  => $raf->Tell() - $sectorLen,
            DirStart => 0,
            DirLen   => length($sector),
        }, $subTable);
    }
    return 0 unless $mainTable;
    return 1;
}
167
1681; # end
169
170__END__
171
172=head1 NAME
173
174Image::ExifTool::ISO - Read information from ISO 9660 disk images
175
176=head1 SYNOPSIS
177
178This module is used by Image::ExifTool
179
180=head1 DESCRIPTION
181
182This module contains definitions required by Image::ExifTool to read
183information from ISO 9660 disk images.
184
185=head1 AUTHOR
186
187Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
188
189This library is free software; you can redistribute it and/or modify it
190under the same terms as Perl itself.
191
192=head1 REFERENCES
193
194=over 4
195
196=item L<http://wiki.osdev.org/ISO_9660>
197
198=back
199
200=head1 SEE ALSO
201
202L<Image::ExifTool::TagNames/ISO Tags>,
203L<Image::ExifTool(3pm)|Image::ExifTool>
204
205=cut
206
Note: See TracBrowser for help on using the repository browser.