- Timestamp:
- 2021-02-26T19:39:51+13:00 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Charset.pm
r24107 r34921 15 15 use Image::ExifTool qw(:DataAccess :Utils); 16 16 17 $VERSION = '1. 07';17 $VERSION = '1.11'; 18 18 19 19 my %charsetTable; # character set tables we've loaded … … 58 58 Latin => 0x101, 59 59 Latin2 => 0x101, 60 DOSLatinUS => 0x101, 61 DOSLatin1 => 0x101, 62 DOSCyrillic => 0x101, 60 63 MacCroatian => 0x101, 61 64 MacCyrillic => 0x101, … … 107 110 108 111 #------------------------------------------------------------------------------ 112 # Does an array contain valid UTF-16 characters? 113 # Inputs: 0) array reference to list of UCS-2 values 114 # Returns: 0=invalid UTF-16, 1=valid UTF-16 with no surrogates, 2=valid UTF-16 with surrogates 115 sub IsUTF16($) 116 { 117 local $_; 118 my $uni = shift; 119 my $surrogate; 120 foreach (@$uni) { 121 my $hiBits = ($_ & 0xfc00); 122 if ($hiBits == 0xfc00) { 123 # check for invalid values in UTF-16 124 return 0 if $_ == 0xffff or $_ == 0xfffe or ($_ >= 0xfdd0 and $_ <= 0xfdef); 125 } elsif ($surrogate) { 126 return 0 if $hiBits != 0xdc00; 127 $surrogate = 0; 128 } else { 129 return 0 if $hiBits == 0xdc00; 130 $surrogate = 1 if $hiBits == 0xd800; 131 } 132 } 133 return 1 if not defined $surrogate; 134 return 2 unless $surrogate; 135 return 0; 136 } 137 138 #------------------------------------------------------------------------------ 109 139 # Decompose string with specified encoding into an array of integer code points 110 140 # Inputs: 0) ExifTool object ref (or undef), 1) string, 2) character set name, … … 115 145 # - byte order mark observed and then removed with UCS2 and UCS4 116 146 # - no warnings are issued if ExifTool object is not provided 147 # - sets ExifTool WrongByteOrder flag if byte order is Unknown and current order is wrong 117 148 sub Decompose($$$;$) 118 149 { 119 150 local $_; 120 my ($e xifTool, $val, $charset) = @_; # ($byteOrder assigned later if required)151 my ($et, $val, $charset) = @_; # ($byteOrder assigned later if required) 121 152 my $type = $csType{$charset}; 122 153 my (@uni, $conv); … … 126 157 unless ($conv) { 127 158 # (shouldn't happen) 128 $e xifTool->Warn("Invalid character set $charset") if $exifTool;159 $et->Warn("Invalid character set $charset") if $et; 129 160 return \@uni; # error! 130 161 } … … 141 172 @uni = unpack($] < 5.010000 ? 'U0U*' : 'C0U*', $val); 142 173 # issue warning if we had errors 143 if ($Image::ExifTool::evalWarning and $e xifTool and not $$exifTool{WarnBadUTF8}) {144 $e xifTool->Warn('Malformed UTF-8 character(s)');145 $$e xifTool{WarnBadUTF8} = 1;174 if ($Image::ExifTool::evalWarning and $et and not $$et{WarnBadUTF8}) { 175 $et->Warn('Malformed UTF-8 character(s)'); 176 $$et{WarnBadUTF8} = 1; 146 177 } 147 178 } … … 196 227 $fmt =~ tr/nvNV/vnVN/; 197 228 @uni = unpack($fmt, $val); 229 $$et{WrongByteOrder} = 1; 198 230 } 199 231 } … … 225 257 } 226 258 # use this byte order if there are fewer errors 227 return \@try if $e2 < $e1; 259 if ($e2 < $e1) { 260 $$et{WrongByteOrder} = 1; 261 return \@try; 262 } 228 263 } 229 264 } else { … … 275 310 { 276 311 local $_; 277 my ($e xifTool, $uni, $charset) = @_; # ($byteOrder assigned later if required)312 my ($et, $uni, $charset) = @_; # ($byteOrder assigned later if required) 278 313 my ($outVal, $conv, $inv); 279 $charset or $charset = $$e xifTool{OPTIONS}{Charset};314 $charset or $charset = $$et{OPTIONS}{Charset}; 280 315 my $csType = $csType{$charset}; 281 316 if ($csType == 0x100) { # UTF8 (also treat ASCII as UTF8) … … 294 329 $conv = LoadCharset($charset); 295 330 unless ($conv) { 296 $e xifTool->Warn("Missing charset $charset") if $exifTool;331 $et->Warn("Missing charset $charset") if $et; 297 332 return ''; 298 333 } … … 301 336 unless ($inv) { 302 337 if (not $csType or $csType & 0x802) { 303 $e xifTool->Warn("Invalid destination charset $charset") if $exifTool;338 $et->Warn("Invalid destination charset $charset") if $et; 304 339 return ''; 305 340 } … … 322 357 next if $_ < 0x100 and not $$conv{$_}; 323 358 $_ = ord('?'); # set invalid characters to '?' 324 if ($e xifTool and not $$exifTool{EncodingError}) {325 $e xifTool->Warn("Some character(s) could not be encoded in $charset");326 $$e xifTool{EncodingError} = 1;359 if ($et and not $$et{EncodingError}) { 360 $et->Warn("Some character(s) could not be encoded in $charset"); 361 $$et{EncodingError} = 1; 327 362 } 328 363 } … … 374 409 375 410 UTF8, UTF16, UCS2, UCS4, Arabic, Baltic, Cyrillic, Greek, Hebrew, JIS, 376 Latin, Latin2, MacArabic, MacChineseCN, MacChineseTW, MacCroatian, 377 MacCyrillic, MacGreek, MacHebrew, MacIceland, MacJapanese, MacKorean, 378 MacLatin2, MacRSymbol, MacRoman, MacRomanian, MacThai, MacTurkish, 379 PDFDoc, RSymbol, ShiftJIS, Symbol, Thai, Turkish, Vietnam 411 Latin, Latin2, DOSLatinUS, DOSLatin1, DOSCyrillic, MacArabic, 412 MacChineseCN, MacChineseTW, MacCroatian, MacCyrillic, MacGreek, MacHebrew, 413 MacIceland, MacJapanese, MacKorean, MacLatin2, MacRSymbol, MacRoman, 414 MacRomanian, MacThai, MacTurkish, PDFDoc, RSymbol, ShiftJIS, Symbol, Thai, 415 Turkish, Vietnam 380 416 381 417 However, only some of these character sets are available to the user via 382 ExifTool options ;the multi-byte character sets are used only internally418 ExifTool options -- the multi-byte character sets are used only internally 383 419 when decoding certain types of information. 384 420 385 421 =head1 AUTHOR 386 422 387 Copyright 2003-20 11, Phil Harvey (phil at owl.phy.queensu.ca)423 Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com) 388 424 389 425 This library is free software; you can redistribute it and/or modify it
Note:
See TracChangeset
for help on using the changeset viewer.