- Timestamp:
- 2021-02-26T19:39:51+13:00 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Import.pm
r24107 r34921 13 13 use vars qw($VERSION @ISA @EXPORT_OK); 14 14 15 $VERSION = '1. 00';15 $VERSION = '1.10'; 16 16 @ISA = qw(Exporter); 17 17 @EXPORT_OK = qw(ReadCSV ReadJSON); … … 19 19 sub ReadJSONObject($;$); 20 20 21 my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r" );21 my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r", 'b' => "\b", 'f' => "\f" ); 22 22 my $charset; 23 23 24 24 #------------------------------------------------------------------------------ 25 25 # Read CSV file 26 # Inputs: 0) CSV file name, 1) database hash ref, 2) flag to delete "-" tags 26 # Inputs: 0) CSV file name, file ref or RAF ref, 1) database hash ref, 27 # 2) missing tag value, 3) delimiter if other than ',' 27 28 # Returns: undef on success, or error string 28 29 # Notes: There are various flavours of CSV, but here we assume that only 29 30 # double quotes are escaped, and they are escaped by doubling them 30 sub ReadCSV($$;$ )31 sub ReadCSV($$;$$) 31 32 { 32 33 local ($_, $/); 33 my ($file, $database, $delDash) = @_; 34 my ($buff, @tags, $found, $err); 35 36 open CSVFILE, $file or return "Error opening CSV file '$file'"; 37 binmode CSVFILE; 38 my $raf = new File::RandomAccess(\*CSVFILE); 34 my ($file, $database, $missingValue, $delim) = @_; 35 my ($buff, @tags, $found, $err, $raf, $openedFile); 36 37 if (UNIVERSAL::isa($file, 'File::RandomAccess')) { 38 $raf = $file; 39 $file = 'CSV file'; 40 } elsif (ref $file eq 'GLOB') { 41 $raf = new File::RandomAccess($file); 42 $file = 'CSV file'; 43 } else { 44 open CSVFILE, $file or return "Error opening CSV file '${file}'"; 45 binmode CSVFILE; 46 $openedFile = 1; 47 $raf = new File::RandomAccess(\*CSVFILE); 48 } 49 $delim = ',' unless defined $delim; 39 50 # set input record separator by first newline found in the file 40 51 # (safe because first line should contain only tag names) … … 45 56 while ($raf->ReadLine($buff)) { 46 57 my (@vals, $v, $i, %fileInfo); 47 my @toks = split ',', $buff;58 my @toks = split /\Q$delim/, $buff; 48 59 while (@toks) { 49 60 ($v = shift @toks) =~ s/^ +//; # remove leading spaces … … 52 63 while ($v !~ /("+)\s*$/ or not length($1) & 1) { 53 64 if (@toks) { 54 $v .= ','. shift @toks;65 $v .= $delim . shift @toks; 55 66 } else { 56 67 # read another line from the file 57 68 $raf->ReadLine($buff) or last; 58 @toks = split ',', $buff;69 @toks = split /\Q$delim/, $buff; 59 70 last unless @toks; 60 71 $v .= shift @toks; … … 71 82 # save values for each tag 72 83 for ($i=0; $i<@vals and $i<@tags; ++$i) { 73 next unless length $vals[$i]; # ignore empty entries 74 # delete tag if value (set value to undef) is '-' and -f option is used 75 $fileInfo{$tags[$i]} = ($vals[$i] eq '-' and $delDash) ? undef : $vals[$i]; 84 # ignore empty entries unless missingValue is empty too 85 next unless length $vals[$i] or defined $missingValue and $missingValue eq ''; 86 # delete tag (set value to undef) if value is same as missing tag 87 $fileInfo{$tags[$i]} = 88 (defined $missingValue and $vals[$i] eq $missingValue) ? undef : $vals[$i]; 76 89 } 77 90 # figure out the file name to use … … 83 96 # the first row should be the tag names 84 97 foreach (@vals) { 85 /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '$_'", last; 98 # terminate at first blank tag name (eg. extra comma at end of line) 99 last unless length $_; 100 @tags or s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists 101 /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '${_}'", last; 86 102 push(@tags, $_); 87 103 } 88 104 last if $err; 89 105 @tags or $err = 'No tags found', last; 90 } 91 } 92 close CSVFILE; 106 # fix "SourceFile" case if necessary 107 $tags[0] = 'SourceFile' if lc $tags[0] eq 'sourcefile'; 108 } 109 } 110 close CSVFILE if $openedFile; 93 111 undef $raf; 94 112 $err = 'No SourceFile column' unless $found or $err; … … 108 126 #------------------------------------------------------------------------------ 109 127 # Read JSON object from file 110 # Inputs: 0) JSON file handle, 1) optional file buffer reference 128 # Inputs: 0) RAF reference or undef, 1) optional scalar reference for data 129 # to read before reading from file (ie. the file read buffer) 111 130 # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or 112 131 # empty object or array (and sets $$buffPt to empty string on EOF) … … 114 133 sub ReadJSONObject($;$) 115 134 { 116 my ($ fp, $buffPt) = @_;135 my ($raf, $buffPt) = @_; 117 136 # initialize buffer if necessary 118 my ($pos, $readMore, $rtnVal, $tok, $key );137 my ($pos, $readMore, $rtnVal, $tok, $key, $didBOM); 119 138 if ($buffPt) { 120 139 $pos = pos $$buffPt; 140 $pos = pos($$buffPt) = 0 unless defined $pos; 121 141 } else { 122 142 my $buff = ''; … … 125 145 } 126 146 Tok: for (;;) { 147 # (didn't spend the time to understand how $pos could be undef, but 148 # put a test here to be safe because one user reported this problem) 149 last unless defined $pos; 127 150 if ($pos >= length $$buffPt or $readMore) { 151 last unless defined $raf; 128 152 # read another 64kB and add to unparsed data 129 153 my $offset = length($$buffPt) - $pos; 130 $$buffPt = substr($$buffPt, $pos) if $offset; 131 read $fp, $$buffPt, 65536, $offset or $$buffPt = '', last; 154 if ($offset) { 155 my $buff; 156 $raf->Read($buff, 65536) or $$buffPt = '', last; 157 $$buffPt = substr($$buffPt, $pos) . $buff; 158 } else { 159 $raf->Read($$buffPt, 65536) or $$buffPt = '', last; 160 } 161 unless ($didBOM) { 162 $$buffPt =~ s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists 163 $didBOM = 1; 164 } 132 165 $pos = pos($$buffPt) = 0; 133 166 $readMore = 0; … … 145 178 # read "KEY":"VALUE" pairs 146 179 unless (defined $key) { 147 $key = ReadJSONObject($ fp, $buffPt);180 $key = ReadJSONObject($raf, $buffPt); 148 181 $pos = pos $$buffPt; 149 182 } … … 153 186 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok; 154 187 $1 eq ':' or return undef; # error if not a colon 155 my $val = ReadJSONObject($ fp, $buffPt);188 my $val = ReadJSONObject($raf, $buffPt); 156 189 $pos = pos $$buffPt; 157 190 return undef unless defined $val; … … 167 200 $rtnVal = [ ] unless defined $rtnVal; 168 201 for (;;) { 169 my $item = ReadJSONObject($ fp, $buffPt);202 my $item = ReadJSONObject($raf, $buffPt); 170 203 $pos = pos $$buffPt; 171 204 # ($item may be undef for empty array) … … 185 218 $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige; 186 219 $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge; 220 # decode base64 (binary data) values 221 if ($rtnVal =~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and length($rtnVal) % 4 == 3) { 222 require Image::ExifTool::XMP; 223 $rtnVal = ${Image::ExifTool::XMP::DecodeBase64(substr($rtnVal,7))}; 224 } 187 225 } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') { 188 226 # return undef for empty object, array, or list item … … 201 239 #------------------------------------------------------------------------------ 202 240 # Read JSON file 203 # Inputs: 0) JSON file name, 1) database hash ref, 2) flag to delete "-" tags204 # 2) character set241 # Inputs: 0) JSON file name, file ref or RAF ref, 1) database hash ref, 242 # 2) flag to delete "-" tags, 3) character set 205 243 # Returns: undef on success, or error string 206 244 sub ReadJSON($$;$$) 207 245 { 208 246 local $_; 209 my ($file, $database, $delDash, $chset) = @_; 247 my ($file, $database, $missingValue, $chset) = @_; 248 my ($raf, $openedFile); 210 249 211 250 # initialize character set for converting "\uHHHH" chars 212 251 $charset = $chset || 'UTF8'; 213 open JSONFILE, $file or return "Error opening JSON file '$file'"; 214 binmode JSONFILE; 215 my $obj = ReadJSONObject(\*JSONFILE); 216 close JSONFILE; 252 if (UNIVERSAL::isa($file, 'File::RandomAccess')) { 253 $raf = $file; 254 $file = 'JSON file'; 255 } elsif (ref $file eq 'GLOB') { 256 $raf = new File::RandomAccess($file); 257 $file = 'JSON file'; 258 } else { 259 open JSONFILE, $file or return "Error opening JSON file '${file}'"; 260 binmode JSONFILE; 261 $openedFile = 1; 262 $raf = new File::RandomAccess(\*JSONFILE); 263 } 264 my $obj = ReadJSONObject($raf); 265 close JSONFILE if $openedFile; 217 266 unless (ref $obj eq 'ARRAY') { 218 ref $obj eq 'HASH' or return "Format error in JSON file '$ file'";267 ref $obj eq 'HASH' or return "Format error in JSON file '${file}'"; 219 268 $obj = [ $obj ]; 220 269 } 221 270 my ($info, $found); 222 271 foreach $info (@$obj) { 223 next unless ref $info eq 'HASH' and $$info{SourceFile}; 224 if ($delDash) { 225 $$info{$_} eq '-' and $$info{$_} = undef foreach keys %$info; 272 next unless ref $info eq 'HASH'; 273 # fix "SourceFile" case, or assume '*' if SourceFile not specified 274 unless (defined $$info{SourceFile}) { 275 my ($key) = grep /^SourceFile$/i, keys %$info; 276 if ($key) { 277 $$info{SourceFile} = $$info{$key}; 278 delete $$info{$key}; 279 } else { 280 $$info{SourceFile} = '*'; 281 } 282 } 283 if (defined $missingValue) { 284 $$info{$_} eq $missingValue and $$info{$_} = undef foreach keys %$info; 226 285 } 227 286 $$database{$$info{SourceFile}} = $info; 228 287 $found = 1; 229 288 } 230 return $found ? undef : "No SourceFile entries in '$file'";289 return $found ? undef : "No valid JSON objects in '${file}'"; 231 290 } 232 291 … … 267 326 =item Inputs: 268 327 269 0) CSV file name .328 0) CSV file name or file reference. 270 329 271 330 1) Hash reference for database object. 272 331 273 2) Optional flag to set '-' values to undef in the database. (Used for 274 deleting tags.) 275 276 3) [ReadJSON only] Optional character set for converting Unicode escape 277 sequences in strings. Defaults to "UTF8". See the ExifTool Charset option 278 for a list of valid settings. 332 2) Optional string used to represent an undefined (missing) tag value. 333 (Used for deleting tags.) 334 335 3) For ReadCSV this gives the delimiter for CSV entries, with a default of 336 ",". For ReadJSON this is the character set for converting Unicode escape 337 sequences in strings, with a default of "UTF8". See the ExifTool Charset 338 option for a list of valid character sets. 279 339 280 340 =item Return Value: … … 289 349 =head1 AUTHOR 290 350 291 Copyright 2003-20 11, Phil Harvey (phil at owl.phy.queensu.ca)351 Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com) 292 352 293 353 This library is free software; you can redistribute it and/or modify it
Note:
See TracChangeset
for help on using the changeset viewer.