[24626] | 1 | #------------------------------------------------------------------------------
|
---|
| 2 | # File: Import.pm
|
---|
| 3 | #
|
---|
| 4 | # Description: Import CSV and JSON database files
|
---|
| 5 | #
|
---|
| 6 | # Revisions: 2011-03-05 - P. Harvey Created
|
---|
| 7 | #------------------------------------------------------------------------------
|
---|
| 8 | package Image::ExifTool::Import;
|
---|
| 9 |
|
---|
| 10 | use strict;
|
---|
| 11 | require Exporter;
|
---|
| 12 |
|
---|
| 13 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
| 14 |
|
---|
| 15 | $VERSION = '1.00';
|
---|
| 16 | @ISA = qw(Exporter);
|
---|
| 17 | @EXPORT_OK = qw(ReadCSV ReadJSON);
|
---|
| 18 |
|
---|
| 19 | sub ReadJSONObject($;$);
|
---|
| 20 |
|
---|
| 21 | my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r" );
|
---|
| 22 | my $charset;
|
---|
| 23 |
|
---|
| 24 | #------------------------------------------------------------------------------
|
---|
| 25 | # Read CSV file
|
---|
| 26 | # Inputs: 0) CSV file name, 1) database hash ref, 2) flag to delete "-" tags
|
---|
| 27 | # Returns: undef on success, or error string
|
---|
| 28 | # Notes: There are various flavours of CSV, but here we assume that only
|
---|
| 29 | # double quotes are escaped, and they are escaped by doubling them
|
---|
| 30 | sub ReadCSV($$;$)
|
---|
| 31 | {
|
---|
| 32 | local ($_, $/);
|
---|
| 33 | my ($file, $database, $delDash) = @_;
|
---|
| 34 | my ($buff, @tags, $found, $err);
|
---|
| 35 |
|
---|
| 36 | open CSVFILE, $file or return "Error opening CSV file '$file'";
|
---|
| 37 | binmode CSVFILE;
|
---|
| 38 | my $raf = new File::RandomAccess(\*CSVFILE);
|
---|
| 39 | # set input record separator by first newline found in the file
|
---|
| 40 | # (safe because first line should contain only tag names)
|
---|
| 41 | while ($raf->Read($buff, 65536)) {
|
---|
| 42 | $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
|
---|
| 43 | }
|
---|
| 44 | $raf->Seek(0,0);
|
---|
| 45 | while ($raf->ReadLine($buff)) {
|
---|
| 46 | my (@vals, $v, $i, %fileInfo);
|
---|
| 47 | my @toks = split ',', $buff;
|
---|
| 48 | while (@toks) {
|
---|
| 49 | ($v = shift @toks) =~ s/^ +//; # remove leading spaces
|
---|
| 50 | if ($v =~ s/^"//) {
|
---|
| 51 | # quoted value must end in an odd number of quotes
|
---|
| 52 | while ($v !~ /("+)\s*$/ or not length($1) & 1) {
|
---|
| 53 | if (@toks) {
|
---|
| 54 | $v .= ',' . shift @toks;
|
---|
| 55 | } else {
|
---|
| 56 | # read another line from the file
|
---|
| 57 | $raf->ReadLine($buff) or last;
|
---|
| 58 | @toks = split ',', $buff;
|
---|
| 59 | last unless @toks;
|
---|
| 60 | $v .= shift @toks;
|
---|
| 61 | }
|
---|
| 62 | }
|
---|
| 63 | $v =~ s/"\s*$//; # remove trailing quote and whitespace
|
---|
| 64 | $v =~ s/""/"/g; # un-escape quotes
|
---|
| 65 | } else {
|
---|
| 66 | $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
|
---|
| 67 | }
|
---|
| 68 | push @vals, $v;
|
---|
| 69 | }
|
---|
| 70 | if (@tags) {
|
---|
| 71 | # save values for each tag
|
---|
| 72 | for ($i=0; $i<@vals and $i<@tags; ++$i) {
|
---|
| 73 | next unless length $vals[$i]; # ignore empty entries
|
---|
| 74 | # delete tag if value (set value to undef) is '-' and -f option is used
|
---|
| 75 | $fileInfo{$tags[$i]} = ($vals[$i] eq '-' and $delDash) ? undef : $vals[$i];
|
---|
| 76 | }
|
---|
| 77 | # figure out the file name to use
|
---|
| 78 | if ($fileInfo{SourceFile}) {
|
---|
| 79 | $$database{$fileInfo{SourceFile}} = \%fileInfo;
|
---|
| 80 | $found = 1;
|
---|
| 81 | }
|
---|
| 82 | } else {
|
---|
| 83 | # the first row should be the tag names
|
---|
| 84 | foreach (@vals) {
|
---|
| 85 | /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '$_'", last;
|
---|
| 86 | push(@tags, $_);
|
---|
| 87 | }
|
---|
| 88 | last if $err;
|
---|
| 89 | @tags or $err = 'No tags found', last;
|
---|
| 90 | }
|
---|
| 91 | }
|
---|
| 92 | close CSVFILE;
|
---|
| 93 | undef $raf;
|
---|
| 94 | $err = 'No SourceFile column' unless $found or $err;
|
---|
| 95 | return $err ? "$err in $file" : undef;
|
---|
| 96 | }
|
---|
| 97 |
|
---|
| 98 | #------------------------------------------------------------------------------
|
---|
| 99 | # Convert unicode code point to UTF-8
|
---|
| 100 | # Inputs: 0) integer Unicode character
|
---|
| 101 | # Returns: UTF-8 bytes
|
---|
| 102 | sub ToUTF8($)
|
---|
| 103 | {
|
---|
| 104 | require Image::ExifTool::Charset;
|
---|
| 105 | return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
|
---|
| 106 | }
|
---|
| 107 |
|
---|
| 108 | #------------------------------------------------------------------------------
|
---|
| 109 | # Read JSON object from file
|
---|
| 110 | # Inputs: 0) JSON file handle, 1) optional file buffer reference
|
---|
| 111 | # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
|
---|
| 112 | # empty object or array (and sets $$buffPt to empty string on EOF)
|
---|
| 113 | # Notes: position in buffer is significant
|
---|
| 114 | sub ReadJSONObject($;$)
|
---|
| 115 | {
|
---|
| 116 | my ($fp, $buffPt) = @_;
|
---|
| 117 | # initialize buffer if necessary
|
---|
| 118 | my ($pos, $readMore, $rtnVal, $tok, $key);
|
---|
| 119 | if ($buffPt) {
|
---|
| 120 | $pos = pos $$buffPt;
|
---|
| 121 | } else {
|
---|
| 122 | my $buff = '';
|
---|
| 123 | $buffPt = \$buff;
|
---|
| 124 | $pos = 0;
|
---|
| 125 | }
|
---|
| 126 | Tok: for (;;) {
|
---|
| 127 | if ($pos >= length $$buffPt or $readMore) {
|
---|
| 128 | # read another 64kB and add to unparsed data
|
---|
| 129 | my $offset = length($$buffPt) - $pos;
|
---|
| 130 | $$buffPt = substr($$buffPt, $pos) if $offset;
|
---|
| 131 | read $fp, $$buffPt, 65536, $offset or $$buffPt = '', last;
|
---|
| 132 | $pos = pos($$buffPt) = 0;
|
---|
| 133 | $readMore = 0;
|
---|
| 134 | }
|
---|
| 135 | unless ($tok) {
|
---|
| 136 | # skip white space and find next character
|
---|
| 137 | $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
|
---|
| 138 | $tok = $1;
|
---|
| 139 | $pos = pos $$buffPt;
|
---|
| 140 | }
|
---|
| 141 | # see what type of object this is
|
---|
| 142 | if ($tok eq '{') { # object (hash)
|
---|
| 143 | $rtnVal = { } unless defined $rtnVal;
|
---|
| 144 | for (;;) {
|
---|
| 145 | # read "KEY":"VALUE" pairs
|
---|
| 146 | unless (defined $key) {
|
---|
| 147 | $key = ReadJSONObject($fp, $buffPt);
|
---|
| 148 | $pos = pos $$buffPt;
|
---|
| 149 | }
|
---|
| 150 | # ($key may be undef for empty JSON object)
|
---|
| 151 | if (defined $key) {
|
---|
| 152 | # scan to delimiting ':'
|
---|
| 153 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
| 154 | $1 eq ':' or return undef; # error if not a colon
|
---|
| 155 | my $val = ReadJSONObject($fp, $buffPt);
|
---|
| 156 | $pos = pos $$buffPt;
|
---|
| 157 | return undef unless defined $val;
|
---|
| 158 | $$rtnVal{$key} = $val;
|
---|
| 159 | undef $key;
|
---|
| 160 | }
|
---|
| 161 | # scan to delimiting ',' or bounding '}'
|
---|
| 162 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
| 163 | last if $1 eq '}'; # check for end of object
|
---|
| 164 | $1 eq ',' or return undef; # error if not a comma
|
---|
| 165 | }
|
---|
| 166 | } elsif ($tok eq '[') { # array
|
---|
| 167 | $rtnVal = [ ] unless defined $rtnVal;
|
---|
| 168 | for (;;) {
|
---|
| 169 | my $item = ReadJSONObject($fp, $buffPt);
|
---|
| 170 | $pos = pos $$buffPt;
|
---|
| 171 | # ($item may be undef for empty array)
|
---|
| 172 | push @$rtnVal, $item if defined $item;
|
---|
| 173 | # scan to delimiting ',' or bounding ']'
|
---|
| 174 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
| 175 | last if $1 eq ']'; # check for end of array
|
---|
| 176 | $1 eq ',' or return undef; # error if not a comma
|
---|
| 177 | }
|
---|
| 178 | } elsif ($tok eq '"') { # quoted string
|
---|
| 179 | for (;;) {
|
---|
| 180 | $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
|
---|
| 181 | last unless length($1) & 1; # check for escaped quote
|
---|
| 182 | }
|
---|
| 183 | $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
|
---|
| 184 | # unescape characters
|
---|
| 185 | $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
|
---|
| 186 | $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
|
---|
| 187 | } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
|
---|
| 188 | # return undef for empty object, array, or list item
|
---|
| 189 | # (empty list item actually not valid JSON)
|
---|
| 190 | pos($$buffPt) = pos($$buffPt) - 1;
|
---|
| 191 | } else { # number, 'true', 'false', 'null'
|
---|
| 192 | $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
|
---|
| 193 | pos($$buffPt) = pos($$buffPt) - 1;
|
---|
| 194 | $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
|
---|
| 195 | }
|
---|
| 196 | last;
|
---|
| 197 | }
|
---|
| 198 | return $rtnVal;
|
---|
| 199 | }
|
---|
| 200 |
|
---|
| 201 | #------------------------------------------------------------------------------
|
---|
| 202 | # Read JSON file
|
---|
| 203 | # Inputs: 0) JSON file name, 1) database hash ref, 2) flag to delete "-" tags
|
---|
| 204 | # 2) character set
|
---|
| 205 | # Returns: undef on success, or error string
|
---|
| 206 | sub ReadJSON($$;$$)
|
---|
| 207 | {
|
---|
| 208 | local $_;
|
---|
| 209 | my ($file, $database, $delDash, $chset) = @_;
|
---|
| 210 |
|
---|
| 211 | # initialize character set for converting "\uHHHH" chars
|
---|
| 212 | $charset = $chset || 'UTF8';
|
---|
| 213 | open JSONFILE, $file or return "Error opening JSON file '$file'";
|
---|
| 214 | binmode JSONFILE;
|
---|
| 215 | my $obj = ReadJSONObject(\*JSONFILE);
|
---|
| 216 | close JSONFILE;
|
---|
| 217 | unless (ref $obj eq 'ARRAY') {
|
---|
| 218 | ref $obj eq 'HASH' or return "Format error in JSON file '$file'";
|
---|
| 219 | $obj = [ $obj ];
|
---|
| 220 | }
|
---|
| 221 | my ($info, $found);
|
---|
| 222 | foreach $info (@$obj) {
|
---|
| 223 | next unless ref $info eq 'HASH' and $$info{SourceFile};
|
---|
| 224 | if ($delDash) {
|
---|
| 225 | $$info{$_} eq '-' and $$info{$_} = undef foreach keys %$info;
|
---|
| 226 | }
|
---|
| 227 | $$database{$$info{SourceFile}} = $info;
|
---|
| 228 | $found = 1;
|
---|
| 229 | }
|
---|
| 230 | return $found ? undef : "No SourceFile entries in '$file'";
|
---|
| 231 | }
|
---|
| 232 |
|
---|
| 233 |
|
---|
| 234 | 1; # end
|
---|
| 235 |
|
---|
| 236 | __END__
|
---|
| 237 |
|
---|
| 238 | =head1 NAME
|
---|
| 239 |
|
---|
| 240 | Image::ExifTool::Import - Import CSV and JSON database files
|
---|
| 241 |
|
---|
| 242 | =head1 SYNOPSIS
|
---|
| 243 |
|
---|
| 244 | use Image::ExifTool::Import qw(ReadCSV ReadJSON);
|
---|
| 245 |
|
---|
| 246 | $err = ReadCSV($csvFile, \%database);
|
---|
| 247 |
|
---|
| 248 | $err = ReadJSON($jsonfile, \%database);
|
---|
| 249 |
|
---|
| 250 | =head1 DESCRIPTION
|
---|
| 251 |
|
---|
| 252 | This module contains routines for importing tag information from CSV (Comma
|
---|
| 253 | Separated Value) and JSON (JavaScript Object Notation) database files.
|
---|
| 254 |
|
---|
| 255 | =head1 EXPORTS
|
---|
| 256 |
|
---|
| 257 | Exports nothing by default, but ReadCSV and ReadJSON may be exported.
|
---|
| 258 |
|
---|
| 259 | =head1 METHODS
|
---|
| 260 |
|
---|
| 261 | =head2 ReadCSV / ReadJSON
|
---|
| 262 |
|
---|
| 263 | Read CSV or JSON file into a database hash.
|
---|
| 264 |
|
---|
| 265 | =over 4
|
---|
| 266 |
|
---|
| 267 | =item Inputs:
|
---|
| 268 |
|
---|
| 269 | 0) CSV file name.
|
---|
| 270 |
|
---|
| 271 | 1) Hash reference for database object.
|
---|
| 272 |
|
---|
| 273 | 2) Optional flag to set '-' values to undef in the database. (Used for
|
---|
| 274 | deleting tags.)
|
---|
| 275 |
|
---|
| 276 | 3) [ReadJSON only] Optional character set for converting Unicode escape
|
---|
| 277 | sequences in strings. Defaults to "UTF8". See the ExifTool Charset option
|
---|
| 278 | for a list of valid settings.
|
---|
| 279 |
|
---|
| 280 | =item Return Value:
|
---|
| 281 |
|
---|
| 282 | These functions return an error string, or undef on success and populate the
|
---|
| 283 | database hash with entries from the CSV or JSON file. Entries are keyed
|
---|
| 284 | based on the SourceFile column of the CSV or JSON information, and are
|
---|
| 285 | stored as hash lookups of tag name/value for each SourceFile.
|
---|
| 286 |
|
---|
| 287 | =back
|
---|
| 288 |
|
---|
| 289 | =head1 AUTHOR
|
---|
| 290 |
|
---|
| 291 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
| 292 |
|
---|
| 293 | This library is free software; you can redistribute it and/or modify it
|
---|
| 294 | under the same terms as Perl itself.
|
---|
| 295 |
|
---|
| 296 | =head1 SEE ALSO
|
---|
| 297 |
|
---|
| 298 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
| 299 |
|
---|
| 300 | =cut
|
---|