1 | #------------------------------------------------------------------------------
|
---|
2 | # File: Import.pm
|
---|
3 | #
|
---|
4 | # Description: Import CSV and JSON database files
|
---|
5 | #
|
---|
6 | # Revisions: 2011-03-05 - P. Harvey Created
|
---|
7 | #------------------------------------------------------------------------------
|
---|
8 | package Image::ExifTool::Import;
|
---|
9 |
|
---|
10 | use strict;
|
---|
11 | require Exporter;
|
---|
12 |
|
---|
13 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
14 |
|
---|
15 | $VERSION = '1.00';
|
---|
16 | @ISA = qw(Exporter);
|
---|
17 | @EXPORT_OK = qw(ReadCSV ReadJSON);
|
---|
18 |
|
---|
19 | sub ReadJSONObject($;$);
|
---|
20 |
|
---|
21 | my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r" );
|
---|
22 | my $charset;
|
---|
23 |
|
---|
24 | #------------------------------------------------------------------------------
|
---|
25 | # Read CSV file
|
---|
26 | # Inputs: 0) CSV file name, 1) database hash ref, 2) flag to delete "-" tags
|
---|
27 | # Returns: undef on success, or error string
|
---|
28 | # Notes: There are various flavours of CSV, but here we assume that only
|
---|
29 | # double quotes are escaped, and they are escaped by doubling them
|
---|
30 | sub ReadCSV($$;$)
|
---|
31 | {
|
---|
32 | local ($_, $/);
|
---|
33 | my ($file, $database, $delDash) = @_;
|
---|
34 | my ($buff, @tags, $found, $err);
|
---|
35 |
|
---|
36 | open CSVFILE, $file or return "Error opening CSV file '$file'";
|
---|
37 | binmode CSVFILE;
|
---|
38 | my $raf = new File::RandomAccess(\*CSVFILE);
|
---|
39 | # set input record separator by first newline found in the file
|
---|
40 | # (safe because first line should contain only tag names)
|
---|
41 | while ($raf->Read($buff, 65536)) {
|
---|
42 | $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
|
---|
43 | }
|
---|
44 | $raf->Seek(0,0);
|
---|
45 | while ($raf->ReadLine($buff)) {
|
---|
46 | my (@vals, $v, $i, %fileInfo);
|
---|
47 | my @toks = split ',', $buff;
|
---|
48 | while (@toks) {
|
---|
49 | ($v = shift @toks) =~ s/^ +//; # remove leading spaces
|
---|
50 | if ($v =~ s/^"//) {
|
---|
51 | # quoted value must end in an odd number of quotes
|
---|
52 | while ($v !~ /("+)\s*$/ or not length($1) & 1) {
|
---|
53 | if (@toks) {
|
---|
54 | $v .= ',' . shift @toks;
|
---|
55 | } else {
|
---|
56 | # read another line from the file
|
---|
57 | $raf->ReadLine($buff) or last;
|
---|
58 | @toks = split ',', $buff;
|
---|
59 | last unless @toks;
|
---|
60 | $v .= shift @toks;
|
---|
61 | }
|
---|
62 | }
|
---|
63 | $v =~ s/"\s*$//; # remove trailing quote and whitespace
|
---|
64 | $v =~ s/""/"/g; # un-escape quotes
|
---|
65 | } else {
|
---|
66 | $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
|
---|
67 | }
|
---|
68 | push @vals, $v;
|
---|
69 | }
|
---|
70 | if (@tags) {
|
---|
71 | # save values for each tag
|
---|
72 | for ($i=0; $i<@vals and $i<@tags; ++$i) {
|
---|
73 | next unless length $vals[$i]; # ignore empty entries
|
---|
74 | # delete tag if value (set value to undef) is '-' and -f option is used
|
---|
75 | $fileInfo{$tags[$i]} = ($vals[$i] eq '-' and $delDash) ? undef : $vals[$i];
|
---|
76 | }
|
---|
77 | # figure out the file name to use
|
---|
78 | if ($fileInfo{SourceFile}) {
|
---|
79 | $$database{$fileInfo{SourceFile}} = \%fileInfo;
|
---|
80 | $found = 1;
|
---|
81 | }
|
---|
82 | } else {
|
---|
83 | # the first row should be the tag names
|
---|
84 | foreach (@vals) {
|
---|
85 | /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '$_'", last;
|
---|
86 | push(@tags, $_);
|
---|
87 | }
|
---|
88 | last if $err;
|
---|
89 | @tags or $err = 'No tags found', last;
|
---|
90 | }
|
---|
91 | }
|
---|
92 | close CSVFILE;
|
---|
93 | undef $raf;
|
---|
94 | $err = 'No SourceFile column' unless $found or $err;
|
---|
95 | return $err ? "$err in $file" : undef;
|
---|
96 | }
|
---|
97 |
|
---|
98 | #------------------------------------------------------------------------------
|
---|
99 | # Convert unicode code point to UTF-8
|
---|
100 | # Inputs: 0) integer Unicode character
|
---|
101 | # Returns: UTF-8 bytes
|
---|
102 | sub ToUTF8($)
|
---|
103 | {
|
---|
104 | require Image::ExifTool::Charset;
|
---|
105 | return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
|
---|
106 | }
|
---|
107 |
|
---|
108 | #------------------------------------------------------------------------------
|
---|
109 | # Read JSON object from file
|
---|
110 | # Inputs: 0) JSON file handle, 1) optional file buffer reference
|
---|
111 | # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
|
---|
112 | # empty object or array (and sets $$buffPt to empty string on EOF)
|
---|
113 | # Notes: position in buffer is significant
|
---|
114 | sub ReadJSONObject($;$)
|
---|
115 | {
|
---|
116 | my ($fp, $buffPt) = @_;
|
---|
117 | # initialize buffer if necessary
|
---|
118 | my ($pos, $readMore, $rtnVal, $tok, $key);
|
---|
119 | if ($buffPt) {
|
---|
120 | $pos = pos $$buffPt;
|
---|
121 | } else {
|
---|
122 | my $buff = '';
|
---|
123 | $buffPt = \$buff;
|
---|
124 | $pos = 0;
|
---|
125 | }
|
---|
126 | Tok: for (;;) {
|
---|
127 | if ($pos >= length $$buffPt or $readMore) {
|
---|
128 | # read another 64kB and add to unparsed data
|
---|
129 | my $offset = length($$buffPt) - $pos;
|
---|
130 | $$buffPt = substr($$buffPt, $pos) if $offset;
|
---|
131 | read $fp, $$buffPt, 65536, $offset or $$buffPt = '', last;
|
---|
132 | $pos = pos($$buffPt) = 0;
|
---|
133 | $readMore = 0;
|
---|
134 | }
|
---|
135 | unless ($tok) {
|
---|
136 | # skip white space and find next character
|
---|
137 | $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
|
---|
138 | $tok = $1;
|
---|
139 | $pos = pos $$buffPt;
|
---|
140 | }
|
---|
141 | # see what type of object this is
|
---|
142 | if ($tok eq '{') { # object (hash)
|
---|
143 | $rtnVal = { } unless defined $rtnVal;
|
---|
144 | for (;;) {
|
---|
145 | # read "KEY":"VALUE" pairs
|
---|
146 | unless (defined $key) {
|
---|
147 | $key = ReadJSONObject($fp, $buffPt);
|
---|
148 | $pos = pos $$buffPt;
|
---|
149 | }
|
---|
150 | # ($key may be undef for empty JSON object)
|
---|
151 | if (defined $key) {
|
---|
152 | # scan to delimiting ':'
|
---|
153 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
154 | $1 eq ':' or return undef; # error if not a colon
|
---|
155 | my $val = ReadJSONObject($fp, $buffPt);
|
---|
156 | $pos = pos $$buffPt;
|
---|
157 | return undef unless defined $val;
|
---|
158 | $$rtnVal{$key} = $val;
|
---|
159 | undef $key;
|
---|
160 | }
|
---|
161 | # scan to delimiting ',' or bounding '}'
|
---|
162 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
163 | last if $1 eq '}'; # check for end of object
|
---|
164 | $1 eq ',' or return undef; # error if not a comma
|
---|
165 | }
|
---|
166 | } elsif ($tok eq '[') { # array
|
---|
167 | $rtnVal = [ ] unless defined $rtnVal;
|
---|
168 | for (;;) {
|
---|
169 | my $item = ReadJSONObject($fp, $buffPt);
|
---|
170 | $pos = pos $$buffPt;
|
---|
171 | # ($item may be undef for empty array)
|
---|
172 | push @$rtnVal, $item if defined $item;
|
---|
173 | # scan to delimiting ',' or bounding ']'
|
---|
174 | $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
|
---|
175 | last if $1 eq ']'; # check for end of array
|
---|
176 | $1 eq ',' or return undef; # error if not a comma
|
---|
177 | }
|
---|
178 | } elsif ($tok eq '"') { # quoted string
|
---|
179 | for (;;) {
|
---|
180 | $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
|
---|
181 | last unless length($1) & 1; # check for escaped quote
|
---|
182 | }
|
---|
183 | $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
|
---|
184 | # unescape characters
|
---|
185 | $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
|
---|
186 | $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
|
---|
187 | } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
|
---|
188 | # return undef for empty object, array, or list item
|
---|
189 | # (empty list item actually not valid JSON)
|
---|
190 | pos($$buffPt) = pos($$buffPt) - 1;
|
---|
191 | } else { # number, 'true', 'false', 'null'
|
---|
192 | $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
|
---|
193 | pos($$buffPt) = pos($$buffPt) - 1;
|
---|
194 | $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
|
---|
195 | }
|
---|
196 | last;
|
---|
197 | }
|
---|
198 | return $rtnVal;
|
---|
199 | }
|
---|
200 |
|
---|
201 | #------------------------------------------------------------------------------
|
---|
202 | # Read JSON file
|
---|
203 | # Inputs: 0) JSON file name, 1) database hash ref, 2) flag to delete "-" tags
|
---|
204 | # 2) character set
|
---|
205 | # Returns: undef on success, or error string
|
---|
206 | sub ReadJSON($$;$$)
|
---|
207 | {
|
---|
208 | local $_;
|
---|
209 | my ($file, $database, $delDash, $chset) = @_;
|
---|
210 |
|
---|
211 | # initialize character set for converting "\uHHHH" chars
|
---|
212 | $charset = $chset || 'UTF8';
|
---|
213 | open JSONFILE, $file or return "Error opening JSON file '$file'";
|
---|
214 | binmode JSONFILE;
|
---|
215 | my $obj = ReadJSONObject(\*JSONFILE);
|
---|
216 | close JSONFILE;
|
---|
217 | unless (ref $obj eq 'ARRAY') {
|
---|
218 | ref $obj eq 'HASH' or return "Format error in JSON file '$file'";
|
---|
219 | $obj = [ $obj ];
|
---|
220 | }
|
---|
221 | my ($info, $found);
|
---|
222 | foreach $info (@$obj) {
|
---|
223 | next unless ref $info eq 'HASH' and $$info{SourceFile};
|
---|
224 | if ($delDash) {
|
---|
225 | $$info{$_} eq '-' and $$info{$_} = undef foreach keys %$info;
|
---|
226 | }
|
---|
227 | $$database{$$info{SourceFile}} = $info;
|
---|
228 | $found = 1;
|
---|
229 | }
|
---|
230 | return $found ? undef : "No SourceFile entries in '$file'";
|
---|
231 | }
|
---|
232 |
|
---|
233 |
|
---|
234 | 1; # end
|
---|
235 |
|
---|
236 | __END__
|
---|
237 |
|
---|
238 | =head1 NAME
|
---|
239 |
|
---|
240 | Image::ExifTool::Import - Import CSV and JSON database files
|
---|
241 |
|
---|
242 | =head1 SYNOPSIS
|
---|
243 |
|
---|
244 | use Image::ExifTool::Import qw(ReadCSV ReadJSON);
|
---|
245 |
|
---|
246 | $err = ReadCSV($csvFile, \%database);
|
---|
247 |
|
---|
248 | $err = ReadJSON($jsonfile, \%database);
|
---|
249 |
|
---|
250 | =head1 DESCRIPTION
|
---|
251 |
|
---|
252 | This module contains routines for importing tag information from CSV (Comma
|
---|
253 | Separated Value) and JSON (JavaScript Object Notation) database files.
|
---|
254 |
|
---|
255 | =head1 EXPORTS
|
---|
256 |
|
---|
257 | Exports nothing by default, but ReadCSV and ReadJSON may be exported.
|
---|
258 |
|
---|
259 | =head1 METHODS
|
---|
260 |
|
---|
261 | =head2 ReadCSV / ReadJSON
|
---|
262 |
|
---|
263 | Read CSV or JSON file into a database hash.
|
---|
264 |
|
---|
265 | =over 4
|
---|
266 |
|
---|
267 | =item Inputs:
|
---|
268 |
|
---|
269 | 0) CSV file name.
|
---|
270 |
|
---|
271 | 1) Hash reference for database object.
|
---|
272 |
|
---|
273 | 2) Optional flag to set '-' values to undef in the database. (Used for
|
---|
274 | deleting tags.)
|
---|
275 |
|
---|
276 | 3) [ReadJSON only] Optional character set for converting Unicode escape
|
---|
277 | sequences in strings. Defaults to "UTF8". See the ExifTool Charset option
|
---|
278 | for a list of valid settings.
|
---|
279 |
|
---|
280 | =item Return Value:
|
---|
281 |
|
---|
282 | These functions return an error string, or undef on success and populate the
|
---|
283 | database hash with entries from the CSV or JSON file. Entries are keyed
|
---|
284 | based on the SourceFile column of the CSV or JSON information, and are
|
---|
285 | stored as hash lookups of tag name/value for each SourceFile.
|
---|
286 |
|
---|
287 | =back
|
---|
288 |
|
---|
289 | =head1 AUTHOR
|
---|
290 |
|
---|
291 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
292 |
|
---|
293 | This library is free software; you can redistribute it and/or modify it
|
---|
294 | under the same terms as Perl itself.
|
---|
295 |
|
---|
296 | =head1 SEE ALSO
|
---|
297 |
|
---|
298 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
299 |
|
---|
300 | =cut
|
---|