source: gs2-extensions/parallel-building/trunk/src/perllib/cpan/Image/ExifTool/Import.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

  • Property svn:executable set to *
File size: 10.1 KB
RevLine 
[24626]1#------------------------------------------------------------------------------
2# File: Import.pm
3#
4# Description: Import CSV and JSON database files
5#
6# Revisions: 2011-03-05 - P. Harvey Created
7#------------------------------------------------------------------------------
8package Image::ExifTool::Import;
9
10use strict;
11require Exporter;
12
13use vars qw($VERSION @ISA @EXPORT_OK);
14
15$VERSION = '1.00';
16@ISA = qw(Exporter);
17@EXPORT_OK = qw(ReadCSV ReadJSON);
18
19sub ReadJSONObject($;$);
20
21my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r" );
22my $charset;
23
24#------------------------------------------------------------------------------
25# Read CSV file
26# Inputs: 0) CSV file name, 1) database hash ref, 2) flag to delete "-" tags
27# Returns: undef on success, or error string
28# Notes: There are various flavours of CSV, but here we assume that only
29# double quotes are escaped, and they are escaped by doubling them
30sub ReadCSV($$;$)
31{
32 local ($_, $/);
33 my ($file, $database, $delDash) = @_;
34 my ($buff, @tags, $found, $err);
35
36 open CSVFILE, $file or return "Error opening CSV file '$file'";
37 binmode CSVFILE;
38 my $raf = new File::RandomAccess(\*CSVFILE);
39 # set input record separator by first newline found in the file
40 # (safe because first line should contain only tag names)
41 while ($raf->Read($buff, 65536)) {
42 $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
43 }
44 $raf->Seek(0,0);
45 while ($raf->ReadLine($buff)) {
46 my (@vals, $v, $i, %fileInfo);
47 my @toks = split ',', $buff;
48 while (@toks) {
49 ($v = shift @toks) =~ s/^ +//; # remove leading spaces
50 if ($v =~ s/^"//) {
51 # quoted value must end in an odd number of quotes
52 while ($v !~ /("+)\s*$/ or not length($1) & 1) {
53 if (@toks) {
54 $v .= ',' . shift @toks;
55 } else {
56 # read another line from the file
57 $raf->ReadLine($buff) or last;
58 @toks = split ',', $buff;
59 last unless @toks;
60 $v .= shift @toks;
61 }
62 }
63 $v =~ s/"\s*$//; # remove trailing quote and whitespace
64 $v =~ s/""/"/g; # un-escape quotes
65 } else {
66 $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
67 }
68 push @vals, $v;
69 }
70 if (@tags) {
71 # save values for each tag
72 for ($i=0; $i<@vals and $i<@tags; ++$i) {
73 next unless length $vals[$i]; # ignore empty entries
74 # delete tag if value (set value to undef) is '-' and -f option is used
75 $fileInfo{$tags[$i]} = ($vals[$i] eq '-' and $delDash) ? undef : $vals[$i];
76 }
77 # figure out the file name to use
78 if ($fileInfo{SourceFile}) {
79 $$database{$fileInfo{SourceFile}} = \%fileInfo;
80 $found = 1;
81 }
82 } else {
83 # the first row should be the tag names
84 foreach (@vals) {
85 /^[-\w]+(:[-\w+]+)?#?$/ or $err = "Invalid tag name '$_'", last;
86 push(@tags, $_);
87 }
88 last if $err;
89 @tags or $err = 'No tags found', last;
90 }
91 }
92 close CSVFILE;
93 undef $raf;
94 $err = 'No SourceFile column' unless $found or $err;
95 return $err ? "$err in $file" : undef;
96}
97
98#------------------------------------------------------------------------------
99# Convert unicode code point to UTF-8
100# Inputs: 0) integer Unicode character
101# Returns: UTF-8 bytes
102sub ToUTF8($)
103{
104 require Image::ExifTool::Charset;
105 return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
106}
107
108#------------------------------------------------------------------------------
109# Read JSON object from file
110# Inputs: 0) JSON file handle, 1) optional file buffer reference
111# Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
112# empty object or array (and sets $$buffPt to empty string on EOF)
113# Notes: position in buffer is significant
114sub ReadJSONObject($;$)
115{
116 my ($fp, $buffPt) = @_;
117 # initialize buffer if necessary
118 my ($pos, $readMore, $rtnVal, $tok, $key);
119 if ($buffPt) {
120 $pos = pos $$buffPt;
121 } else {
122 my $buff = '';
123 $buffPt = \$buff;
124 $pos = 0;
125 }
126Tok: for (;;) {
127 if ($pos >= length $$buffPt or $readMore) {
128 # read another 64kB and add to unparsed data
129 my $offset = length($$buffPt) - $pos;
130 $$buffPt = substr($$buffPt, $pos) if $offset;
131 read $fp, $$buffPt, 65536, $offset or $$buffPt = '', last;
132 $pos = pos($$buffPt) = 0;
133 $readMore = 0;
134 }
135 unless ($tok) {
136 # skip white space and find next character
137 $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
138 $tok = $1;
139 $pos = pos $$buffPt;
140 }
141 # see what type of object this is
142 if ($tok eq '{') { # object (hash)
143 $rtnVal = { } unless defined $rtnVal;
144 for (;;) {
145 # read "KEY":"VALUE" pairs
146 unless (defined $key) {
147 $key = ReadJSONObject($fp, $buffPt);
148 $pos = pos $$buffPt;
149 }
150 # ($key may be undef for empty JSON object)
151 if (defined $key) {
152 # scan to delimiting ':'
153 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
154 $1 eq ':' or return undef; # error if not a colon
155 my $val = ReadJSONObject($fp, $buffPt);
156 $pos = pos $$buffPt;
157 return undef unless defined $val;
158 $$rtnVal{$key} = $val;
159 undef $key;
160 }
161 # scan to delimiting ',' or bounding '}'
162 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
163 last if $1 eq '}'; # check for end of object
164 $1 eq ',' or return undef; # error if not a comma
165 }
166 } elsif ($tok eq '[') { # array
167 $rtnVal = [ ] unless defined $rtnVal;
168 for (;;) {
169 my $item = ReadJSONObject($fp, $buffPt);
170 $pos = pos $$buffPt;
171 # ($item may be undef for empty array)
172 push @$rtnVal, $item if defined $item;
173 # scan to delimiting ',' or bounding ']'
174 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
175 last if $1 eq ']'; # check for end of array
176 $1 eq ',' or return undef; # error if not a comma
177 }
178 } elsif ($tok eq '"') { # quoted string
179 for (;;) {
180 $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
181 last unless length($1) & 1; # check for escaped quote
182 }
183 $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
184 # unescape characters
185 $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
186 $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
187 } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
188 # return undef for empty object, array, or list item
189 # (empty list item actually not valid JSON)
190 pos($$buffPt) = pos($$buffPt) - 1;
191 } else { # number, 'true', 'false', 'null'
192 $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
193 pos($$buffPt) = pos($$buffPt) - 1;
194 $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
195 }
196 last;
197 }
198 return $rtnVal;
199}
200
201#------------------------------------------------------------------------------
202# Read JSON file
203# Inputs: 0) JSON file name, 1) database hash ref, 2) flag to delete "-" tags
204# 2) character set
205# Returns: undef on success, or error string
206sub ReadJSON($$;$$)
207{
208 local $_;
209 my ($file, $database, $delDash, $chset) = @_;
210
211 # initialize character set for converting "\uHHHH" chars
212 $charset = $chset || 'UTF8';
213 open JSONFILE, $file or return "Error opening JSON file '$file'";
214 binmode JSONFILE;
215 my $obj = ReadJSONObject(\*JSONFILE);
216 close JSONFILE;
217 unless (ref $obj eq 'ARRAY') {
218 ref $obj eq 'HASH' or return "Format error in JSON file '$file'";
219 $obj = [ $obj ];
220 }
221 my ($info, $found);
222 foreach $info (@$obj) {
223 next unless ref $info eq 'HASH' and $$info{SourceFile};
224 if ($delDash) {
225 $$info{$_} eq '-' and $$info{$_} = undef foreach keys %$info;
226 }
227 $$database{$$info{SourceFile}} = $info;
228 $found = 1;
229 }
230 return $found ? undef : "No SourceFile entries in '$file'";
231}
232
233
2341; # end
235
236__END__
237
238=head1 NAME
239
240Image::ExifTool::Import - Import CSV and JSON database files
241
242=head1 SYNOPSIS
243
244 use Image::ExifTool::Import qw(ReadCSV ReadJSON);
245
246 $err = ReadCSV($csvFile, \%database);
247
248 $err = ReadJSON($jsonfile, \%database);
249
250=head1 DESCRIPTION
251
252This module contains routines for importing tag information from CSV (Comma
253Separated Value) and JSON (JavaScript Object Notation) database files.
254
255=head1 EXPORTS
256
257Exports nothing by default, but ReadCSV and ReadJSON may be exported.
258
259=head1 METHODS
260
261=head2 ReadCSV / ReadJSON
262
263Read CSV or JSON file into a database hash.
264
265=over 4
266
267=item Inputs:
268
2690) CSV file name.
270
2711) Hash reference for database object.
272
2732) Optional flag to set '-' values to undef in the database. (Used for
274deleting tags.)
275
2763) [ReadJSON only] Optional character set for converting Unicode escape
277sequences in strings. Defaults to "UTF8". See the ExifTool Charset option
278for a list of valid settings.
279
280=item Return Value:
281
282These functions return an error string, or undef on success and populate the
283database hash with entries from the CSV or JSON file. Entries are keyed
284based on the SourceFile column of the CSV or JSON information, and are
285stored as hash lookups of tag name/value for each SourceFile.
286
287=back
288
289=head1 AUTHOR
290
291Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
292
293This library is free software; you can redistribute it and/or modify it
294under the same terms as Perl itself.
295
296=head1 SEE ALSO
297
298L<Image::ExifTool(3pm)|Image::ExifTool>
299
300=cut
Note: See TracBrowser for help on using the repository browser.