[33235] | 1 | package Text::CSV;
|
---|
| 2 |
|
---|
| 3 |
|
---|
| 4 | use strict;
|
---|
| 5 | use Exporter;
|
---|
| 6 | use Carp ();
|
---|
| 7 | use vars qw( $VERSION $DEBUG @ISA @EXPORT_OK );
|
---|
| 8 | @ISA = qw( Exporter );
|
---|
| 9 | @EXPORT_OK = qw( csv );
|
---|
| 10 |
|
---|
# Module version and debug switch. They are assigned inside BEGIN so that
# both already hold their values while the rest of this file is compiled.
BEGIN {
    ( $VERSION, $DEBUG ) = ( '1.99', 0 );
}

# The two candidate backends, and the version that is requested when the
# XS backend is loaded (it is passed to "use Text::CSV_XS <version>").
my ( $Module_XS, $Module_PP ) = ( 'Text::CSV_XS', 'Text::CSV_PP' );
my $XS_Version = '1.02';

# Value reported by is_dynamic().
my $Is_Dynamic = 0;

# Backend functions that _load() aliases into the Text::CSV namespace.
my @PublicMethods = qw(
    version error_diag error_input
    known_attributes csv
    PV IV NV
);
| 28 | #
|
---|
| 29 |
|
---|
# Decide which backend ("worker") module serves this process. This runs
# when the file is loaded, unless $Text::CSV::Worker is already set.
#
# The PERL_TEXT_CSV environment variable selects the backend:
#   0 or Text::CSV_PP    load Text::CSV_PP only
#   1, or a value containing "Text::CSV_XS,Text::CSV_PP" (whitespace is
#        allowed around the comma)
#                        try Text::CSV_XS, fall back on Text::CSV_PP
#   2 or Text::CSV_XS    load Text::CSV_XS only
# Any other value is fatal. When the variable does not exist at all,
# Text::CSV_XS is tried first and Text::CSV_PP is the fallback.

if ( !$Text::CSV::Worker ) {
    Carp::carp("Check used worker module...") if $Text::CSV::DEBUG;

    my $has_setting = exists $ENV{PERL_TEXT_CSV};
    my $setting     = $ENV{PERL_TEXT_CSV};

    if ( !$has_setting ) {
        _load_xs() or _load_pp() or Carp::croak($@);
    }
    elsif ( $setting eq '0' or $setting eq 'Text::CSV_PP' ) {
        _load_pp() or Carp::croak($@);
    }
    elsif ( $setting eq '1' or $setting =~ /Text::CSV_XS\s*,\s*Text::CSV_PP/ ) {
        _load_xs() or _load_pp() or Carp::croak($@);
    }
    elsif ( $setting eq '2' or $setting eq 'Text::CSV_XS' ) {
        _load_xs() or Carp::croak($@);
    }
    else {
        Carp::croak("The value of environmental variable 'PERL_TEXT_CSV' is invalid.");
    }
}
| 54 |
|
---|
# Constructor: builds a backend (worker) object and re-blesses it into the
# calling class.
#
#   my $csv = Text::CSV->new (\%attr);
#
# Every argument after the invocant is handed unchanged to the new() of
# the module named in $Text::CSV::Worker. On success the returned object
# gets its {_MODULE} entry set to that module name and is blessed into the
# invocant's class. When the backend constructor returns a false value,
# nothing is returned (undef in scalar context, empty list in list context).
#
# A false invocant, as in Text::CSV::new(0), is forwarded as a plain
# function call to the backend's new(). An exception raised by that call
# is trapped: the call then yields nothing and the message is left in $@.
sub new {    # normal mode
    my $proto = shift;
    my $class = ref($proto) || $proto;

    unless ($proto) {    # for Text::CSV_XS/PP::new(0);
        # Look the backend function up by name and call it inside a block
        # eval, rather than compiling a string of Perl code on every call.
        no strict qw(refs);
        my $backend_new = \&{ $Text::CSV::Worker . '::new' };
        return eval { $backend_new->($proto) };
    }

    #if (ref $_[0] and $_[0]->{module}) {
    #    Carp::croak("Can't set 'module' in non dynamic mode.");
    #}

    my $obj = $Text::CSV::Worker->new(@_) or return;

    $obj->{_MODULE} = $Text::CSV::Worker;
    return bless $obj, $class;
}
| 78 |
|
---|
| 79 |
|
---|
# Returns the Text::CSV_XS version string that is requested when the XS
# backend is loaded.
sub require_xs_version {
    return $XS_Version;
}
| 81 |
|
---|
| 82 |
|
---|
# Reports the name of the backend module.
#
# Called on a class name (any non-reference), it returns the package-wide
# $Text::CSV::Worker. Called on an object, it looks at the object's
# {_MODULE} entry: when that entry is itself a reference its class name is
# returned, otherwise the stored value is returned as is.
sub module {
    my ($invocant) = @_;

    return $Text::CSV::Worker unless ref $invocant;

    my $backend = $invocant->{_MODULE};
    return ref($backend) || $backend;
}

# backend() is an alias for module().
*backend = *module;
| 90 |
|
---|
| 91 |
|
---|
# True when the invocant's backend, as reported by module(), is the XS
# module named in $Module_XS.
sub is_xs {
    my ($invocant) = @_;
    return $invocant->module eq $Module_XS;
}
| 95 |
|
---|
| 96 |
|
---|
# True when the invocant's backend, as reported by module(), is the
# pure-Perl module named in $Module_PP.
sub is_pp {
    my ($invocant) = @_;
    return $invocant->module eq $Module_PP;
}
| 100 |
|
---|
| 101 |
|
---|
# Returns the value of the file-level $Is_Dynamic flag.
sub is_dynamic {
    return $Is_Dynamic;
}
| 103 |
|
---|
# Tries to load the XS backend, requesting version $XS_Version.
# Returns whatever _load() returns.
sub _load_xs {
    return _load( $Module_XS, $XS_Version );
}
| 105 |
|
---|
# Tries to load the pure-Perl backend, without a version requirement.
# Returns whatever _load() returns.
sub _load_pp {
    return _load($Module_PP);
}
| 107 |
|
---|
# Loads one backend module and wires it into Text::CSV.
#
#   $module  - package name of the backend to load
#   $version - optional version to request in the "use" statement
#
# Returns 1 on success. When the "use" fails, nothing is returned and the
# failure message stays in $@ for the caller to report.
#
# On success the backend is appended to @Text::CSV::ISA, recorded in
# $Text::CSV::Worker, and each function listed in @PublicMethods is
# aliased from the backend into the Text::CSV namespace.
sub _load {
    my ( $backend, $wanted_version ) = @_;
    $wanted_version = '' unless $wanted_version;

    Carp::carp("Load $backend.") if $Text::CSV::DEBUG;

    # "use" needs a string eval here: both the module name and the
    # version are only known at run time.
    eval qq| use $backend $wanted_version |;

    if ($@) {
        return;
    }

    push @Text::CSV::ISA, $backend;
    $Text::CSV::Worker = $backend;

    # Warnings are switched off for the rest of this sub, presumably to
    # keep the glob assignments below quiet; symbolic references are
    # needed to build the glob and sub names from strings.
    local $^W;
    no strict qw(refs);

    *{"Text::CSV::$_"} = \&{"$backend\::$_"} for @PublicMethods;

    return 1;
}
| 129 |
|
---|
| 130 |
|
---|
| 131 |
|
---|
| 132 | 1;
|
---|
| 133 | __END__
|
---|
| 134 |
|
---|
| 135 | =pod
|
---|
| 136 |
|
---|
| 137 | =head1 NAME
|
---|
| 138 |
|
---|
| 139 | Text::CSV - comma-separated values manipulator (using XS or PurePerl)
|
---|
| 140 |
|
---|
| 141 |
|
---|
| 142 | =head1 SYNOPSIS
|
---|
| 143 |
|
---|
| 144 | This section is taken from Text::CSV_XS.
|
---|
| 145 |
|
---|
| 146 | # Functional interface
|
---|
| 147 | use Text::CSV qw( csv );
|
---|
| 148 |
|
---|
| 149 | # Read whole file in memory
|
---|
| 150 | my $aoa = csv (in => "data.csv"); # as array of array
|
---|
| 151 | my $aoh = csv (in => "data.csv",
|
---|
| 152 | headers => "auto"); # as array of hash
|
---|
| 153 |
|
---|
| 154 | # Write array of arrays as csv file
|
---|
| 155 | csv (in => $aoa, out => "file.csv", sep_char=> ";");
|
---|
| 156 |
|
---|
| 157 | # Only show lines where "code" is odd
|
---|
| 158 | csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
|
---|
| 159 |
|
---|
| 160 | # Object interface
|
---|
| 161 | use Text::CSV;
|
---|
| 162 |
|
---|
| 163 | my @rows;
|
---|
| 164 | # Read/parse CSV
|
---|
| 165 | my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
|
---|
| 166 | open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
|
---|
| 167 | while (my $row = $csv->getline ($fh)) {
|
---|
| 168 | $row->[2] =~ m/pattern/ or next; # 3rd field should match
|
---|
| 169 | push @rows, $row;
|
---|
| 170 | }
|
---|
| 171 | close $fh;
|
---|
| 172 |
|
---|
| 173 | # and write as CSV
|
---|
| 174 | open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
|
---|
| 175 | $csv->say ($fh, $_) for @rows;
|
---|
| 176 | close $fh or die "new.csv: $!";
|
---|
| 177 |
|
---|
| 178 | =head1 DESCRIPTION
|
---|
| 179 |
|
---|
| 180 | Text::CSV is a thin wrapper for L<Text::CSV_XS>-compatible modules now.
|
---|
| 181 | All the backend modules provide facilities for the composition and
|
---|
| 182 | decomposition of comma-separated values. Text::CSV uses Text::CSV_XS
|
---|
| 183 | by default, and when Text::CSV_XS is not available, falls back on
|
---|
| 184 | L<Text::CSV_PP>, which is bundled in the same distribution as this module.
|
---|
| 185 |
|
---|
| 186 | =head1 CHOOSING BACKEND
|
---|
| 187 |
|
---|
| 188 | This module respects an environmental variable called C<PERL_TEXT_CSV>
|
---|
| 189 | when it decides a backend module to use. If this environmental variable
|
---|
| 190 | is not set, it tries to load Text::CSV_XS, and if Text::CSV_XS is not
|
---|
| 191 | available, falls back on Text::CSV_PP.
|
---|
| 192 |
|
---|
| 193 | If you never want it to fall back on Text::CSV_PP, set the variable
|
---|
| 194 | like this (C<export> may be C<setenv>, C<set> and the likes, depending
|
---|
| 195 | on your environment):
|
---|
| 196 |
|
---|
| 197 | > export PERL_TEXT_CSV=Text::CSV_XS
|
---|
| 198 |
|
---|
| 199 | If you prefer Text::CSV_XS to Text::CSV_PP (default), then:
|
---|
| 200 |
|
---|
| 201 | > export PERL_TEXT_CSV=Text::CSV_XS,Text::CSV_PP
|
---|
| 202 |
|
---|
| 203 | You may also want to set this variable at the top of your test files, in order
|
---|
| 204 | not to be bothered with incompatibilities between backends (you need to wrap
|
---|
| 205 | this in C<BEGIN>, and set before actually C<use>-ing Text::CSV module, as it
|
---|
| 206 | decides its backend as soon as it's loaded):
|
---|
| 207 |
|
---|
| 208 | BEGIN { $ENV{PERL_TEXT_CSV}='Text::CSV_PP'; }
|
---|
| 209 | use Text::CSV;
|
---|
| 210 |
|
---|
| 211 | =head1 NOTES
|
---|
| 212 |
|
---|
| 213 | This section is also taken from Text::CSV_XS.
|
---|
| 214 |
|
---|
| 215 | =head2 Embedded newlines
|
---|
| 216 |
|
---|
| 217 | B<Important Note>: The default behavior is to accept only ASCII characters
|
---|
| 218 | in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
|
---|
| 219 | fields can not contain newlines. If your data contains newlines embedded in
|
---|
| 220 | fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
|
---|
| 221 | set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
|
---|
| 222 | parsing options, you will always want to set binary.
|
---|
| 223 |
|
---|
| 224 | But you still have the problem that you have to pass a correct line to the
|
---|
| 225 | L</parse> method, which is more complicated from the usual point of usage:
|
---|
| 226 |
|
---|
| 227 | my $csv = Text::CSV->new ({ binary => 1, eol => $/ });
|
---|
| 228 | while (<>) { # WRONG!
|
---|
| 229 | $csv->parse ($_);
|
---|
| 230 | my @fields = $csv->fields ();
|
---|
| 231 | }
|
---|
| 232 |
|
---|
| 233 | this will break, as the C<while> might read broken lines: it does not care
|
---|
| 234 | about the quoting. If you need to support embedded newlines, the way to go
|
---|
| 235 | is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
|
---|
| 236 | B<and> C<\r\n> by default) and then
|
---|
| 237 |
|
---|
| 238 | my $csv = Text::CSV->new ({ binary => 1 });
|
---|
| 239 | open my $fh, "<", $file or die "$file: $!";
|
---|
| 240 | while (my $row = $csv->getline ($fh)) {
|
---|
| 241 | my @fields = @$row;
|
---|
| 242 | }
|
---|
| 243 |
|
---|
| 244 | The old(er) way of using global file handles is still supported
|
---|
| 245 |
|
---|
| 246 | while (my $row = $csv->getline (*ARGV)) { ... }
|
---|
| 247 |
|
---|
| 248 | =head2 Unicode
|
---|
| 249 |
|
---|
| 250 | Unicode is only tested to work with perl-5.8.2 and up.
|
---|
| 251 |
|
---|
| 252 | See also L</BOM>.
|
---|
| 253 |
|
---|
| 254 | The simplest way to ensure the correct encoding is used for in- and output
|
---|
| 255 | is by either setting layers on the filehandles, or setting the L</encoding>
|
---|
| 256 | argument for L</csv>.
|
---|
| 257 |
|
---|
| 258 | open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
|
---|
| 259 | or
|
---|
| 260 | my $aoa = csv (in => "in.csv", encoding => "UTF-8");
|
---|
| 261 |
|
---|
| 262 | open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
|
---|
| 263 | or
|
---|
| 264 | csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
|
---|
| 265 |
|
---|
| 266 | On parsing (both for L</getline> and L</parse>), if the source is marked
|
---|
| 267 | being UTF8, then all fields that are marked binary will also be marked UTF8.
|
---|
| 268 |
|
---|
| 269 | On combining (L</print> and L</combine>): if any of the combining fields
|
---|
| 270 | was marked UTF8, the resulting string will be marked as UTF8. Note however
|
---|
| 271 | that all fields I<before> the first field marked UTF8 that contain 8-bit
|
---|
| 272 | characters and were not upgraded to UTF8 will be C<bytes> in the
|
---|
| 273 | resulting string too, possibly causing unexpected errors. If you pass data
|
---|
| 274 | of different encoding, or you don't know if there is different encoding,
|
---|
| 275 | force it to be upgraded before you pass them on:
|
---|
| 276 |
|
---|
| 277 | $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
|
---|
| 278 |
|
---|
| 279 | For complete control over encoding, please use L<Text::CSV::Encoded>:
|
---|
| 280 |
|
---|
| 281 | use Text::CSV::Encoded;
|
---|
| 282 | my $csv = Text::CSV::Encoded->new ({
|
---|
| 283 | encoding_in => "iso-8859-1", # the encoding comes into Perl
|
---|
| 284 | encoding_out => "cp1252", # the encoding comes out of Perl
|
---|
| 285 | });
|
---|
| 286 |
|
---|
| 287 | $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
|
---|
| 288 | # combine () and print () accept *literally* utf8 encoded data
|
---|
| 289 | # parse () and getline () return *literally* utf8 encoded data
|
---|
| 290 |
|
---|
| 291 | $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
|
---|
| 292 | # combine () and print () accept UTF8 marked data
|
---|
| 293 | # parse () and getline () return UTF8 marked data
|
---|
| 294 |
|
---|
| 295 | =head2 BOM
|
---|
| 296 |
|
---|
| 297 | BOM (or Byte Order Mark) handling is available only inside the L</header>
|
---|
| 298 | method. This method supports the following encodings: C<utf-8>, C<utf-1>,
|
---|
| 299 | C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
|
---|
| 300 | C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>.
|
---|
| 301 |
|
---|
| 302 | If a file has a BOM, the easiest way to deal with that is
|
---|
| 303 |
|
---|
| 304 | my $aoh = csv (in => $file, detect_bom => 1);
|
---|
| 305 |
|
---|
| 306 | All records will be encoded based on the detected BOM.
|
---|
| 307 |
|
---|
| 308 | This implies a call to the L</header> method, which defaults to also set
|
---|
| 309 | the L</column_names>. So this is B<not> the same as
|
---|
| 310 |
|
---|
| 311 | my $aoh = csv (in => $file, headers => "auto");
|
---|
| 312 |
|
---|
| 313 | which only reads the first record to set L</column_names> but ignores any
|
---|
| 314 | meaning of a possibly present BOM.
|
---|
| 315 |
|
---|
| 316 | =head1 METHODS
|
---|
| 317 |
|
---|
| 318 | This section is also taken from Text::CSV_XS.
|
---|
| 319 |
|
---|
| 320 | =head2 version
|
---|
| 321 |
|
---|
| 322 | (Class method) Returns the current module version.
|
---|
| 323 |
|
---|
| 324 | =head2 new
|
---|
| 325 |
|
---|
| 326 | (Class method) Returns a new instance of class Text::CSV. The attributes
|
---|
| 327 | are described by the (optional) hash ref C<\%attr>.
|
---|
| 328 |
|
---|
| 329 | my $csv = Text::CSV->new ({ attributes ... });
|
---|
| 330 |
|
---|
| 331 | The following attributes are available:
|
---|
| 332 |
|
---|
| 333 | =head3 eol
|
---|
| 334 |
|
---|
| 335 | my $csv = Text::CSV->new ({ eol => $/ });
|
---|
| 336 | $csv->eol (undef);
|
---|
| 337 | my $eol = $csv->eol;
|
---|
| 338 |
|
---|
| 339 | The end-of-line string to add to rows for L</print> or the record separator
|
---|
| 340 | for L</getline>.
|
---|
| 341 |
|
---|
| 342 | When not passed in a B<parser> instance, the default behavior is to accept
|
---|
| 343 | C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
|
---|
| 344 | all. Passing C<undef> or the empty string behave the same.
|
---|
| 345 |
|
---|
| 346 | When not passed in a B<generating> instance, records are not terminated at
|
---|
| 347 | all, so it is probably wise to pass something you expect. A safe choice for
|
---|
| 348 | C<eol> on output is either C<$/> or C<\r\n>.
|
---|
| 349 |
|
---|
| 350 | Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
|
---|
| 351 | (C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
|
---|
| 352 | Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
|
---|
| 353 |
|
---|
| 354 | If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
|
---|
| 355 | only a Carriage Return without Line Feed, will be L</parse>d correctly.
|
---|
| 356 |
|
---|
| 357 | =head3 sep_char
|
---|
| 358 |
|
---|
| 359 | my $csv = Text::CSV->new ({ sep_char => ";" });
|
---|
| 360 | $csv->sep_char (";");
|
---|
| 361 | my $c = $csv->sep_char;
|
---|
| 362 |
|
---|
| 363 | The char used to separate fields, by default a comma (C<,>). Limited to a
|
---|
| 364 | single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
|
---|
| 365 | (tilde). When longer sequences are required, use L<C<sep>|/sep>.
|
---|
| 366 |
|
---|
| 367 | The separation character can not be equal to the quote character or to the
|
---|
| 368 | escape character.
|
---|
| 369 |
|
---|
| 370 | =head3 sep
|
---|
| 371 |
|
---|
| 372 | my $csv = Text::CSV->new ({ sep => "\N{FULLWIDTH COMMA}" });
|
---|
| 373 | $csv->sep (";");
|
---|
| 374 | my $sep = $csv->sep;
|
---|
| 375 |
|
---|
| 376 | The chars used to separate fields, by default undefined. Limited to 8 bytes.
|
---|
| 377 |
|
---|
| 378 | When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
|
---|
| 379 | acts as an alias to L<C<sep_char>|/sep_char>.
|
---|
| 380 |
|
---|
| 381 | =head3 quote_char
|
---|
| 382 |
|
---|
| 383 | my $csv = Text::CSV->new ({ quote_char => "'" });
|
---|
| 384 | $csv->quote_char (undef);
|
---|
| 385 | my $c = $csv->quote_char;
|
---|
| 386 |
|
---|
| 387 | The character to quote fields containing blanks or binary data, by default
|
---|
| 388 | the double quote character (C<">). A value of undef suppresses quote chars
|
---|
| 389 | (for simple cases only). Limited to a single-byte character, usually in the
|
---|
| 390 | range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
|
---|
| 391 | required, use L<C<quote>|/quote>.
|
---|
| 392 |
|
---|
| 393 | C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
|
---|
| 394 |
|
---|
| 395 | =head3 quote
|
---|
| 396 |
|
---|
| 397 | my $csv = Text::CSV->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
|
---|
| 398 | $csv->quote ("'");
|
---|
| 399 | my $quote = $csv->quote;
|
---|
| 400 |
|
---|
| 401 | The chars used to quote fields, by default undefined. Limited to 8 bytes.
|
---|
| 402 |
|
---|
| 403 | When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
|
---|
| 404 | it acts as an alias to L<C<quote_char>|/quote_char>.
|
---|
| 405 |
|
---|
| 406 | =head3 escape_char
|
---|
| 407 |
|
---|
| 408 | my $csv = Text::CSV->new ({ escape_char => "\\" });
|
---|
| 409 | $csv->escape_char (":");
|
---|
| 410 | my $c = $csv->escape_char;
|
---|
| 411 |
|
---|
| 412 | The character to escape certain characters inside quoted fields. This is
|
---|
| 413 | limited to a single-byte character, usually in the range from C<0x20>
|
---|
| 414 | (space) to C<0x7E> (tilde).
|
---|
| 415 |
|
---|
| 416 | The C<escape_char> defaults to being the double-quote mark (C<">). In other
|
---|
| 417 | words the same as the default L<C<quote_char>|/quote_char>. This means that
|
---|
| 418 | doubling the quote mark in a field escapes it:
|
---|
| 419 |
|
---|
| 420 | "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
|
---|
| 421 |
|
---|
| 422 | If you change the L<C<quote_char>|/quote_char> without changing the
|
---|
| 423 | C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
|
---|
| 424 | If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
|
---|
| 425 | it you will need to also change the C<escape_char> to be the same as what
|
---|
| 426 | you have changed the L<C<quote_char>|/quote_char> to.
|
---|
| 427 |
|
---|
| 428 | Setting C<escape_char> to C<undef> or C<""> will disable escaping completely
|
---|
| 429 | and is greatly discouraged. This will also disable C<escape_null>.
|
---|
| 430 |
|
---|
| 431 | The escape character can not be equal to the separation character.
|
---|
| 432 |
|
---|
| 433 | =head3 binary
|
---|
| 434 |
|
---|
| 435 | my $csv = Text::CSV->new ({ binary => 1 });
|
---|
| 436 | $csv->binary (0);
|
---|
| 437 | my $f = $csv->binary;
|
---|
| 438 |
|
---|
| 439 | If this attribute is C<1>, you may use binary characters in quoted fields,
|
---|
| 440 | including line feeds, carriage returns and C<NULL> bytes. (The latter could
|
---|
| 441 | be escaped as C<"0>.) By default this feature is off.
|
---|
| 442 |
|
---|
| 443 | If a string is marked UTF8, C<binary> will be turned on automatically when
|
---|
| 444 | binary characters other than C<CR> and C<NL> are encountered. Note that a
|
---|
| 445 | simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
|
---|
| 446 | so setting C<< { binary => 1 } >> is still a wise option.
|
---|
| 447 |
|
---|
| 448 | =head3 strict
|
---|
| 449 |
|
---|
| 450 | my $csv = Text::CSV->new ({ strict => 1 });
|
---|
| 451 | $csv->strict (0);
|
---|
| 452 | my $f = $csv->strict;
|
---|
| 453 |
|
---|
| 454 | If this attribute is set to C<1>, any row that parses to a different number
|
---|
| 455 | of fields than the previous row will cause the parser to throw error 2014.
|
---|
| 456 |
|
---|
| 457 | =head3 formula_handling
|
---|
| 458 |
|
---|
| 459 | =head3 formula
|
---|
| 460 |
|
---|
| 461 | my $csv = Text::CSV->new ({ formula => "none" });
|
---|
| 462 | $csv->formula ("none");
|
---|
| 463 | my $f = $csv->formula;
|
---|
| 464 |
|
---|
| 465 | This defines the behavior of fields containing I<formulas>. As formulas are
|
---|
| 466 | considered dangerous in spreadsheets, this attribute can define an optional
|
---|
| 467 | action to be taken if a field starts with an equal sign (C<=>).
|
---|
| 468 |
|
---|
| 469 | For purpose of code-readability, this can also be written as
|
---|
| 470 |
|
---|
| 471 | my $csv = Text::CSV->new ({ formula_handling => "none" });
|
---|
| 472 | $csv->formula_handling ("none");
|
---|
| 473 | my $f = $csv->formula_handling;
|
---|
| 474 |
|
---|
| 475 | Possible values for this attribute are
|
---|
| 476 |
|
---|
| 477 | =over 2
|
---|
| 478 |
|
---|
| 479 | =item none
|
---|
| 480 |
|
---|
| 481 | Take no specific action. This is the default.
|
---|
| 482 |
|
---|
| 483 | $csv->formula ("none");
|
---|
| 484 |
|
---|
| 485 | =item die
|
---|
| 486 |
|
---|
| 487 | Cause the process to C<die> whenever a leading C<=> is encountered.
|
---|
| 488 |
|
---|
| 489 | $csv->formula ("die");
|
---|
| 490 |
|
---|
| 491 | =item croak
|
---|
| 492 |
|
---|
| 493 | Cause the process to C<croak> whenever a leading C<=> is encountered. (See
|
---|
| 494 | L<Carp>)
|
---|
| 495 |
|
---|
| 496 | $csv->formula ("croak");
|
---|
| 497 |
|
---|
| 498 | =item diag
|
---|
| 499 |
|
---|
| 500 | Report position and content of the field whenever a leading C<=> is found.
|
---|
| 501 | The value of the field is unchanged.
|
---|
| 502 |
|
---|
| 503 | $csv->formula ("diag");
|
---|
| 504 |
|
---|
| 505 | =item empty
|
---|
| 506 |
|
---|
| 507 | Replace the content of fields that start with a C<=> with the empty string.
|
---|
| 508 |
|
---|
| 509 | $csv->formula ("empty");
|
---|
| 510 | $csv->formula ("");
|
---|
| 511 |
|
---|
| 512 | =item undef
|
---|
| 513 |
|
---|
| 514 | Replace the content of fields that start with a C<=> with C<undef>.
|
---|
| 515 |
|
---|
| 516 | $csv->formula ("undef");
|
---|
| 517 | $csv->formula (undef);
|
---|
| 518 |
|
---|
| 519 | =back
|
---|
| 520 |
|
---|
| 521 | All other values will give a warning and then fall back to C<diag>.
|
---|
| 522 |
|
---|
| 523 | =head3 decode_utf8
|
---|
| 524 |
|
---|
| 525 | my $csv = Text::CSV->new ({ decode_utf8 => 1 });
|
---|
| 526 | $csv->decode_utf8 (0);
|
---|
| 527 | my $f = $csv->decode_utf8;
|
---|
| 528 |
|
---|
| 529 | This attribute defaults to TRUE.
|
---|
| 530 |
|
---|
| 531 | While I<parsing>, fields that are valid UTF-8, are automatically set to be
|
---|
| 532 | UTF-8, so that
|
---|
| 533 |
|
---|
| 534 | $csv->parse ("\xC4\xA8\n");
|
---|
| 535 |
|
---|
| 536 | results in
|
---|
| 537 |
|
---|
| 538 | PV("\304\250"\0) [UTF8 "\x{128}"]
|
---|
| 539 |
|
---|
| 540 | Sometimes it might not be a desired action. To prevent those upgrades, set
|
---|
| 541 | this attribute to false, and the result will be
|
---|
| 542 |
|
---|
| 543 | PV("\304\250"\0)
|
---|
| 544 |
|
---|
| 545 | =head3 auto_diag
|
---|
| 546 |
|
---|
| 547 | my $csv = Text::CSV->new ({ auto_diag => 1 });
|
---|
| 548 | $csv->auto_diag (2);
|
---|
| 549 | my $l = $csv->auto_diag;
|
---|
| 550 |
|
---|
| 551 | Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
|
---|
| 552 | to be automatically called in void context upon errors.
|
---|
| 553 |
|
---|
| 554 | In case of error C<2012 - EOF>, this call will be void.
|
---|
| 555 |
|
---|
| 556 | If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
|
---|
| 557 | on errors instead of C<warn>. If set to anything unrecognized, it will be
|
---|
| 558 | silently ignored.
|
---|
| 559 |
|
---|
| 560 | Future extensions to this feature will include more reliable auto-detection
|
---|
| 561 | of C<autodie> being active in the scope of which the error occurred which
|
---|
| 562 | will increment the value of C<auto_diag> with C<1> the moment the error is
|
---|
| 563 | detected.
|
---|
| 564 |
|
---|
| 565 | =head3 diag_verbose
|
---|
| 566 |
|
---|
| 567 | my $csv = Text::CSV->new ({ diag_verbose => 1 });
|
---|
| 568 | $csv->diag_verbose (2);
|
---|
| 569 | my $l = $csv->diag_verbose;
|
---|
| 570 |
|
---|
| 571 | Set the verbosity of the output triggered by C<auto_diag>. Currently only
|
---|
| 572 | adds the current input-record-number (if known) to the diagnostic output
|
---|
| 573 | with an indication of the position of the error.
|
---|
| 574 |
|
---|
| 575 | =head3 blank_is_undef
|
---|
| 576 |
|
---|
| 577 | my $csv = Text::CSV->new ({ blank_is_undef => 1 });
|
---|
| 578 | $csv->blank_is_undef (0);
|
---|
| 579 | my $f = $csv->blank_is_undef;
|
---|
| 580 |
|
---|
| 581 | Under normal circumstances, C<CSV> data makes no distinction between quoted-
|
---|
| 582 | and unquoted empty fields. These both end up in an empty string field once
|
---|
| 583 | read, thus
|
---|
| 584 |
|
---|
| 585 | 1,"",," ",2
|
---|
| 586 |
|
---|
| 587 | is read as
|
---|
| 588 |
|
---|
| 589 | ("1", "", "", " ", "2")
|
---|
| 590 |
|
---|
| 591 | When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote>
|
---|
| 592 | or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the
|
---|
| 593 | result of an undefined value. To enable this distinction when I<reading>
|
---|
| 594 | C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
|
---|
| 595 | fields to be set to C<undef>, causing the above to be parsed as
|
---|
| 596 |
|
---|
| 597 | ("1", "", undef, " ", "2")
|
---|
| 598 |
|
---|
| 599 | Note that this is especially important when loading C<CSV> fields into a
|
---|
| 600 | database that allows C<NULL> values, as the perl equivalent for C<NULL> is
|
---|
| 601 | C<undef> in L<DBI> land.
|
---|
| 602 |
|
---|
| 603 | =head3 empty_is_undef
|
---|
| 604 |
|
---|
| 605 | my $csv = Text::CSV->new ({ empty_is_undef => 1 });
|
---|
| 606 | $csv->empty_is_undef (0);
|
---|
| 607 | my $f = $csv->empty_is_undef;
|
---|
| 608 |
|
---|
| 609 | Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
|
---|
| 610 | attribute converts all empty fields to C<undef>, so
|
---|
| 611 |
|
---|
| 612 | 1,"",," ",2
|
---|
| 613 |
|
---|
| 614 | is read as
|
---|
| 615 |
|
---|
| 616 | (1, undef, undef, " ", 2)
|
---|
| 617 |
|
---|
| 618 | Note that this affects only fields that are originally empty, not fields
|
---|
| 619 | that are empty after stripping allowed whitespace. YMMV.
|
---|
| 620 |
|
---|
| 621 | =head3 allow_whitespace
|
---|
| 622 |
|
---|
| 623 | my $csv = Text::CSV->new ({ allow_whitespace => 1 });
|
---|
| 624 | $csv->allow_whitespace (0);
|
---|
| 625 | my $f = $csv->allow_whitespace;
|
---|
| 626 |
|
---|
| 627 | When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
|
---|
| 628 | surrounding the separation character is removed when parsing. If either
|
---|
| 629 | C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
|
---|
| 630 | L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
|
---|
| 631 | be considered whitespace.
|
---|
| 632 |
|
---|
| 633 | Now lines like:
|
---|
| 634 |
|
---|
| 635 | 1 , "foo" , bar , 3 , zapp
|
---|
| 636 |
|
---|
| 637 | are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
|
---|
| 638 |
|
---|
| 639 | Note that B<all> whitespace is stripped from both start and end of each
|
---|
| 640 | field. That would make it I<more> than a I<feature> to enable parsing bad
|
---|
| 641 | C<CSV> lines, as
|
---|
| 642 |
|
---|
| 643 | 1, 2.0, 3, ape , monkey
|
---|
| 644 |
|
---|
| 645 | will now be parsed as
|
---|
| 646 |
|
---|
| 647 | ("1", "2.0", "3", "ape", "monkey")
|
---|
| 648 |
|
---|
| 649 | even if the original line was perfectly acceptable C<CSV>.
|
---|
| 650 |
|
---|
| 651 | =head3 allow_loose_quotes
|
---|
| 652 |
|
---|
| 653 | my $csv = Text::CSV->new ({ allow_loose_quotes => 1 });
|
---|
| 654 | $csv->allow_loose_quotes (0);
|
---|
| 655 | my $f = $csv->allow_loose_quotes;
|
---|
| 656 |
|
---|
| 657 | By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
|
---|
| 658 | characters like
|
---|
| 659 |
|
---|
| 660 | 1,foo "bar" baz,42
|
---|
| 661 |
|
---|
| 662 | would result in parse error 2034. Though it is still bad practice to allow
|
---|
| 663 | this format, we cannot help the fact that some vendors make their
|
---|
| 664 | applications spit out lines styled this way.
|
---|
| 665 |
|
---|
| 666 | If there is B<really> bad C<CSV> data, like
|
---|
| 667 |
|
---|
| 668 | 1,"foo "bar" baz",42
|
---|
| 669 |
|
---|
| 670 | or
|
---|
| 671 |
|
---|
| 672 | 1,""foo bar baz"",42
|
---|
| 673 |
|
---|
| 674 | there is a way to get this data-line parsed and leave the quotes inside the
|
---|
| 675 | quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
|
---|
| 676 | B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
|
---|
| 677 | to L<C<quote_char>|/quote_char>.
|
---|
| 678 |
|
---|
| 679 | =head3 allow_loose_escapes
|
---|
| 680 |
|
---|
| 681 | my $csv = Text::CSV->new ({ allow_loose_escapes => 1 });
|
---|
| 682 | $csv->allow_loose_escapes (0);
|
---|
| 683 | my $f = $csv->allow_loose_escapes;
|
---|
| 684 |
|
---|
| 685 | Parsing fields that have L<C<escape_char>|/escape_char> characters that
|
---|
| 686 | escape characters that do not need to be escaped, like:
|
---|
| 687 |
|
---|
| 688 | my $csv = Text::CSV->new ({ escape_char => "\\" });
|
---|
| 689 | $csv->parse (qq{1,"my bar\'s",baz,42});
|
---|
| 690 |
|
---|
| 691 | would result in parse error 2025. Though it is bad practice to allow this
|
---|
| 692 | format, this attribute enables you to treat all escape character sequences
|
---|
| 693 | equal.
|
---|
| 694 |
|
---|
| 695 | =head3 allow_unquoted_escape
|
---|
| 696 |
|
---|
| 697 | my $csv = Text::CSV->new ({ allow_unquoted_escape => 1 });
|
---|
| 698 | $csv->allow_unquoted_escape (0);
|
---|
| 699 | my $f = $csv->allow_unquoted_escape;
|
---|
| 700 |
|
---|
| 701 | A backward compatibility issue where L<C<escape_char>|/escape_char> differs
|
---|
| 702 | from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
|
---|
| 703 | from being in the first position of a field. If L<C<quote_char>|/quote_char> is
|
---|
| 704 | equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
|
---|
| 705 | this would be illegal:
|
---|
| 706 |
|
---|
| 707 | 1,\0,2
|
---|
| 708 |
|
---|
| 709 | Setting this attribute to C<1> might help to overcome issues with backward
|
---|
| 710 | compatibility and allow this style.
|
---|
| 711 |
|
---|
| 712 | =head3 always_quote
|
---|
| 713 |
|
---|
| 714 | my $csv = Text::CSV->new ({ always_quote => 1 });
|
---|
| 715 | $csv->always_quote (0);
|
---|
| 716 | my $f = $csv->always_quote;
|
---|
| 717 |
|
---|
| 718 | By default the generated fields are quoted only if they I<need> to be. For
|
---|
| 719 | example, if they contain the separator character. If you set this attribute
|
---|
| 720 | to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
|
---|
| 721 | quoted, see L</blank_is_undef>). This makes it quite often easier to handle
|
---|
| 722 | exported data in external applications.
|
---|
| 723 |
|
---|
| 724 | =head3 quote_space
|
---|
| 725 |
|
---|
| 726 | my $csv = Text::CSV->new ({ quote_space => 1 });
|
---|
| 727 | $csv->quote_space (0);
|
---|
| 728 | my $f = $csv->quote_space;
|
---|
| 729 |
|
---|
| 730 | By default, a space in a field would trigger quotation. As no rule exists
|
---|
| 731 | that requires this in C<CSV>, nor any for the opposite, the default is true
|
---|
| 732 | for safety. You can exclude the space from this trigger by setting this
|
---|
| 733 | attribute to 0.
|
---|
| 734 |
|
---|
| 735 | =head3 quote_empty
|
---|
| 736 |
|
---|
| 737 | my $csv = Text::CSV->new ({ quote_empty => 1 });
|
---|
| 738 | $csv->quote_empty (0);
|
---|
| 739 | my $f = $csv->quote_empty;
|
---|
| 740 |
|
---|
| 741 | By default the generated fields are quoted only if they I<need> to be. An
|
---|
| 742 | empty (defined) field does not need quotation. If you set this attribute to
|
---|
| 743 | C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
|
---|
| 744 | quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
|
---|
| 745 |
|
---|
| 746 | =head3 quote_binary
|
---|
| 747 |
|
---|
| 748 | my $csv = Text::CSV->new ({ quote_binary => 1 });
|
---|
| 749 | $csv->quote_binary (0);
|
---|
| 750 | my $f = $csv->quote_binary;
|
---|
| 751 |
|
---|
| 752 | By default, all "unsafe" bytes inside a string cause the combined field to
|
---|
| 753 | be quoted. By setting this attribute to C<0>, you can disable that trigger
|
---|
| 754 | for bytes >= C<0x7F>.
|
---|
| 755 |
|
---|
| 756 | =head3 escape_null
|
---|
| 757 |
|
---|
| 758 | my $csv = Text::CSV->new ({ escape_null => 1 });
|
---|
| 759 | $csv->escape_null (0);
|
---|
| 760 | my $f = $csv->escape_null;
|
---|
| 761 |
|
---|
| 762 | By default, a C<NULL> byte in a field would be escaped. This option enables
|
---|
| 763 | you to treat the C<NULL> byte as a simple binary character in binary mode
|
---|
| 764 | (the C<< { binary => 1 } >> is set). The default is true. You can prevent
|
---|
| 765 | C<NULL> escapes by setting this attribute to C<0>.
|
---|
| 766 |
|
---|
| 767 | When the C<escape_char> attribute is set to undefined, this attribute will
|
---|
| 768 | be set to false.
|
---|
| 769 |
|
---|
| 770 | The default setting will encode "=\x00=" as
|
---|
| 771 |
|
---|
| 772 | "="0="
|
---|
| 773 |
|
---|
| 774 | With C<escape_null> set to C<0>, this will result in
|
---|
| 775 |
|
---|
| 776 | "=\x00="
|
---|
| 777 |
|
---|
| 778 | The default when using the C<csv> function is C<false>.
|
---|
| 779 |
|
---|
| 780 | For backward compatibility reasons, the deprecated old name C<quote_null>
|
---|
| 781 | is still recognized.
|
---|
| 782 |
|
---|
| 783 | =head3 keep_meta_info
|
---|
| 784 |
|
---|
| 785 | my $csv = Text::CSV->new ({ keep_meta_info => 1 });
|
---|
| 786 | $csv->keep_meta_info (0);
|
---|
| 787 | my $f = $csv->keep_meta_info;
|
---|
| 788 |
|
---|
| 789 | By default, the parsing of input records is as simple and fast as possible.
|
---|
| 790 | However, some parsing information - like quotation of the original field -
|
---|
| 791 | is lost in that process. Setting this flag to true enables retrieving that
|
---|
| 792 | information after parsing with the methods L</meta_info>, L</is_quoted>,
|
---|
| 793 | and L</is_binary> described below. Default is false for performance.
|
---|
| 794 |
|
---|
| 795 | If you set this attribute to a value greater than 9, then you can control
|
---|
| 796 | output quotation style like it was used in the input of the last parsed
|
---|
| 797 | record (unless quotation was added because of other reasons).
|
---|
| 798 |
|
---|
| 799 | my $csv = Text::CSV->new ({
|
---|
| 800 | binary => 1,
|
---|
| 801 | keep_meta_info => 1,
|
---|
| 802 | quote_space => 0,
|
---|
| 803 | });
|
---|
| 804 |
|
---|
| 805 | my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
|
---|
| 806 |
|
---|
| 807 | $csv->print (*STDOUT, \@row);
|
---|
| 808 | # 1,,, , ,f,g,"h""h",help,help
|
---|
| 809 | $csv->keep_meta_info (11);
|
---|
| 810 | $csv->print (*STDOUT, \@row);
|
---|
| 811 | # 1,,"", ," ",f,"g","h""h",help,"help"
|
---|
| 812 |
|
---|
| 813 | =head3 undef_str
|
---|
| 814 |
|
---|
| 815 | my $csv = Text::CSV->new ({ undef_str => "\\N" });
|
---|
| 816 | $csv->undef_str (undef);
|
---|
| 817 | my $s = $csv->undef_str;
|
---|
| 818 |
|
---|
| 819 | This attribute optionally defines the output of undefined fields. The value
|
---|
| 820 | passed is not changed at all, so if it needs quotation, the quotation needs
|
---|
| 821 | to be included in the value of the attribute. Use with caution, as passing
|
---|
| 822 | a value like C<",",,,,"""> will for sure mess up your output. The default
|
---|
| 823 | for this attribute is C<undef>, meaning no special treatment.
|
---|
| 824 |
|
---|
| 825 | This attribute is useful when exporting CSV data to be imported in custom
|
---|
| 826 | loaders, like for MySQL, that recognize special sequences for C<NULL> data.
|
---|
| 827 |
|
---|
| 828 | This attribute has no meaning when parsing CSV data.
|
---|
| 829 |
|
---|
| 830 | =head3 verbatim
|
---|
| 831 |
|
---|
| 832 | my $csv = Text::CSV->new ({ verbatim => 1 });
|
---|
| 833 | $csv->verbatim (0);
|
---|
| 834 | my $f = $csv->verbatim;
|
---|
| 835 |
|
---|
| 836 | This is a quite controversial attribute to set, but makes some hard things
|
---|
| 837 | possible.
|
---|
| 838 |
|
---|
| 839 | The rationale behind this attribute is to tell the parser that the normally
|
---|
| 840 | special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
|
---|
| 841 | special when this flag is set, and be dealt with as being ordinary binary
|
---|
| 842 | characters. This will ease working with data with embedded newlines.
|
---|
| 843 |
|
---|
| 844 | When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
|
---|
| 845 | every line.
|
---|
| 846 |
|
---|
| 847 | Imagine a file format like
|
---|
| 848 |
|
---|
| 849 | M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
|
---|
| 850 |
|
---|
| 851 | where the line ending is a very specific C<"#\r\n">, and the sep_char is a
|
---|
| 852 | C<^> (caret). None of the fields is quoted, but embedded binary data is
|
---|
| 853 | likely to be present. With the specific line ending, this should not be too
|
---|
| 854 | hard to detect.
|
---|
| 855 |
|
---|
| 856 | By default, Text::CSV's parse function is instructed to only know about
|
---|
| 857 | C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
|
---|
| 858 | embedded newline as a real C<end-of-line>, so it can scan the next line if
|
---|
| 859 | binary is true, and the newline is inside a quoted field. With this option,
|
---|
| 860 | we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
|
---|
| 861 | a binary character.
|
---|
| 862 |
|
---|
| 863 | For L</parse> this means that the parser has no more idea about line ending
|
---|
| 864 | and L</getline> C<chomp>s line endings on reading.
|
---|
| 865 |
|
---|
| 866 | =head3 types
|
---|
| 867 |
|
---|
| 868 | A set of column types; the attribute is immediately passed to the L</types>
|
---|
| 869 | method.
|
---|
| 870 |
|
---|
| 871 | =head3 callbacks
|
---|
| 872 |
|
---|
| 873 | See the L</Callbacks> section below.
|
---|
| 874 |
|
---|
| 875 | =head3 accessors
|
---|
| 876 |
|
---|
| 877 | To sum it up,
|
---|
| 878 |
|
---|
| 879 | $csv = Text::CSV->new ();
|
---|
| 880 |
|
---|
| 881 | is equivalent to
|
---|
| 882 |
|
---|
| 883 | $csv = Text::CSV->new ({
|
---|
| 884 | eol => undef, # \r, \n, or \r\n
|
---|
| 885 | sep_char => ',',
|
---|
| 886 | sep => undef,
|
---|
| 887 | quote_char => '"',
|
---|
| 888 | quote => undef,
|
---|
| 889 | escape_char => '"',
|
---|
| 890 | binary => 0,
|
---|
| 891 | decode_utf8 => 1,
|
---|
| 892 | auto_diag => 0,
|
---|
| 893 | diag_verbose => 0,
|
---|
| 894 | blank_is_undef => 0,
|
---|
| 895 | empty_is_undef => 0,
|
---|
| 896 | allow_whitespace => 0,
|
---|
| 897 | allow_loose_quotes => 0,
|
---|
| 898 | allow_loose_escapes => 0,
|
---|
| 899 | allow_unquoted_escape => 0,
|
---|
| 900 | always_quote => 0,
|
---|
| 901 | quote_empty => 0,
|
---|
| 902 | quote_space => 1,
|
---|
| 903 | escape_null => 1,
|
---|
| 904 | quote_binary => 1,
|
---|
| 905 | keep_meta_info => 0,
|
---|
| 906 | strict => 0,
|
---|
| 907 | formula => 0,
|
---|
| 908 | verbatim => 0,
|
---|
| 909 | undef_str => undef,
|
---|
| 910 | types => undef,
|
---|
| 911 | callbacks => undef,
|
---|
| 912 | });
|
---|
| 913 |
|
---|
| 914 | For all of the above mentioned flags, an accessor method is available where
|
---|
| 915 | you can inquire the current value, or change the value
|
---|
| 916 |
|
---|
| 917 | my $quote = $csv->quote_char;
|
---|
| 918 | $csv->binary (1);
|
---|
| 919 |
|
---|
| 920 | It is not wise to change these settings halfway through writing C<CSV> data
|
---|
| 921 | to a stream. If however you want to create a new stream using the available
|
---|
| 922 | C<CSV> object, there is no harm in changing them.
|
---|
| 923 |
|
---|
| 924 | If the L</new> constructor call fails, it returns C<undef>, and makes the
|
---|
| 925 | fail reason available through the L</error_diag> method.
|
---|
| 926 |
|
---|
| 927 | $csv = Text::CSV->new ({ ecs_char => 1 }) or
|
---|
| 928 | die "".Text::CSV->error_diag ();
|
---|
| 929 |
|
---|
| 930 | L</error_diag> will return a string like
|
---|
| 931 |
|
---|
| 932 | "INI - Unknown attribute 'ecs_char'"
|
---|
| 933 |
|
---|
| 934 | =head2 known_attributes
|
---|
| 935 |
|
---|
| 936 | @attr = Text::CSV->known_attributes;
|
---|
| 937 | @attr = Text::CSV::known_attributes;
|
---|
| 938 | @attr = $csv->known_attributes;
|
---|
| 939 |
|
---|
| 940 | This method will return an ordered list of all the supported attributes as
|
---|
| 941 | described above. This can be useful for knowing what attributes are valid
|
---|
| 942 | in classes that use or extend Text::CSV.
|
---|
| 943 |
|
---|
| 944 | =head2 print
|
---|
| 945 |
|
---|
| 946 | $status = $csv->print ($fh, $colref);
|
---|
| 947 |
|
---|
| 948 | Similar to L</combine> + L</string> + L</print>, but much more efficient.
|
---|
| 949 | It expects an array ref as input (not an array!) and the resulting string
|
---|
| 950 | is not really created, but immediately written to the C<$fh> object,
|
---|
| 951 | typically an IO handle or any other object that offers a L</print> method.
|
---|
| 952 |
|
---|
| 953 | For performance reasons C<print> does not create a result string, so all
|
---|
| 954 | L</string>, L</status>, L</fields>, and L</error_input> methods will return
|
---|
| 955 | undefined information after executing this method.
|
---|
| 956 |
|
---|
| 957 | If C<$colref> is C<undef> (explicit, not through a variable argument) and
|
---|
| 958 | L</bind_columns> was used to specify fields to be printed, it is possible
|
---|
| 959 | to make performance improvements, as otherwise data would have to be copied
|
---|
| 960 | as arguments to the method call:
|
---|
| 961 |
|
---|
| 962 | $csv->bind_columns (\($foo, $bar));
|
---|
| 963 | $status = $csv->print ($fh, undef);
|
---|
| 964 |
|
---|
| 965 | A short benchmark
|
---|
| 966 |
|
---|
| 967 | my @data = ("aa" .. "zz");
|
---|
| 968 | $csv->bind_columns (\(@data));
|
---|
| 969 |
|
---|
| 970 | $csv->print ($fh, [ @data ]); # 11800 recs/sec
|
---|
| 971 | $csv->print ($fh, \@data ); # 57600 recs/sec
|
---|
| 972 | $csv->print ($fh, undef ); # 48500 recs/sec
|
---|
| 973 |
|
---|
| 974 | =head2 say
|
---|
| 975 |
|
---|
| 976 | $status = $csv->say ($fh, $colref);
|
---|
| 977 |
|
---|
| 978 | Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
|
---|
| 979 |
|
---|
| 980 | =head2 print_hr
|
---|
| 981 |
|
---|
| 982 | $csv->print_hr ($fh, $ref);
|
---|
| 983 |
|
---|
| 984 | Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
|
---|
| 985 | provided the column names are set with L</column_names>.
|
---|
| 986 |
|
---|
| 987 | It is just a wrapper method with basic parameter checks over
|
---|
| 988 |
|
---|
| 989 | $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
|
---|
| 990 |
|
---|
| 991 | =head2 combine
|
---|
| 992 |
|
---|
| 993 | $status = $csv->combine (@fields);
|
---|
| 994 |
|
---|
| 995 | This method constructs a C<CSV> record from C<@fields>, returning success
|
---|
| 996 | or failure. Failure can result from lack of arguments or an argument that
|
---|
| 997 | contains an invalid character. Upon success, L</string> can be called to
|
---|
| 998 | retrieve the resultant C<CSV> string. Upon failure, the value returned by
|
---|
| 999 | L</string> is undefined and L</error_input> could be called to retrieve the
|
---|
| 1000 | invalid argument.
|
---|
| 1001 |
|
---|
| 1002 | =head2 string
|
---|
| 1003 |
|
---|
| 1004 | $line = $csv->string ();
|
---|
| 1005 |
|
---|
| 1006 | This method returns the input to L</parse> or the resultant C<CSV> string
|
---|
| 1007 | of L</combine>, whichever was called more recently.
|
---|
| 1008 |
|
---|
| 1009 | =head2 getline
|
---|
| 1010 |
|
---|
| 1011 | $colref = $csv->getline ($fh);
|
---|
| 1012 |
|
---|
| 1013 | This is the counterpart to L</print>, as L</parse> is the counterpart to
|
---|
| 1014 | L</combine>: it parses a row from the C<$fh> handle using the L</getline>
|
---|
| 1015 | method associated with C<$fh> and parses this row into an array ref. This
|
---|
| 1016 | array ref is returned by the function or C<undef> for failure. When C<$fh>
|
---|
| 1017 | does not support C<getline>, you are likely to hit errors.
|
---|
| 1018 |
|
---|
| 1019 | When fields are bound with L</bind_columns> the return value is a reference
|
---|
| 1020 | to an empty list.
|
---|
| 1021 |
|
---|
| 1022 | The L</string>, L</fields>, and L</status> methods are meaningless again.
|
---|
| 1023 |
|
---|
| 1024 | =head2 getline_all
|
---|
| 1025 |
|
---|
| 1026 | $arrayref = $csv->getline_all ($fh);
|
---|
| 1027 | $arrayref = $csv->getline_all ($fh, $offset);
|
---|
| 1028 | $arrayref = $csv->getline_all ($fh, $offset, $length);
|
---|
| 1029 |
|
---|
| 1030 | This will return a reference to a list of L<getline ($fh)|/getline> results.
|
---|
| 1031 | In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
|
---|
| 1032 | with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
|
---|
| 1033 | into consideration.
|
---|
| 1034 |
|
---|
| 1035 | Given a CSV file with 10 lines:
|
---|
| 1036 |
|
---|
| 1037 | lines call
|
---|
| 1038 | ----- ---------------------------------------------------------
|
---|
| 1039 | 0..9 $csv->getline_all ($fh) # all
|
---|
| 1040 | 0..9 $csv->getline_all ($fh, 0) # all
|
---|
| 1041 | 8..9 $csv->getline_all ($fh, 8) # start at 8
|
---|
| 1042 | - $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
|
---|
| 1043 | 0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
|
---|
| 1044 | 4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
|
---|
| 1045 | 8..9 $csv->getline_all ($fh, -2) # last 2 rows
|
---|
| 1046 | 6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
|
---|
| 1047 |
|
---|
| 1048 | =head2 getline_hr
|
---|
| 1049 |
|
---|
| 1050 | The L</getline_hr> and L</column_names> methods work together to allow you
|
---|
| 1051 | to have rows returned as hashrefs. You must call L</column_names> first to
|
---|
| 1052 | declare your column names.
|
---|
| 1053 |
|
---|
| 1054 | $csv->column_names (qw( code name price description ));
|
---|
| 1055 | $hr = $csv->getline_hr ($fh);
|
---|
| 1056 | print "Price for $hr->{name} is $hr->{price} EUR\n";
|
---|
| 1057 |
|
---|
| 1058 | L</getline_hr> will croak if called before L</column_names>.
|
---|
| 1059 |
|
---|
| 1060 | Note that L</getline_hr> creates a hashref for every row and will be much
|
---|
| 1061 | slower than the combined use of L</bind_columns> and L</getline> but still
|
---|
| 1062 | offering the same ease of use hashref inside the loop:
|
---|
| 1063 |
|
---|
| 1064 | my @cols = @{$csv->getline ($fh)};
|
---|
| 1065 | $csv->column_names (@cols);
|
---|
| 1066 | while (my $row = $csv->getline_hr ($fh)) {
|
---|
| 1067 | print $row->{price};
|
---|
| 1068 | }
|
---|
| 1069 |
|
---|
| 1070 | Could easily be rewritten to the much faster:
|
---|
| 1071 |
|
---|
| 1072 | my @cols = @{$csv->getline ($fh)};
|
---|
| 1073 | my $row = {};
|
---|
| 1074 | $csv->bind_columns (\@{$row}{@cols});
|
---|
| 1075 | while ($csv->getline ($fh)) {
|
---|
| 1076 | print $row->{price};
|
---|
| 1077 | }
|
---|
| 1078 |
|
---|
| 1079 | Your mileage may vary for the size of the data and the number of rows. With
|
---|
| 1080 | perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
|
---|
| 1081 |
|
---|
| 1082 | Rate hashrefs getlines
|
---|
| 1083 | hashrefs 1.00/s -- -76%
|
---|
| 1084 | getlines 4.15/s 313% --
|
---|
| 1085 |
|
---|
| 1086 | =head2 getline_hr_all
|
---|
| 1087 |
|
---|
| 1088 | $arrayref = $csv->getline_hr_all ($fh);
|
---|
| 1089 | $arrayref = $csv->getline_hr_all ($fh, $offset);
|
---|
| 1090 | $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
|
---|
| 1091 |
|
---|
| 1092 | This will return a reference to a list of L<getline_hr ($fh)|/getline_hr>
|
---|
| 1093 | results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
|
---|
| 1094 |
|
---|
| 1095 | =head2 parse
|
---|
| 1096 |
|
---|
| 1097 | $status = $csv->parse ($line);
|
---|
| 1098 |
|
---|
| 1099 | This method decomposes a C<CSV> string into fields, returning success or
|
---|
| 1100 | failure. Failure can result from a lack of argument or the given C<CSV>
|
---|
| 1101 | string is improperly formatted. Upon success, L</fields> can be called to
|
---|
| 1102 | retrieve the decomposed fields. Upon failure calling L</fields> will return
|
---|
| 1103 | undefined data and L</error_input> can be called to retrieve the invalid
|
---|
| 1104 | argument.
|
---|
| 1105 |
|
---|
| 1106 | You may use the L</types> method for setting column types. See L</types>'
|
---|
| 1107 | description below.
|
---|
| 1108 |
|
---|
| 1109 | The C<$line> argument is supposed to be a simple scalar. Everything else is
|
---|
| 1110 | supposed to croak and set error 1500.
|
---|
| 1111 |
|
---|
| 1112 | =head2 fragment
|
---|
| 1113 |
|
---|
| 1114 | This function tries to implement RFC7111 (URI Fragment Identifiers for the
|
---|
| 1115 | text/csv Media Type) - http://tools.ietf.org/html/rfc7111
|
---|
| 1116 |
|
---|
| 1117 | my $AoA = $csv->fragment ($fh, $spec);
|
---|
| 1118 |
|
---|
| 1119 | In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
|
---|
| 1120 | to indicate a range. All indices are C<1>-based: the first row or column
|
---|
| 1121 | has index C<1>. Selections can be combined with the semi-colon (C<;>).
|
---|
| 1122 |
|
---|
| 1123 | When using this method in combination with L</column_names>, the returned
|
---|
| 1124 | reference will point to a list of hashes instead of a list of lists. A
|
---|
| 1125 | disjointed cell-based combined selection might return rows with different
|
---|
| 1126 | number of columns making the use of hashes unpredictable.
|
---|
| 1127 |
|
---|
| 1128 | $csv->column_names ("Name", "Age");
|
---|
| 1129 | my $AoH = $csv->fragment ($fh, "col=3;8");
|
---|
| 1130 |
|
---|
| 1131 | If the L</after_parse> callback is active, it is also called on every line
|
---|
| 1132 | parsed and skipped before the fragment.
|
---|
| 1133 |
|
---|
| 1134 | =over 2
|
---|
| 1135 |
|
---|
| 1136 | =item row
|
---|
| 1137 |
|
---|
| 1138 | row=4
|
---|
| 1139 | row=5-7
|
---|
| 1140 | row=6-*
|
---|
| 1141 | row=1-2;4;6-*
|
---|
| 1142 |
|
---|
| 1143 | =item col
|
---|
| 1144 |
|
---|
| 1145 | col=2
|
---|
| 1146 | col=1-3
|
---|
| 1147 | col=4-*
|
---|
| 1148 | col=1-2;4;7-*
|
---|
| 1149 |
|
---|
| 1150 | =item cell
|
---|
| 1151 |
|
---|
| 1152 | In cell-based selection, the comma (C<,>) is used to pair row and column
|
---|
| 1153 |
|
---|
| 1154 | cell=4,1
|
---|
| 1155 |
|
---|
| 1156 | The range operator (C<->) using C<cell>s can be used to define top-left and
|
---|
| 1157 | bottom-right C<cell> location
|
---|
| 1158 |
|
---|
| 1159 | cell=3,1-4,6
|
---|
| 1160 |
|
---|
| 1161 | The C<*> is only allowed in the second part of a pair
|
---|
| 1162 |
|
---|
| 1163 | cell=3,2-*,2 # row 3 till end, only column 2
|
---|
| 1164 | cell=3,2-3,* # column 2 till end, only row 3
|
---|
| 1165 | cell=3,2-*,* # strip row 1 and 2, and column 1
|
---|
| 1166 |
|
---|
| 1167 | Cells and cell ranges may be combined with C<;>, possibly resulting in rows
|
---|
| 1168 | with different number of columns
|
---|
| 1169 |
|
---|
| 1170 | cell=1,1-2,2;3,3-4,4;1,4;4,1
|
---|
| 1171 |
|
---|
| 1172 | Disjointed selections will only return selected cells. The cells that are
|
---|
| 1173 | not specified will not be included in the returned set, not even as
|
---|
| 1174 | C<undef>. As an example given a C<CSV> like
|
---|
| 1175 |
|
---|
| 1176 | 11,12,13,...19
|
---|
| 1177 | 21,22,...28,29
|
---|
| 1178 | : :
|
---|
| 1179 | 91,...97,98,99
|
---|
| 1180 |
|
---|
| 1181 | with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
|
---|
| 1182 |
|
---|
| 1183 | 11,12,14
|
---|
| 1184 | 21,22
|
---|
| 1185 | 33,34
|
---|
| 1186 | 41,43,44
|
---|
| 1187 |
|
---|
| 1188 | Overlapping cell-specs will return those cells only once, so
|
---|
| 1189 | C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
|
---|
| 1190 |
|
---|
| 1191 | 11,12,13
|
---|
| 1192 | 21,22,23,24
|
---|
| 1193 | 31,32,33,34
|
---|
| 1194 | 42,43,44
|
---|
| 1195 |
|
---|
| 1196 | =back
|
---|
| 1197 |
|
---|
| 1198 | L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different
|
---|
| 1199 | types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
|
---|
| 1200 | Passing an invalid fragment specification will croak and set error 2013.
|
---|
| 1201 |
|
---|
| 1202 | =head2 column_names
|
---|
| 1203 |
|
---|
| 1204 | Set the "keys" that will be used in the L</getline_hr> calls. If no keys
|
---|
| 1205 | (column names) are passed, it will return the current setting as a list.
|
---|
| 1206 |
|
---|
| 1207 | L</column_names> accepts a list of scalars (the column names) or a single
|
---|
| 1208 | array_ref, so you can pass the return value from L</getline> too:
|
---|
| 1209 |
|
---|
| 1210 | $csv->column_names ($csv->getline ($fh));
|
---|
| 1211 |
|
---|
| 1212 | L</column_names> does B<no> checking on duplicates at all, which might lead
|
---|
| 1213 | to unexpected results. Undefined entries will be replaced with the string
|
---|
| 1214 | C<"\cAUNDEF\cA">, so
|
---|
| 1215 |
|
---|
| 1216 | $csv->column_names (undef, "", "name", "name");
|
---|
| 1217 | $hr = $csv->getline_hr ($fh);
|
---|
| 1218 |
|
---|
| 1219 | Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
|
---|
| 1220 | the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
|
---|
| 1221 | field.
|
---|
| 1222 |
|
---|
| 1223 | L</column_names> croaks on invalid arguments.
|
---|
| 1224 |
|
---|
| 1225 | =head2 header
|
---|
| 1226 |
|
---|
| 1227 | This method does NOT work in perl-5.6.x
|
---|
| 1228 |
|
---|
| 1229 | Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
|
---|
| 1230 |
|
---|
| 1231 | my @hdr = $csv->header ($fh);
|
---|
| 1232 | $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
|
---|
| 1233 | $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
|
---|
| 1234 |
|
---|
| 1235 | The first argument should be a file handle.
|
---|
| 1236 |
|
---|
| 1237 | This method resets some object properties, as it is supposed to be invoked
|
---|
| 1238 | only once per file or stream. It will leave attributes C<column_names> and
|
---|
| 1239 | C<bound_columns> alone if setting column names is disabled. Reading headers
|
---|
| 1240 | on previously processed objects might fail on perl-5.8.0 and older.
|
---|
| 1241 |
|
---|
| 1242 | Assuming that the file opened for parsing has a header, and the header does
|
---|
| 1243 | not contain problematic characters like embedded newlines, read the first
|
---|
| 1244 | line from the open handle then auto-detect whether the header separates the
|
---|
| 1245 | column names with a character from the allowed separator list.
|
---|
| 1246 |
|
---|
| 1247 | If any of the allowed separators matches, and none of the I<other> allowed
|
---|
| 1248 | separators match, set L<C<sep>|/sep> to that separator for the current
|
---|
| 1249 | CSV instance and use it to parse the first line, map those to lowercase,
|
---|
| 1250 | and use that to set the instance L</column_names>:
|
---|
| 1251 |
|
---|
| 1252 | my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
|
---|
| 1253 | open my $fh, "<", "file.csv";
|
---|
| 1254 | binmode $fh; # for Windows
|
---|
| 1255 | $csv->header ($fh);
|
---|
| 1256 | while (my $row = $csv->getline_hr ($fh)) {
|
---|
| 1257 | ...
|
---|
| 1258 | }
|
---|
| 1259 |
|
---|
| 1260 | If the header is empty, contains more than one unique separator out of the
|
---|
| 1261 | allowed set, contains empty fields, or contains identical fields (after
|
---|
| 1262 | folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
|
---|
| 1263 |
|
---|
| 1264 | If the header contains embedded newlines or is not valid CSV in any other
|
---|
| 1265 | way, this method will croak and leave the parse error untouched.
|
---|
| 1266 |
|
---|
| 1267 | A successful call to C<header> will always set the L<C<sep>|/sep> of the
|
---|
| 1268 | C<$csv> object. This behavior can not be disabled.
|
---|
| 1269 |
|
---|
| 1270 | =head3 return value
|
---|
| 1271 |
|
---|
| 1272 | On error this method will croak.
|
---|
| 1273 |
|
---|
| 1274 | In list context, the headers will be returned whether they are used to set
|
---|
| 1275 | L</column_names> or not.
|
---|
| 1276 |
|
---|
| 1277 | In scalar context, the instance itself is returned. B<Note>: the values as
|
---|
| 1278 | found in the header will effectively be B<lost> if C<set_column_names> is
|
---|
| 1279 | false.
|
---|
| 1280 |
|
---|
| 1281 | =head3 Options
|
---|
| 1282 |
|
---|
| 1283 | =over 2
|
---|
| 1284 |
|
---|
| 1285 | =item sep_set
|
---|
| 1286 |
|
---|
| 1287 | $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
|
---|
| 1288 |
|
---|
| 1289 | The list of legal separators defaults to C<[ ";", "," ]> and can be changed
|
---|
| 1290 | by this option. As this is probably the most often used option, it can be
|
---|
| 1291 | passed on its own as an unnamed argument:
|
---|
| 1292 |
|
---|
| 1293 | $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
|
---|
| 1294 |
|
---|
| 1295 | Multi-byte sequences are allowed, both multi-character and Unicode. See
|
---|
| 1296 | L<C<sep>|/sep>.
|
---|
| 1297 |
|
---|
| 1298 | =item detect_bom
|
---|
| 1299 |
|
---|
| 1300 | $csv->header ($fh, { detect_bom => 1 });
|
---|
| 1301 |
|
---|
| 1302 | The default behavior is to detect if the header line starts with a BOM. If
|
---|
| 1303 | the header has a BOM, use that to set the encoding of C<$fh>. This default
|
---|
| 1304 | behavior can be disabled by passing a false value to C<detect_bom>.
|
---|
| 1305 |
|
---|
| 1306 | Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
|
---|
| 1307 | UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
|
---|
| 1308 | but L<Encode> does not (yet). UTF-7 is not supported.
|
---|
| 1309 |
|
---|
| 1310 | If a supported BOM was detected as start of the stream, it is stored in the
|
---|
| 1311 | object attribute C<ENCODING>.
|
---|
| 1312 |
|
---|
| 1313 | my $enc = $csv->{ENCODING};
|
---|
| 1314 |
|
---|
| 1315 | The encoding is used with C<binmode> on C<$fh>.
|
---|
| 1316 |
|
---|
| 1317 | If the handle was opened in a (correct) encoding, this method will B<not>
|
---|
| 1318 | alter the encoding, as it checks the leading B<bytes> of the first line. In
|
---|
| 1319 | case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
|
---|
| 1320 | C<""> (empty) instead of the default C<undef>.
|
---|
| 1321 |
|
---|
| 1322 | =item munge_column_names
|
---|
| 1323 |
|
---|
| 1324 | This option offers the means to modify the column names into something that
|
---|
| 1325 | is most useful to the application. The default is to map all column names
|
---|
| 1326 | to lower case.
|
---|
| 1327 |
|
---|
| 1328 | $csv->header ($fh, { munge_column_names => "lc" });
|
---|
| 1329 |
|
---|
| 1330 | The following values are available:
|
---|
| 1331 |
|
---|
| 1332 | lc - lower case
|
---|
| 1333 | uc - upper case
|
---|
| 1334 | none - do not change
|
---|
| 1335 | \%hash - supply a mapping
|
---|
| 1336 | \&cb - supply a callback
|
---|
| 1337 |
|
---|
| 1338 | Literal:
|
---|
| 1339 |
|
---|
| 1340 | $csv->header ($fh, { munge_column_names => "none" });
|
---|
| 1341 |
|
---|
| 1342 | Hash:
|
---|
| 1343 |
|
---|
| 1344 | $csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
|
---|
| 1345 |
|
---|
| 1346 | If a value does not exist, the original value is used unchanged.
|
---|
| 1347 |
|
---|
| 1348 | Callback:
|
---|
| 1349 |
|
---|
| 1350 | $csv->header ($fh, { munge_column_names => sub { fc } });
|
---|
| 1351 | $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
|
---|
| 1352 | $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
|
---|
| 1353 |
|
---|
| 1354 | As this callback is called in a C<map>, you can use C<$_> directly.
|
---|
| 1355 |
|
---|
| 1356 | =item set_column_names
|
---|
| 1357 |
|
---|
| 1358 | $csv->header ($fh, { set_column_names => 1 });
|
---|
| 1359 |
|
---|
| 1360 | The default is to set the instance's column names using L</column_names> if
|
---|
| 1361 | the method is successful, so subsequent calls to L</getline_hr> can return
|
---|
| 1362 | a hash. Disabling setting the header can be forced by using a false value for
|
---|
| 1363 | this option.
|
---|
| 1364 |
|
---|
| 1365 | As described in L</return value> above, content is lost in scalar context.
|
---|
| 1366 |
|
---|
| 1367 | =back
|
---|
| 1368 |
|
---|
| 1369 | =head3 Validation
|
---|
| 1370 |
|
---|
| 1371 | When receiving CSV files from external sources, this method can be used to
|
---|
| 1372 | protect against changes in the layout by restricting to known headers (and
|
---|
| 1373 | typos in the header fields).
|
---|
| 1374 |
|
---|
| 1375 | my %known = (
|
---|
| 1376 | "record key" => "c_rec",
|
---|
| 1377 | "rec id" => "c_rec",
|
---|
| 1378 | "id_rec" => "c_rec",
|
---|
| 1379 | "kode" => "code",
|
---|
| 1380 | "code" => "code",
|
---|
| 1381 | "vaule" => "value",
|
---|
| 1382 | "value" => "value",
|
---|
| 1383 | );
|
---|
| 1384 | my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
|
---|
| 1385 | open my $fh, "<", $source or die "$source: $!";
|
---|
| 1386 | $csv->header ($fh, { munge_column_names => sub {
|
---|
| 1387 | s/\s+$//;
|
---|
| 1388 | s/^\s+//;
|
---|
| 1389 | $known{lc $_} or die "Unknown column '$_' in $source";
|
---|
| 1390 | }});
|
---|
| 1391 | while (my $row = $csv->getline_hr ($fh)) {
|
---|
| 1392 | say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
|
---|
| 1393 | }
|
---|
| 1394 |
|
---|
| 1395 | =head2 bind_columns
|
---|
| 1396 |
|
---|
| 1397 | Takes a list of scalar references to be used for output with L</print> or
|
---|
| 1398 | to store in the fields fetched by L</getline>. When you do not pass enough
|
---|
| 1399 | references to store the fetched fields in, L</getline> will fail with error
|
---|
| 1400 | C<3006>. If you pass more than there are fields to return, the content of
|
---|
| 1401 | the remaining references is left untouched.
|
---|
| 1402 |
|
---|
| 1403 | $csv->bind_columns (\$code, \$name, \$price, \$description);
|
---|
| 1404 | while ($csv->getline ($fh)) {
|
---|
| 1405 | print "The price of a $name is \x{20ac} $price\n";
|
---|
| 1406 | }
|
---|
| 1407 |
|
---|
| 1408 | To reset or clear all column binding, call L</bind_columns> with the single
|
---|
| 1409 | argument C<undef>. This will also clear column names.
|
---|
| 1410 |
|
---|
| 1411 | $csv->bind_columns (undef);
|
---|
| 1412 |
|
---|
| 1413 | If no arguments are passed at all, L</bind_columns> will return the list of
|
---|
| 1414 | current bindings or C<undef> if no binds are active.
|
---|
| 1415 |
|
---|
| 1416 | Note that in parsing with C<bind_columns>, the fields are set on the fly.
|
---|
| 1417 | That implies that if the third field of a row causes an error (or this row
|
---|
| 1418 | has just two fields where the previous row had more), the first two fields
|
---|
| 1419 | already have been assigned the values of the current row, while the rest of
|
---|
| 1420 | the fields will still hold the values of the previous row. If you want the
|
---|
| 1421 | parser to fail in these cases, use the L<C<strict>|/strict> attribute.
|
---|
| 1422 |
|
---|
| 1423 | =head2 eof
|
---|
| 1424 |
|
---|
| 1425 | $eof = $csv->eof ();
|
---|
| 1426 |
|
---|
| 1427 | If L</parse> or L</getline> was used with an IO stream, this method will
|
---|
| 1428 | return true (1) if the last call hit end of file, otherwise it will return
|
---|
| 1429 | false (''). This is useful to see the difference between a failure and end
|
---|
| 1430 | of file.
|
---|
| 1431 |
|
---|
| 1432 | Note that if the parsing of the last line caused an error, C<eof> is still
|
---|
| 1433 | true. That means that if you are I<not> using L</auto_diag>, an idiom like
|
---|
| 1434 |
|
---|
| 1435 | while (my $row = $csv->getline ($fh)) {
|
---|
| 1436 | # ...
|
---|
| 1437 | }
|
---|
| 1438 | $csv->eof or $csv->error_diag;
|
---|
| 1439 |
|
---|
| 1440 | will I<not> report the error. You would have to change that to
|
---|
| 1441 |
|
---|
| 1442 | while (my $row = $csv->getline ($fh)) {
|
---|
| 1443 | # ...
|
---|
| 1444 | }
|
---|
| 1445 | +$csv->error_diag and $csv->error_diag;
|
---|
| 1446 |
|
---|
| 1447 | =head2 types
|
---|
| 1448 |
|
---|
| 1449 | $csv->types (\@tref);
|
---|
| 1450 |
|
---|
| 1451 | This method is used to force that (all) columns are of a given type. For
|
---|
| 1452 | example, if you have an integer column, two columns with doubles and a
|
---|
| 1453 | string column, then you might do a
|
---|
| 1454 |
|
---|
| 1455 | $csv->types ([Text::CSV::IV (),
|
---|
| 1456 | Text::CSV::NV (),
|
---|
| 1457 | Text::CSV::NV (),
|
---|
| 1458 | Text::CSV::PV ()]);
|
---|
| 1459 |
|
---|
| 1460 | Column types are used only for I<decoding> columns while parsing, in other
|
---|
| 1461 | words by the L</parse> and L</getline> methods.
|
---|
| 1462 |
|
---|
| 1463 | You can unset column types by doing a
|
---|
| 1464 |
|
---|
| 1465 | $csv->types (undef);
|
---|
| 1466 |
|
---|
| 1467 | or fetch the current type settings with
|
---|
| 1468 |
|
---|
| 1469 | $types = $csv->types ();
|
---|
| 1470 |
|
---|
| 1471 | =over 4
|
---|
| 1472 |
|
---|
| 1473 | =item IV
|
---|
| 1474 |
|
---|
| 1475 | Set field type to integer.
|
---|
| 1476 |
|
---|
| 1477 | =item NV
|
---|
| 1478 |
|
---|
| 1479 | Set field type to numeric/float.
|
---|
| 1480 |
|
---|
| 1481 | =item PV
|
---|
| 1482 |
|
---|
| 1483 | Set field type to string.
|
---|
| 1484 |
|
---|
| 1485 | =back
|
---|
| 1486 |
|
---|
| 1487 | =head2 fields
|
---|
| 1488 |
|
---|
| 1489 | @columns = $csv->fields ();
|
---|
| 1490 |
|
---|
| 1491 | This method returns the input to L</combine> or the resultant decomposed
|
---|
| 1492 | fields of a successful L</parse>, whichever was called more recently.
|
---|
| 1493 |
|
---|
| 1494 | Note that the return value is undefined after using L</getline>, which does
|
---|
| 1495 | not fill the data structures returned by L</parse>.
|
---|
| 1496 |
|
---|
| 1497 | =head2 meta_info
|
---|
| 1498 |
|
---|
| 1499 | @flags = $csv->meta_info ();
|
---|
| 1500 |
|
---|
| 1501 | This method returns the "flags" of the input to L</combine> or the flags of
|
---|
| 1502 | the resultant decomposed fields of L</parse>, whichever was called more
|
---|
| 1503 | recently.
|
---|
| 1504 |
|
---|
| 1505 | For each field, a meta_info field will hold flags that inform something
|
---|
| 1506 | about the field returned by the L</fields> method or passed to the
|
---|
| 1507 | L</combine> method. The flags are bit-wise-C<or>'d like:
|
---|
| 1508 |
|
---|
| 1509 | =over 2
|
---|
| 1510 |
|
---|
| 1511 | =item C< >0x0001
|
---|
| 1512 |
|
---|
| 1513 | The field was quoted.
|
---|
| 1514 |
|
---|
| 1515 | =item C< >0x0002
|
---|
| 1516 |
|
---|
| 1517 | The field was binary.
|
---|
| 1518 |
|
---|
| 1519 | =back
|
---|
| 1520 |
|
---|
| 1521 | See the C<is_***> methods below.
|
---|
| 1522 |
|
---|
| 1523 | =head2 is_quoted
|
---|
| 1524 |
|
---|
| 1525 | my $quoted = $csv->is_quoted ($column_idx);
|
---|
| 1526 |
|
---|
| 1527 | Where C<$column_idx> is the (zero-based) index of the column in the last
|
---|
| 1528 | result of L</parse>.
|
---|
| 1529 |
|
---|
| 1530 | This returns a true value if the data in the indicated column was enclosed
|
---|
| 1531 | in L<C<quote_char>|/quote_char> quotes. This might be important for fields
|
---|
| 1532 | where content C<,20070108,> is to be treated as a numeric value, and where
|
---|
| 1533 | C<,"20070108",> is explicitly marked as character string data.
|
---|
| 1534 |
|
---|
| 1535 | This method is only valid when L</keep_meta_info> is set to a true value.
|
---|
| 1536 |
|
---|
| 1537 | =head2 is_binary
|
---|
| 1538 |
|
---|
| 1539 | my $binary = $csv->is_binary ($column_idx);
|
---|
| 1540 |
|
---|
| 1541 | Where C<$column_idx> is the (zero-based) index of the column in the last
|
---|
| 1542 | result of L</parse>.
|
---|
| 1543 |
|
---|
| 1544 | This returns a true value if the data in the indicated column contained any
|
---|
| 1545 | byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
|
---|
| 1546 |
|
---|
| 1547 | This method is only valid when L</keep_meta_info> is set to a true value.
|
---|
| 1548 |
|
---|
| 1549 | =head2 is_missing
|
---|
| 1550 |
|
---|
| 1551 | my $missing = $csv->is_missing ($column_idx);
|
---|
| 1552 |
|
---|
| 1553 | Where C<$column_idx> is the (zero-based) index of the column in the last
|
---|
| 1554 | result of L</getline_hr>.
|
---|
| 1555 |
|
---|
| 1556 | $csv->keep_meta_info (1);
|
---|
| 1557 | while (my $hr = $csv->getline_hr ($fh)) {
|
---|
| 1558 | $csv->is_missing (0) and next; # This was an empty line
|
---|
| 1559 | }
|
---|
| 1560 |
|
---|
| 1561 | When using L</getline_hr>, it is impossible to tell if the parsed fields
|
---|
| 1562 | are C<undef> because they were not filled in the C<CSV> stream or because
|
---|
| 1563 | they were not read at all, as B<all> the fields defined by L</column_names>
|
---|
| 1564 | are set in the hash-ref. If you still need to know if all fields in each
|
---|
| 1565 | row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
|
---|
| 1566 | you can check the flags.
|
---|
| 1567 |
|
---|
| 1568 | If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
|
---|
| 1569 | always return C<undef>, regardless of C<$column_idx> being valid or not. If
|
---|
| 1570 | this attribute is C<true> it will return either C<0> (the field is present)
|
---|
| 1571 | or C<1> (the field is missing).
|
---|
| 1572 |
|
---|
| 1573 | A special case is the empty line. If the line is completely empty - after
|
---|
| 1574 | dealing with the flags - this is still a valid CSV line: it is a record of
|
---|
| 1575 | just one single empty field. However, if C<keep_meta_info> is set, invoking
|
---|
| 1576 | C<is_missing> with index C<0> will now return true.
|
---|
| 1577 |
|
---|
| 1578 | =head2 status
|
---|
| 1579 |
|
---|
| 1580 | $status = $csv->status ();
|
---|
| 1581 |
|
---|
| 1582 | This method returns the status of the last invoked L</combine> or L</parse>
|
---|
| 1583 | call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
|
---|
| 1584 |
|
---|
| 1585 | =head2 error_input
|
---|
| 1586 |
|
---|
| 1587 | $bad_argument = $csv->error_input ();
|
---|
| 1588 |
|
---|
| 1589 | This method returns the erroneous argument (if it exists) of L</combine> or
|
---|
| 1590 | L</parse>, whichever was called more recently. If the last invocation was
|
---|
| 1591 | successful, C<error_input> will return C<undef>.
|
---|
| 1592 |
|
---|
| 1593 | =head2 error_diag
|
---|
| 1594 |
|
---|
| 1595 | Text::CSV->error_diag ();
|
---|
| 1596 | $csv->error_diag ();
|
---|
| 1597 | $error_code = 0 + $csv->error_diag ();
|
---|
| 1598 | $error_str = "" . $csv->error_diag ();
|
---|
| 1599 | ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
|
---|
| 1600 |
|
---|
| 1601 | If (and only if) an error occurred, this function returns the diagnostics
|
---|
| 1602 | of that error.
|
---|
| 1603 |
|
---|
| 1604 | If called in void context, this will print the internal error code and the
|
---|
| 1605 | associated error message to STDERR.
|
---|
| 1606 |
|
---|
| 1607 | If called in list context, this will return the error code and the error
|
---|
| 1608 | message in that order. If the last error was from parsing, the rest of the
|
---|
| 1609 | values returned are a best guess at the location within the line that was
|
---|
| 1610 | being parsed. Their values are 1-based. The position currently is index of
|
---|
| 1611 | the byte at which the parsing failed in the current record. It might change
|
---|
| 1612 | to be the index of the current character in a later release. The record number is
|
---|
| 1613 | the index of the record parsed by the csv instance. The field number is the
|
---|
| 1614 | index of the field the parser thinks it is currently trying to parse. See
|
---|
| 1615 | F<examples/csv-check> for how this can be used.
|
---|
| 1616 |
|
---|
| 1617 | If called in scalar context, it will return the diagnostics in a single
|
---|
| 1618 | scalar, a-la C<$!>. It will contain the error code in numeric context, and
|
---|
| 1619 | the diagnostics message in string context.
|
---|
| 1620 |
|
---|
| 1621 | When called as a class method or a direct function call, the diagnostics
|
---|
| 1622 | are that of the last L</new> call.
|
---|
| 1623 |
|
---|
| 1624 | =head2 record_number
|
---|
| 1625 |
|
---|
| 1626 | $recno = $csv->record_number ();
|
---|
| 1627 |
|
---|
| 1628 | Returns the number of records parsed by this csv instance. This value should be more
|
---|
| 1629 | accurate than C<$.> when embedded newlines come into play. Records written by
|
---|
| 1630 | this instance are not counted.
|
---|
| 1631 |
|
---|
| 1632 | =head2 SetDiag
|
---|
| 1633 |
|
---|
| 1634 | $csv->SetDiag (0);
|
---|
| 1635 |
|
---|
| 1636 | Use to reset the diagnostics if you are dealing with errors.
|
---|
| 1637 |
|
---|
| 1638 | =head1 ADDITIONAL METHODS
|
---|
| 1639 |
|
---|
| 1640 | =over
|
---|
| 1641 |
|
---|
| 1642 | =item backend
|
---|
| 1643 |
|
---|
| 1644 | Returns the backend module name called by Text::CSV.
|
---|
| 1645 | C<module> is an alias.
|
---|
| 1646 |
|
---|
| 1647 | =item is_xs
|
---|
| 1648 |
|
---|
| 1649 | Returns true value if Text::CSV uses an XS backend.
|
---|
| 1650 |
|
---|
| 1651 | =item is_pp
|
---|
| 1652 |
|
---|
| 1653 | Returns true value if Text::CSV uses a pure-Perl backend.
|
---|
| 1654 |
|
---|
| 1655 | =back
|
---|
| 1656 |
|
---|
| 1657 | =head1 FUNCTIONS
|
---|
| 1658 |
|
---|
| 1659 | This section is also taken from Text::CSV_XS.
|
---|
| 1660 |
|
---|
| 1661 | =head2 csv
|
---|
| 1662 |
|
---|
| 1663 | This function is not exported by default and should be explicitly requested:
|
---|
| 1664 |
|
---|
| 1665 | use Text::CSV qw( csv );
|
---|
| 1666 |
|
---|
| 1667 | This is a high-level function that aims at simple (user) interfaces. This
|
---|
| 1668 | can be used to read/parse a C<CSV> file or stream (the default behavior) or
|
---|
| 1669 | to produce a file or write to a stream (define the C<out> attribute). It
|
---|
| 1670 | returns an array- or hash-reference on parsing (or C<undef> on fail) or the
|
---|
| 1671 | numeric value of L</error_diag> on writing. When this function fails you
|
---|
| 1672 | can get to the error using the class call to L</error_diag>
|
---|
| 1673 |
|
---|
| 1674 | my $aoa = csv (in => "test.csv") or
|
---|
| 1675 | die Text::CSV->error_diag;
|
---|
| 1676 |
|
---|
| 1677 | This function takes the arguments as key-value pairs. This can be passed as
|
---|
| 1678 | a list or as an anonymous hash:
|
---|
| 1679 |
|
---|
| 1680 | my $aoa = csv ( in => "test.csv", sep_char => ";");
|
---|
| 1681 | my $aoh = csv ({ in => $fh, headers => "auto" });
|
---|
| 1682 |
|
---|
| 1683 | The arguments passed consist of two parts: the arguments to L</csv> itself
|
---|
| 1684 | and the optional attributes to the C<CSV> object used inside the function
|
---|
| 1685 | as enumerated and explained in L</new>.
|
---|
| 1686 |
|
---|
| 1687 | If not overridden, the default options used for CSV are
|
---|
| 1688 |
|
---|
| 1689 | auto_diag => 1
|
---|
| 1690 | escape_null => 0
|
---|
| 1691 |
|
---|
| 1692 | The option that is always set and cannot be altered is
|
---|
| 1693 |
|
---|
| 1694 | binary => 1
|
---|
| 1695 |
|
---|
| 1696 | As this function will likely be used in one-liners, it allows C<quote> to
|
---|
| 1697 | be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
|
---|
| 1698 | or C<escape>.
|
---|
| 1699 |
|
---|
| 1700 | Alternative invocations:
|
---|
| 1701 |
|
---|
| 1702 | my $aoa = Text::CSV::csv (in => "file.csv");
|
---|
| 1703 |
|
---|
| 1704 | my $csv = Text::CSV->new ();
|
---|
| 1705 | my $aoa = $csv->csv (in => "file.csv");
|
---|
| 1706 |
|
---|
| 1707 | In the latter case, the object attributes are used from the existing object
|
---|
| 1708 | and the attribute arguments in the function call are ignored:
|
---|
| 1709 |
|
---|
| 1710 | my $csv = Text::CSV->new ({ sep_char => ";" });
|
---|
| 1711 | my $aoh = $csv->csv (in => "file.csv", bom => 1);
|
---|
| 1712 |
|
---|
| 1713 | will parse using C<;> as C<sep_char>, not C<,>.
|
---|
| 1714 |
|
---|
| 1715 | =head3 in
|
---|
| 1716 |
|
---|
| 1717 | Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
|
---|
| 1718 | which will be opened for reading and closed when finished, a file handle
|
---|
| 1719 | (e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
|
---|
| 1720 | itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
|
---|
| 1721 |
|
---|
| 1722 | When used with L</out>, C<in> should be a reference to a CSV structure (AoA
|
---|
| 1723 | or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
|
---|
| 1724 | The code-ref will be invoked with no arguments.
|
---|
| 1725 |
|
---|
| 1726 | my $aoa = csv (in => "file.csv");
|
---|
| 1727 |
|
---|
| 1728 | open my $fh, "<", "file.csv";
|
---|
| 1729 | my $aoa = csv (in => $fh);
|
---|
| 1730 |
|
---|
| 1731 | my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
|
---|
| 1732 | my $err = csv (in => $csv, out => "file.csv");
|
---|
| 1733 |
|
---|
| 1734 | If called in void context without the L</out> attribute, the resulting ref
|
---|
| 1735 | will be used as input to a subsequent call to csv:
|
---|
| 1736 |
|
---|
| 1737 | csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
|
---|
| 1738 |
|
---|
| 1739 | will be a shortcut to
|
---|
| 1740 |
|
---|
| 1741 | csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
|
---|
| 1742 |
|
---|
| 1743 | where, in the absence of the C<out> attribute, this is a shortcut to
|
---|
| 1744 |
|
---|
| 1745 | csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
|
---|
| 1746 | out => *STDOUT)
|
---|
| 1747 |
|
---|
| 1748 | =head3 out
|
---|
| 1749 |
|
---|
| 1750 | csv (in => $aoa, out => "file.csv");
|
---|
| 1751 | csv (in => $aoa, out => $fh);
|
---|
| 1752 | csv (in => $aoa, out => STDOUT);
|
---|
| 1753 | csv (in => $aoa, out => *STDOUT);
|
---|
| 1754 | csv (in => $aoa, out => \*STDOUT);
|
---|
| 1755 | csv (in => $aoa, out => \my $data);
|
---|
| 1756 | csv (in => $aoa, out => undef);
|
---|
| 1757 | csv (in => $aoa, out => \"skip");
|
---|
| 1758 |
|
---|
| 1759 | In output mode, the default CSV options when producing CSV are
|
---|
| 1760 |
|
---|
| 1761 | eol => "\r\n"
|
---|
| 1762 |
|
---|
| 1763 | The L</fragment> attribute is ignored in output mode.
|
---|
| 1764 |
|
---|
| 1765 | C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
|
---|
| 1766 | writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
|
---|
| 1767 | reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
|
---|
| 1768 | or a reference to a scalar (e.g. C<\my $data>).
|
---|
| 1769 |
|
---|
| 1770 | csv (in => sub { $sth->fetch }, out => "dump.csv");
|
---|
| 1771 | csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
|
---|
| 1772 | headers => $sth->{NAME_lc});
|
---|
| 1773 |
|
---|
| 1774 | When a code-ref is used for C<in>, the output is generated per invocation,
|
---|
| 1775 | so no buffering is involved. This implies that there is no size restriction
|
---|
| 1776 | on the number of records. The C<csv> function ends when the coderef returns
|
---|
| 1777 | a false value.
|
---|
| 1778 |
|
---|
| 1779 | If C<out> is set to a reference of the literal string C<"skip">, the output
|
---|
| 1780 | will be suppressed completely, which might be useful in combination with a
|
---|
| 1781 | filter for side effects only.
|
---|
| 1782 |
|
---|
| 1783 | my %cache;
|
---|
| 1784 | csv (in => "dump.csv",
|
---|
| 1785 | out => \"skip",
|
---|
| 1786 | on_in => sub { $cache{$_[1][1]}++ });
|
---|
| 1787 |
|
---|
| 1788 | Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
|
---|
| 1789 | equivalent to C<\"skip">.
|
---|
| 1790 |
|
---|
| 1791 | =head3 encoding
|
---|
| 1792 |
|
---|
| 1793 | If passed, it should be an encoding accepted by the C<:encoding()> option
|
---|
| 1794 | to C<open>. There is no default value. This attribute does not work in perl
|
---|
| 1795 | 5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
|
---|
| 1796 | line invocations.
|
---|
| 1797 |
|
---|
| 1798 | If C<encoding> is set to the literal value C<"auto">, the method L</header>
|
---|
| 1799 | will be invoked on the opened stream to check if there is a BOM and set the
|
---|
| 1800 | encoding accordingly. This is equal to passing a true value in the option
|
---|
| 1801 | L<C<detect_bom>|/detect_bom>.
|
---|
| 1802 |
|
---|
| 1803 | =head3 detect_bom
|
---|
| 1804 |
|
---|
| 1805 | If C<detect_bom> is given, the method L</header> will be invoked on the
|
---|
| 1806 | opened stream to check if there is a BOM and set the encoding accordingly.
|
---|
| 1807 |
|
---|
| 1808 | C<detect_bom> can be abbreviated to C<bom>.
|
---|
| 1809 |
|
---|
| 1810 | This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
|
---|
| 1811 |
|
---|
| 1812 | Note that as the method L</header> is invoked, its default is to also set
|
---|
| 1813 | the headers.
|
---|
| 1814 |
|
---|
| 1815 | =head3 headers
|
---|
| 1816 |
|
---|
| 1817 | If this attribute is not given, the default behavior is to produce an array
|
---|
| 1818 | of arrays.
|
---|
| 1819 |
|
---|
| 1820 | If C<headers> is supplied, it should be an anonymous list of column names,
|
---|
| 1821 | an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
|
---|
| 1822 | or C<skip>.
|
---|
| 1823 |
|
---|
| 1824 | =over 2
|
---|
| 1825 |
|
---|
| 1826 | =item skip
|
---|
| 1827 |
|
---|
| 1828 | When C<skip> is used, the header will not be included in the output.
|
---|
| 1829 |
|
---|
| 1830 | my $aoa = csv (in => $fh, headers => "skip");
|
---|
| 1831 |
|
---|
| 1832 | =item auto
|
---|
| 1833 |
|
---|
| 1834 | If C<auto> is used, the first line of the C<CSV> source will be read as the
|
---|
| 1835 | list of field headers and used to produce an array of hashes.
|
---|
| 1836 |
|
---|
| 1837 | my $aoh = csv (in => $fh, headers => "auto");
|
---|
| 1838 |
|
---|
| 1839 | =item lc
|
---|
| 1840 |
|
---|
| 1841 | If C<lc> is used, the first line of the C<CSV> source will be read as the
|
---|
| 1842 | list of field headers mapped to lower case and used to produce an array of
|
---|
| 1843 | hashes. This is a variation of C<auto>.
|
---|
| 1844 |
|
---|
| 1845 | my $aoh = csv (in => $fh, headers => "lc");
|
---|
| 1846 |
|
---|
| 1847 | =item uc
|
---|
| 1848 |
|
---|
| 1849 | If C<uc> is used, the first line of the C<CSV> source will be read as the
|
---|
| 1850 | list of field headers mapped to upper case and used to produce an array of
|
---|
| 1851 | hashes. This is a variation of C<auto>.
|
---|
| 1852 |
|
---|
| 1853 | my $aoh = csv (in => $fh, headers => "uc");
|
---|
| 1854 |
|
---|
| 1855 | =item CODE
|
---|
| 1856 |
|
---|
| 1857 | If a coderef is used, the first line of the C<CSV> source will be read as
|
---|
| 1858 | the list of mangled field headers in which each field is passed as the only
|
---|
| 1859 | argument to the coderef. This list is used to produce an array of hashes.
|
---|
| 1860 |
|
---|
| 1861 | my $aoh = csv (in => $fh,
|
---|
| 1862 | headers => sub { lc ($_[0]) =~ s/kode/code/gr });
|
---|
| 1863 |
|
---|
| 1864 | This example is a variation of using C<lc> where all occurrences of C<kode>
|
---|
| 1865 | are replaced with C<code>.
|
---|
| 1866 |
|
---|
| 1867 | =item ARRAY
|
---|
| 1868 |
|
---|
| 1869 | If C<headers> is an anonymous list, the entries in the list will be used
|
---|
| 1870 | as field names. The first line is considered data instead of headers.
|
---|
| 1871 |
|
---|
| 1872 | my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
|
---|
| 1873 | csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
|
---|
| 1874 |
|
---|
| 1875 | =item HASH
|
---|
| 1876 |
|
---|
| 1877 | If C<headers> is a hash reference, this implies C<auto>, but header fields
|
---|
| 1878 | that exist as a key in the hashref will be replaced by the value for that
|
---|
| 1879 | key. Given a CSV file like
|
---|
| 1880 |
|
---|
| 1881 | post-kode,city,name,id number,fubble
|
---|
| 1882 | 1234AA,Duckstad,Donald,13,"X313DF"
|
---|
| 1883 |
|
---|
| 1884 | using
|
---|
| 1885 |
|
---|
| 1886 | csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
|
---|
| 1887 |
|
---|
| 1888 | will return an entry like
|
---|
| 1889 |
|
---|
| 1890 | { pc => "1234AA",
|
---|
| 1891 | city => "Duckstad",
|
---|
| 1892 | name => "Donald",
|
---|
| 1893 | ID => "13",
|
---|
| 1894 | fubble => "X313DF",
|
---|
| 1895 | }
|
---|
| 1896 |
|
---|
| 1897 | =back
|
---|
| 1898 |
|
---|
| 1899 | See also L<C<munge_column_names>|/munge_column_names> and
|
---|
| 1900 | L<C<set_column_names>|/set_column_names>.
|
---|
| 1901 |
|
---|
| 1902 | =head3 munge_column_names
|
---|
| 1903 |
|
---|
| 1904 | If C<munge_column_names> is set, the method L</header> is invoked on the
|
---|
| 1905 | opened stream with all matching arguments to detect and set the headers.
|
---|
| 1906 |
|
---|
| 1907 | C<munge_column_names> can be abbreviated to C<munge>.
|
---|
| 1908 |
|
---|
| 1909 | =head3 key
|
---|
| 1910 |
|
---|
| 1911 | If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
|
---|
| 1912 | hashref instead of an array of hashes. Allowed values are simple scalars or
|
---|
| 1913 | array-references where the first element is the joiner and the rest are the
|
---|
| 1914 | fields to join to combine the key.
|
---|
| 1915 |
|
---|
| 1916 | my $ref = csv (in => "test.csv", key => "code");
|
---|
| 1917 | my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
|
---|
| 1918 |
|
---|
| 1919 | with test.csv like
|
---|
| 1920 |
|
---|
| 1921 | code,product,price,color
|
---|
| 1922 | 1,pc,850,gray
|
---|
| 1923 | 2,keyboard,12,white
|
---|
| 1924 | 3,mouse,5,black
|
---|
| 1925 |
|
---|
| 1926 | the first example will return
|
---|
| 1927 |
|
---|
| 1928 | { 1 => {
|
---|
| 1929 | code => 1,
|
---|
| 1930 | color => 'gray',
|
---|
| 1931 | price => 850,
|
---|
| 1932 | product => 'pc'
|
---|
| 1933 | },
|
---|
| 1934 | 2 => {
|
---|
| 1935 | code => 2,
|
---|
| 1936 | color => 'white',
|
---|
| 1937 | price => 12,
|
---|
| 1938 | product => 'keyboard'
|
---|
| 1939 | },
|
---|
| 1940 | 3 => {
|
---|
| 1941 | code => 3,
|
---|
| 1942 | color => 'black',
|
---|
| 1943 | price => 5,
|
---|
| 1944 | product => 'mouse'
|
---|
| 1945 | }
|
---|
| 1946 | }
|
---|
| 1947 |
|
---|
| 1948 | the second example will return
|
---|
| 1949 |
|
---|
| 1950 | { "1:gray" => {
|
---|
| 1951 | code => 1,
|
---|
| 1952 | color => 'gray',
|
---|
| 1953 | price => 850,
|
---|
| 1954 | product => 'pc'
|
---|
| 1955 | },
|
---|
| 1956 | "2:white" => {
|
---|
| 1957 | code => 2,
|
---|
| 1958 | color => 'white',
|
---|
| 1959 | price => 12,
|
---|
| 1960 | product => 'keyboard'
|
---|
| 1961 | },
|
---|
| 1962 | "3:black" => {
|
---|
| 1963 | code => 3,
|
---|
| 1964 | color => 'black',
|
---|
| 1965 | price => 5,
|
---|
| 1966 | product => 'mouse'
|
---|
| 1967 | }
|
---|
| 1968 | }
|
---|
| 1969 |
|
---|
| 1970 | The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
|
---|
| 1971 | data that has no header line, like
|
---|
| 1972 |
|
---|
| 1973 | my $ref = csv (
|
---|
| 1974 | in => "foo.csv",
|
---|
| 1975 | headers => [qw( c_foo foo bar description stock )],
|
---|
| 1976 | key => "c_foo",
|
---|
| 1977 | );
|
---|
| 1978 |
|
---|
| 1979 | =head3 value
|
---|
| 1980 |
|
---|
| 1981 | Used to create key-value hashes.
|
---|
| 1982 |
|
---|
| 1983 | Only allowed when C<key> is valid. A C<value> can be either a single column
|
---|
| 1984 | label or an anonymous list of column labels. In the first case, the value
|
---|
| 1985 | will be a simple scalar value, in the latter case, it will be a hashref.
|
---|
| 1986 |
|
---|
| 1987 | my $ref = csv (in => "test.csv", key => "code",
|
---|
| 1988 | value => "price");
|
---|
| 1989 | my $ref = csv (in => "test.csv", key => "code",
|
---|
| 1990 | value => [ "product", "price" ]);
|
---|
| 1991 | my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
|
---|
| 1992 | value => "price");
|
---|
| 1993 | my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
|
---|
| 1994 | value => [ "product", "price" ]);
|
---|
| 1995 |
|
---|
| 1996 | with test.csv like
|
---|
| 1997 |
|
---|
| 1998 | code,product,price,color
|
---|
| 1999 | 1,pc,850,gray
|
---|
| 2000 | 2,keyboard,12,white
|
---|
| 2001 | 3,mouse,5,black
|
---|
| 2002 |
|
---|
| 2003 | the first example will return
|
---|
| 2004 |
|
---|
| 2005 | { 1 => 850,
|
---|
| 2006 | 2 => 12,
|
---|
| 2007 | 3 => 5,
|
---|
| 2008 | }
|
---|
| 2009 |
|
---|
| 2010 | the second example will return
|
---|
| 2011 |
|
---|
| 2012 | { 1 => {
|
---|
| 2013 | price => 850,
|
---|
| 2014 | product => 'pc'
|
---|
| 2015 | },
|
---|
| 2016 | 2 => {
|
---|
| 2017 | price => 12,
|
---|
| 2018 | product => 'keyboard'
|
---|
| 2019 | },
|
---|
| 2020 | 3 => {
|
---|
| 2021 | price => 5,
|
---|
| 2022 | product => 'mouse'
|
---|
| 2023 | }
|
---|
| 2024 | }
|
---|
| 2025 |
|
---|
| 2026 | the third example will return
|
---|
| 2027 |
|
---|
| 2028 | { "1:gray" => 850,
|
---|
| 2029 | "2:white" => 12,
|
---|
| 2030 | "3:black" => 5,
|
---|
| 2031 | }
|
---|
| 2032 |
|
---|
| 2033 | the fourth example will return
|
---|
| 2034 |
|
---|
| 2035 | { "1:gray" => {
|
---|
| 2036 | price => 850,
|
---|
| 2037 | product => 'pc'
|
---|
| 2038 | },
|
---|
| 2039 | "2:white" => {
|
---|
| 2040 | price => 12,
|
---|
| 2041 | product => 'keyboard'
|
---|
| 2042 | },
|
---|
| 2043 | "3:black" => {
|
---|
| 2044 | price => 5,
|
---|
| 2045 | product => 'mouse'
|
---|
| 2046 | }
|
---|
| 2047 | }
|
---|
| 2048 |
|
---|
| 2049 | =head3 keep_headers
|
---|
| 2050 |
|
---|
| 2051 | When using hashes, store the column names in the arrayref passed, so all
|
---|
| 2052 | headers are available after the call in the original order.
|
---|
| 2053 |
|
---|
| 2054 | my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
|
---|
| 2055 |
|
---|
| 2056 | This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
|
---|
| 2057 |
|
---|
| 2058 | This attribute implies a default of C<auto> for the C<headers> attribute.
|
---|
| 2059 |
|
---|
| 2060 | =head3 fragment
|
---|
| 2061 |
|
---|
| 2062 | Only output the fragment as defined in the L</fragment> method. This option
|
---|
| 2063 | is ignored when I<generating> C<CSV>. See L</out>.
|
---|
| 2064 |
|
---|
| 2065 | Combining all of them could give something like
|
---|
| 2066 |
|
---|
| 2067 | use Text::CSV qw( csv );
|
---|
| 2068 | my $aoh = csv (
|
---|
| 2069 | in => "test.txt",
|
---|
| 2070 | encoding => "utf-8",
|
---|
| 2071 | headers => "auto",
|
---|
| 2072 | sep_char => "|",
|
---|
| 2073 | fragment => "row=3;6-9;15-*",
|
---|
| 2074 | );
|
---|
| 2075 | say $aoh->[15]{Foo};
|
---|
| 2076 |
|
---|
| 2077 | =head3 sep_set
|
---|
| 2078 |
|
---|
| 2079 | If C<sep_set> is set, the method L</header> is invoked on the opened stream
|
---|
| 2080 | to detect and set L<C<sep_char>|/sep_char> with the given set.
|
---|
| 2081 |
|
---|
| 2082 | C<sep_set> can be abbreviated to C<seps>.
|
---|
| 2083 |
|
---|
| 2084 | Note that as the L</header> method is invoked, its default is to also set
|
---|
| 2085 | the headers.
|
---|
| 2086 |
|
---|
| 2087 | =head3 set_column_names
|
---|
| 2088 |
|
---|
| 2089 | If C<set_column_names> is passed, the method L</header> is invoked on the
|
---|
| 2090 | opened stream with all arguments meant for L</header>.
|
---|
| 2091 |
|
---|
| 2092 | If C<set_column_names> is passed as a false value, the content of the first
|
---|
| 2093 | row is only preserved if the output is AoA:
|
---|
| 2094 |
|
---|
| 2095 | With an input-file like
|
---|
| 2096 |
|
---|
| 2097 | bAr,foo
|
---|
| 2098 | 1,2
|
---|
| 2099 | 3,4,5
|
---|
| 2100 |
|
---|
| 2101 | This call
|
---|
| 2102 |
|
---|
| 2103 | my $aoa = csv (in => $file, set_column_names => 0);
|
---|
| 2104 |
|
---|
| 2105 | will result in
|
---|
| 2106 |
|
---|
| 2107 | [[ "bar", "foo" ],
|
---|
| 2108 | [ "1", "2" ],
|
---|
| 2109 | [ "3", "4", "5" ]]
|
---|
| 2110 |
|
---|
| 2111 | and
|
---|
| 2112 |
|
---|
| 2113 | my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
|
---|
| 2114 |
|
---|
| 2115 | will result in
|
---|
| 2116 |
|
---|
| 2117 | [[ "bAr", "foo" ],
|
---|
| 2118 | [ "1", "2" ],
|
---|
| 2119 | [ "3", "4", "5" ]]
|
---|
| 2120 |
|
---|
| 2121 | =head2 Callbacks
|
---|
| 2122 |
|
---|
| 2123 | Callbacks enable actions triggered from the I<inside> of Text::CSV.
|
---|
| 2124 |
|
---|
| 2125 | While most of what this enables can easily be done in an unrolled loop as
|
---|
| 2126 | described in the L</SYNOPSIS>, callbacks can be used to meet special demands
|
---|
| 2127 | or enhance the L</csv> function.
|
---|
| 2128 |
|
---|
| 2129 | =over 2
|
---|
| 2130 |
|
---|
| 2131 | =item error
|
---|
| 2132 |
|
---|
| 2133 | $csv->callbacks (error => sub { $csv->SetDiag (0) });
|
---|
| 2134 |
|
---|
| 2135 | The C<error> callback is invoked when an error occurs, but I<only> when
|
---|
| 2136 | L</auto_diag> is set to a true value. A callback is invoked with the values
|
---|
| 2137 | returned by L</error_diag>:
|
---|
| 2138 |
|
---|
| 2139 | my ($c, $s);
|
---|
| 2140 |
|
---|
| 2141 | sub ignore3006
|
---|
| 2142 | {
|
---|
| 2143 | my ($err, $msg, $pos, $recno, $fldno) = @_;
|
---|
| 2144 | if ($err == 3006) {
|
---|
| 2145 | # ignore this error
|
---|
| 2146 | ($c, $s) = (undef, undef);
|
---|
| 2147 | Text::CSV->SetDiag (0);
|
---|
| 2148 | }
|
---|
| 2149 | # Any other error
|
---|
| 2150 | return;
|
---|
| 2151 | } # ignore3006
|
---|
| 2152 |
|
---|
| 2153 | $csv->callbacks (error => \&ignore3006);
|
---|
| 2154 | $csv->bind_columns (\$c, \$s);
|
---|
| 2155 | while ($csv->getline ($fh)) {
|
---|
| 2156 | # Error 3006 will not stop the loop
|
---|
| 2157 | }
|
---|
| 2158 |
|
---|
| 2159 | =item after_parse
|
---|
| 2160 |
|
---|
| 2161 | $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
|
---|
| 2162 | while (my $row = $csv->getline ($fh)) {
|
---|
| 2163 | $row->[-1] eq "NEW";
|
---|
| 2164 | }
|
---|
| 2165 |
|
---|
| 2166 | This callback is invoked after parsing with L</getline> only if no error
|
---|
| 2167 | occurred. The callback is invoked with two arguments: the current C<CSV>
|
---|
| 2168 | parser object and an array reference to the fields parsed.
|
---|
| 2169 |
|
---|
| 2170 | The return code of the callback is ignored unless it is a reference to the
|
---|
| 2171 | string "skip", in which case the record will be skipped in L</getline_all>.
|
---|
| 2172 |
|
---|
| 2173 | sub add_from_db
|
---|
| 2174 | {
|
---|
| 2175 | my ($csv, $row) = @_;
|
---|
| 2176 | $sth->execute ($row->[4]);
|
---|
| 2177 | push @$row, $sth->fetchrow_array;
|
---|
| 2178 | } # add_from_db
|
---|
| 2179 |
|
---|
| 2180 | my $aoa = csv (in => "file.csv", callbacks => {
|
---|
| 2181 | after_parse => \&add_from_db });
|
---|
| 2182 |
|
---|
| 2183 | This hook can be used for validation:
|
---|
| 2184 |
|
---|
| 2185 | =over 2
|
---|
| 2186 |
|
---|
| 2187 | =item FAIL
|
---|
| 2188 |
|
---|
| 2189 | Die if any of the records does not validate a rule:
|
---|
| 2190 |
|
---|
| 2191 | after_parse => sub {
|
---|
| 2192 | $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
|
---|
| 2193 | die "5th field does not have a valid Dutch zipcode";
|
---|
| 2194 | }
|
---|
| 2195 |
|
---|
| 2196 | =item DEFAULT
|
---|
| 2197 |
|
---|
| 2198 | Replace invalid fields with a default value:
|
---|
| 2199 |
|
---|
| 2200 | after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
|
---|
| 2201 |
|
---|
| 2202 | =item SKIP
|
---|
| 2203 |
|
---|
| 2204 | Skip records that have invalid fields (only applies to L</getline_all>):
|
---|
| 2205 |
|
---|
| 2206 | after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
|
---|
| 2207 |
|
---|
| 2208 | =back
|
---|
| 2209 |
|
---|
| 2210 | =item before_print
|
---|
| 2211 |
|
---|
| 2212 | my $idx = 1;
|
---|
| 2213 | $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
|
---|
| 2214 | $csv->print (*STDOUT, [ 0, $_ ]) for @members;
|
---|
| 2215 |
|
---|
| 2216 | This callback is invoked before printing with L</print> only if no error
|
---|
| 2217 | occurred. The callback is invoked with two arguments: the current C<CSV>
|
---|
| 2218 | parser object and an array reference to the fields passed.
|
---|
| 2219 |
|
---|
| 2220 | The return code of the callback is ignored.
|
---|
| 2221 |
|
---|
| 2222 | sub max_4_fields
|
---|
| 2223 | {
|
---|
| 2224 | my ($csv, $row) = @_;
|
---|
| 2225 | @$row > 4 and splice @$row, 4;
|
---|
| 2226 | } # max_4_fields
|
---|
| 2227 |
|
---|
| 2228 | csv (in => csv (in => "file.csv"), out => *STDOUT,
|
---|
| 2229 | callbacks => { before_print => \&max_4_fields });
|
---|
| 2230 |
|
---|
| 2231 | This callback is not active for L</combine>.
|
---|
| 2232 |
|
---|
| 2233 | =back
|
---|
| 2234 |
|
---|
| 2235 | =head3 Callbacks for csv ()
|
---|
| 2236 |
|
---|
| 2237 | The L</csv> function allows for some callbacks that do not integrate in XS internals
|
---|
| 2238 | but only feature the L</csv> function.
|
---|
| 2239 |
|
---|
| 2240 | csv (in => "file.csv",
|
---|
| 2241 | callbacks => {
|
---|
| 2242 | filter => { 6 => sub { $_ > 15 } }, # first
|
---|
| 2243 | after_parse => sub { say "AFTER PARSE"; }, # first
|
---|
| 2244 | after_in => sub { say "AFTER IN"; }, # second
|
---|
| 2245 | on_in => sub { say "ON IN"; }, # third
|
---|
| 2246 | },
|
---|
| 2247 | );
|
---|
| 2248 |
|
---|
| 2249 | csv (in => $aoh,
|
---|
| 2250 | out => "file.csv",
|
---|
| 2251 | callbacks => {
|
---|
| 2252 | on_in => sub { say "ON IN"; }, # first
|
---|
| 2253 | before_out => sub { say "BEFORE OUT"; }, # second
|
---|
| 2254 | before_print => sub { say "BEFORE PRINT"; }, # third
|
---|
| 2255 | },
|
---|
| 2256 | );
|
---|
| 2257 |
|
---|
| 2258 | =over 2
|
---|
| 2259 |
|
---|
| 2260 | =item filter
|
---|
| 2261 |
|
---|
| 2262 | This callback can be used to filter records. It is called just after a new
|
---|
| 2263 | record has been scanned. The callback accepts a:
|
---|
| 2264 |
|
---|
| 2265 | =over 2
|
---|
| 2266 |
|
---|
| 2267 | =item hashref
|
---|
| 2268 |
|
---|
| 2269 | The keys are the index to the row (the field name or field number, 1-based)
|
---|
| 2270 | and the values are subs to return a true or false value.
|
---|
| 2271 |
|
---|
| 2272 | csv (in => "file.csv", filter => {
|
---|
| 2273 | 3 => sub { m/a/ }, # third field should contain an "a"
|
---|
| 2274 | 5 => sub { length > 4 }, # length of the 5th field minimal 5
|
---|
| 2275 | });
|
---|
| 2276 |
|
---|
| 2277 | csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
|
---|
| 2278 |
|
---|
| 2279 | If the keys to the filter hash contain any character that is not a digit it
|
---|
| 2280 | will also implicitly set L</headers> to C<"auto"> unless L</headers> was
|
---|
| 2281 | already passed as argument. When headers are active, returning an array of
|
---|
| 2282 | hashes, the filter is not applicable to the header itself.
|
---|
| 2283 |
|
---|
| 2284 | All sub results should match, as in AND.
|
---|
| 2285 |
|
---|
| 2286 | The context of the callback sets C<$_> localized to the field indicated by
|
---|
| 2287 | the filter. The two arguments are as with all other callbacks, so the other
|
---|
| 2288 | fields in the current row can be seen:
|
---|
| 2289 |
|
---|
| 2290 | filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
|
---|
| 2291 |
|
---|
| 2292 | If the context is set to return a list of hashes (L</headers> is defined),
|
---|
| 2293 | the current record will also be available in the localized C<%_>:
|
---|
| 2294 |
|
---|
| 2295 | filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
|
---|
| 2296 |
|
---|
| 2297 | If the filter is used to I<alter> the content by changing C<$_>, make sure
|
---|
| 2298 | that the sub returns true in order not to have that record skipped:
|
---|
| 2299 |
|
---|
| 2300 | filter => { 2 => sub { $_ = uc }}
|
---|
| 2301 |
|
---|
| 2302 | will upper-case the second field, and then skip it if the resulting content
|
---|
| 2303 | evaluates to false. To always accept, end with truth:
|
---|
| 2304 |
|
---|
| 2305 | filter => { 2 => sub { $_ = uc; 1 }}
|
---|
| 2306 |
|
---|
| 2307 | =item coderef
|
---|
| 2308 |
|
---|
| 2309 | csv (in => "file.csv", filter => sub { $n++; 0; });
|
---|
| 2310 |
|
---|
| 2311 | If the argument to C<filter> is a coderef, it is an alias or shortcut to a
|
---|
| 2312 | filter on column 0:
|
---|
| 2313 |
|
---|
| 2314 | csv (filter => sub { $n++; 0 });
|
---|
| 2315 |
|
---|
| 2316 | is equal to
|
---|
| 2317 |
|
---|
| 2318 | csv (filter => { 0 => sub { $n++; 0 } });
|
---|
| 2319 |
|
---|
| 2320 | =item filter-name
|
---|
| 2321 |
|
---|
| 2322 | csv (in => "file.csv", filter => "not_blank");
|
---|
| 2323 | csv (in => "file.csv", filter => "not_empty");
|
---|
| 2324 | csv (in => "file.csv", filter => "filled");
|
---|
| 2325 |
|
---|
| 2326 | These are predefined filters
|
---|
| 2327 |
|
---|
| 2328 | Given a file like (line numbers prefixed for doc purpose only):
|
---|
| 2329 |
|
---|
| 2330 | 1:1,2,3
|
---|
| 2331 | 2:
|
---|
| 2332 | 3:,
|
---|
| 2333 | 4:""
|
---|
| 2334 | 5:,,
|
---|
| 2335 | 6:, ,
|
---|
| 2336 | 7:"",
|
---|
| 2337 | 8:" "
|
---|
| 2338 | 9:4,5,6
|
---|
| 2339 |
|
---|
| 2340 | =over 2
|
---|
| 2341 |
|
---|
| 2342 | =item not_blank
|
---|
| 2343 |
|
---|
| 2344 | Filter out the blank lines
|
---|
| 2345 |
|
---|
| 2346 | This filter is a shortcut for
|
---|
| 2347 |
|
---|
| 2348 | filter => { 0 => sub { @{$_[1]} > 1 or
|
---|
| 2349 | defined $_[1][0] && $_[1][0] ne "" } }
|
---|
| 2350 |
|
---|
| 2351 | Due to the implementation, it is currently impossible to also filter lines
|
---|
| 2352 | that consist only of a quoted empty field. These lines are also considered
|
---|
| 2353 | blank lines.
|
---|
| 2354 |
|
---|
| 2355 | With the given example, lines 2 and 4 will be skipped.
|
---|
| 2356 |
|
---|
| 2357 | =item not_empty
|
---|
| 2358 |
|
---|
| 2359 | Filter out lines where all the fields are empty.
|
---|
| 2360 |
|
---|
| 2361 | This filter is a shortcut for
|
---|
| 2362 |
|
---|
| 2363 | filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
|
---|
| 2364 |
|
---|
| 2365 | A space is not regarded as being empty, so given the example data, lines 2, 3,
|
---|
| 2366 | 4, 5, and 7 are skipped.
|
---|
| 2367 |
|
---|
| 2368 | =item filled
|
---|
| 2369 |
|
---|
| 2370 | Filter out lines that have no visible data
|
---|
| 2371 |
|
---|
| 2372 | This filter is a shortcut for
|
---|
| 2373 |
|
---|
| 2374 | filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
|
---|
| 2375 |
|
---|
| 2376 | This filter rejects all lines that do I<not> have at least one field that does
|
---|
| 2377 | not evaluate to the empty string.
|
---|
| 2378 |
|
---|
| 2379 | With the given example data, this filter would skip lines 2 through 8.
|
---|
| 2380 |
|
---|
| 2381 | =back
|
---|
| 2382 |
|
---|
| 2383 | =back
|
---|
| 2384 |
|
---|
| 2385 | =item after_in
|
---|
| 2386 |
|
---|
| 2387 | This callback is invoked for each record after all records have been parsed
|
---|
| 2388 | but before returning the reference to the caller. The hook is invoked with
|
---|
| 2389 | two arguments: the current C<CSV> parser object and a reference to the
|
---|
| 2390 | record. The reference can be a reference to a HASH or a reference to an
|
---|
| 2391 | ARRAY as determined by the arguments.
|
---|
| 2392 |
|
---|
| 2393 | This callback can also be passed as an attribute without the C<callbacks>
|
---|
| 2394 | wrapper.
|
---|
| 2395 |
|
---|
| 2396 | =item before_out
|
---|
| 2397 |
|
---|
| 2398 | This callback is invoked for each record before the record is printed. The
|
---|
| 2399 | hook is invoked with two arguments: the current C<CSV> parser object and a
|
---|
| 2400 | reference to the record. The reference can be a reference to a HASH or a
|
---|
| 2401 | reference to an ARRAY as determined by the arguments.
|
---|
| 2402 |
|
---|
| 2403 | This callback can also be passed as an attribute without the C<callbacks>
|
---|
| 2404 | wrapper.
|
---|
| 2405 |
|
---|
| 2406 | This callback makes the row available in C<%_> if the row is a hashref. In
|
---|
| 2407 | this case C<%_> is writable and will change the original row.
|
---|
| 2408 |
|
---|
| 2409 | =item on_in
|
---|
| 2410 |
|
---|
| 2411 | This callback acts exactly as the L</after_in> or the L</before_out> hooks.
|
---|
| 2412 |
|
---|
| 2413 | This callback can also be passed as an attribute without the C<callbacks>
|
---|
| 2414 | wrapper.
|
---|
| 2415 |
|
---|
| 2416 | This callback makes the row available in C<%_> if the row is a hashref. In
|
---|
| 2417 | this case C<%_> is writable and will change the original row. So e.g. with
|
---|
| 2418 |
|
---|
| 2419 | my $aoh = csv (
|
---|
| 2420 | in => \"foo\n1\n2\n",
|
---|
| 2421 | headers => "auto",
|
---|
| 2422 | on_in => sub { $_{bar} = 2; },
|
---|
| 2423 | );
|
---|
| 2424 |
|
---|
| 2425 | C<$aoh> will be:
|
---|
| 2426 |
|
---|
| 2427 | [ { foo => 1,
|
---|
| 2428 | bar => 2,
|
---|
| 2429 | },
|
---|
| 2430 | { foo => 2,
|
---|
| 2431 | bar => 2,
|
---|
| 2432 | }
|
---|
| 2433 | ]
|
---|
| 2434 |
|
---|
| 2435 | =item csv
|
---|
| 2436 |
|
---|
| 2437 | The I<function> L</csv> can also be called as a method or with an existing
|
---|
| 2438 | Text::CSV object. This could help if the function is to be invoked a lot
|
---|
| 2439 | of times and the overhead of creating the object internally over and over
|
---|
| 2440 | again would be prevented by passing an existing instance.
|
---|
| 2441 |
|
---|
| 2442 | my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
|
---|
| 2443 |
|
---|
| 2444 | my $aoa = $csv->csv (in => $fh);
|
---|
| 2445 | my $aoa = csv (in => $fh, csv => $csv);
|
---|
| 2446 |
|
---|
| 2447 | both act the same. Running this 20000 times on a 20-line CSV file showed
|
---|
| 2448 | a 53% speedup.
|
---|
| 2449 |
|
---|
| 2450 | =back
|
---|
| 2451 |
|
---|
| 2452 | =head1 DIAGNOSTICS
|
---|
| 2453 |
|
---|
| 2454 | This section is also taken from Text::CSV_XS.
|
---|
| 2455 |
|
---|
| 2456 | Still under construction ...
|
---|
| 2457 |
|
---|
| 2458 | If an error occurs, C<< $csv->error_diag >> can be used to get information
|
---|
| 2459 | on the cause of the failure. Note that for speed reasons the internal value
|
---|
| 2460 | is never cleared on success, so using the value returned by L</error_diag>
|
---|
| 2461 | in normal cases - when no error occurred - may cause unexpected results.
|
---|
| 2462 |
|
---|
| 2463 | If the constructor failed, the cause can be found using L</error_diag> as a
|
---|
| 2464 | class method, like C<< Text::CSV->error_diag >>.
|
---|
| 2465 |
|
---|
| 2466 | The C<< $csv->error_diag >> method is automatically invoked upon error when
|
---|
| 2467 | the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
|
---|
| 2468 | C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
|
---|
| 2469 | C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
|
---|
| 2470 | EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
|
---|
| 2471 |
|
---|
| 2472 | Errors can be (individually) caught using the L</error> callback.
|
---|
| 2473 |
|
---|
| 2474 | The errors as described below are available. I have tried to make the error
|
---|
| 2475 | itself explanatory enough, but more descriptions will be added. For most of
|
---|
| 2476 | these errors, the first three capitals describe the error category:
|
---|
| 2477 |
|
---|
| 2478 | =over 2
|
---|
| 2479 |
|
---|
| 2480 | =item *
|
---|
| 2481 | INI
|
---|
| 2482 |
|
---|
| 2483 | Initialization error or option conflict.
|
---|
| 2484 |
|
---|
| 2485 | =item *
|
---|
| 2486 | ECR
|
---|
| 2487 |
|
---|
| 2488 | Carriage-Return related parse error.
|
---|
| 2489 |
|
---|
| 2490 | =item *
|
---|
| 2491 | EOF
|
---|
| 2492 |
|
---|
| 2493 | End-Of-File related parse error.
|
---|
| 2494 |
|
---|
| 2495 | =item *
|
---|
| 2496 | EIQ
|
---|
| 2497 |
|
---|
| 2498 | Parse error inside quotation.
|
---|
| 2499 |
|
---|
| 2500 | =item *
|
---|
| 2501 | EIF
|
---|
| 2502 |
|
---|
| 2503 | Parse error inside field.
|
---|
| 2504 |
|
---|
| 2505 | =item *
|
---|
| 2506 | ECB
|
---|
| 2507 |
|
---|
| 2508 | Combine error.
|
---|
| 2509 |
|
---|
| 2510 | =item *
|
---|
| 2511 | EHR
|
---|
| 2512 |
|
---|
| 2513 | HashRef parse related error.
|
---|
| 2514 |
|
---|
| 2515 | =back
|
---|
| 2516 |
|
---|
| 2517 | And below should be the complete list of error codes that can be returned:
|
---|
| 2518 |
|
---|
| 2519 | =over 2
|
---|
| 2520 |
|
---|
| 2521 | =item *
|
---|
| 2522 | 1001 "INI - sep_char is equal to quote_char or escape_char"
|
---|
| 2523 |
|
---|
| 2524 | The L<separation character|/sep_char> cannot be equal to L<the quotation
|
---|
| 2525 | character|/quote_char> or to L<the escape character|/escape_char>, as this
|
---|
| 2526 | would invalidate all parsing rules.
|
---|
| 2527 |
|
---|
| 2528 | =item *
|
---|
| 2529 | 1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
|
---|
| 2530 |
|
---|
| 2531 | Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
|
---|
| 2532 | L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
|
---|
| 2533 | C<SPACE> or C<TAB> is too ambiguous to allow.
|
---|
| 2534 |
|
---|
| 2535 | =item *
|
---|
| 2536 | 1003 "INI - \r or \n in main attr not allowed"
|
---|
| 2537 |
|
---|
| 2538 | Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
|
---|
| 2539 | L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
|
---|
| 2540 | allowed.
|
---|
| 2541 |
|
---|
| 2542 | =item *
|
---|
| 2543 | 1004 "INI - callbacks should be undef or a hashref"
|
---|
| 2544 |
|
---|
| 2545 | The L<C<callbacks>|/Callbacks> attribute only allows its value to be C<undef> or
|
---|
| 2546 | a hash reference.
|
---|
| 2547 |
|
---|
| 2548 | =item *
|
---|
| 2549 | 1005 "INI - EOL too long"
|
---|
| 2550 |
|
---|
| 2551 | The value passed for EOL is exceeding its maximum length (16).
|
---|
| 2552 |
|
---|
| 2553 | =item *
|
---|
| 2554 | 1006 "INI - SEP too long"
|
---|
| 2555 |
|
---|
| 2556 | The value passed for SEP is exceeding its maximum length (16).
|
---|
| 2557 |
|
---|
| 2558 | =item *
|
---|
| 2559 | 1007 "INI - QUOTE too long"
|
---|
| 2560 |
|
---|
| 2561 | The value passed for QUOTE is exceeding its maximum length (16).
|
---|
| 2562 |
|
---|
| 2563 | =item *
|
---|
| 2564 | 1008 "INI - SEP undefined"
|
---|
| 2565 |
|
---|
| 2566 | The value passed for SEP should be defined and not empty.
|
---|
| 2567 |
|
---|
| 2568 | =item *
|
---|
| 2569 | 1010 "INI - the header is empty"
|
---|
| 2570 |
|
---|
| 2571 | The header line parsed in the L</header> is empty.
|
---|
| 2572 |
|
---|
| 2573 | =item *
|
---|
| 2574 | 1011 "INI - the header contains more than one valid separator"
|
---|
| 2575 |
|
---|
| 2576 | The header line parsed in the L</header> contains more than one (unique)
|
---|
| 2577 | separator character out of the allowed set of separators.
|
---|
| 2578 |
|
---|
| 2579 | =item *
|
---|
| 2580 | 1012 "INI - the header contains an empty field"
|
---|
| 2581 |
|
---|
| 2582 | The header line parsed in the L</header> contains an empty field.
|
---|
| 2583 |
|
---|
| 2584 | =item *
|
---|
| 2585 | 1013 "INI - the header contains nun-unique fields"
|
---|
| 2586 |
|
---|
| 2587 | The header line parsed in the L</header> contains at least two identical
|
---|
| 2588 | fields.
|
---|
| 2589 |
|
---|
| 2590 | =item *
|
---|
| 2591 | 1014 "INI - header called on undefined stream"
|
---|
| 2592 |
|
---|
| 2593 | The header line cannot be parsed from an undefined source.
|
---|
| 2594 |
|
---|
| 2595 | =item *
|
---|
| 2596 | 1500 "PRM - Invalid/unsupported argument(s)"
|
---|
| 2597 |
|
---|
| 2598 | Function or method called with invalid argument(s) or parameter(s).
|
---|
| 2599 |
|
---|
| 2600 | =item *
|
---|
| 2601 | 1501 "PRM - The key attribute is passed as an unsupported type"
|
---|
| 2602 |
|
---|
| 2603 | The C<key> attribute is of an unsupported type.
|
---|
| 2604 |
|
---|
| 2605 | =item *
|
---|
| 2606 | 1502 "PRM - The value attribute is passed without the key attribute"
|
---|
| 2607 |
|
---|
| 2608 | The C<value> attribute is only allowed when a valid key is given.
|
---|
| 2609 |
|
---|
| 2610 | =item *
|
---|
| 2611 | 1503 "PRM - The value attribute is passed as an unsupported type"
|
---|
| 2612 |
|
---|
| 2613 | The C<value> attribute is of an unsupported type.
|
---|
| 2614 |
|
---|
| 2615 | =item *
|
---|
| 2616 | 2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
|
---|
| 2617 |
|
---|
| 2618 | When L<C<eol>|/eol> has been set to anything but the default, like
|
---|
| 2619 | C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
|
---|
| 2620 | L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
|
---|
| 2621 | not make up the L<C<eol>|/eol> sequence, this is an error.
|
---|
| 2622 |
|
---|
| 2623 | =item *
|
---|
| 2624 | 2011 "ECR - Characters after end of quoted field"
|
---|
| 2625 |
|
---|
| 2626 | Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
|
---|
| 2627 | field and after the closing double-quote, there should be either a new-line
|
---|
| 2628 | sequence or a separation character.
|
---|
| 2629 |
|
---|
| 2630 | =item *
|
---|
| 2631 | 2012 "EOF - End of data in parsing input stream"
|
---|
| 2632 |
|
---|
| 2633 | Self-explaining. End-of-file while inside parsing a stream. Can happen only
|
---|
| 2634 | when reading from streams with L</getline>, as using L</parse> is done on
|
---|
| 2635 | strings that are not required to have a trailing L<C<eol>|/eol>.
|
---|
| 2636 |
|
---|
| 2637 | =item *
|
---|
| 2638 | 2013 "INI - Specification error for fragments RFC7111"
|
---|
| 2639 |
|
---|
| 2640 | Invalid specification for the URI L</fragment>.
|
---|
| 2641 |
|
---|
| 2642 | =item *
|
---|
| 2643 | 2014 "ENF - Inconsistent number of fields"
|
---|
| 2644 |
|
---|
| 2645 | Inconsistent number of fields under strict parsing.
|
---|
| 2646 |
|
---|
| 2647 | =item *
|
---|
| 2648 | 2021 "EIQ - NL char inside quotes, binary off"
|
---|
| 2649 |
|
---|
| 2650 | Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
|
---|
| 2651 | has been selected with the constructor.
|
---|
| 2652 |
|
---|
| 2653 | =item *
|
---|
| 2654 | 2022 "EIQ - CR char inside quotes, binary off"
|
---|
| 2655 |
|
---|
| 2656 | Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
|
---|
| 2657 | has been selected with the constructor.
|
---|
| 2658 |
|
---|
| 2659 | =item *
|
---|
| 2660 | 2023 "EIQ - QUO character not allowed"
|
---|
| 2661 |
|
---|
| 2662 | Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
|
---|
| 2663 | will cause this error.
|
---|
| 2664 |
|
---|
| 2665 | =item *
|
---|
| 2666 | 2024 "EIQ - EOF cannot be escaped, not even inside quotes"
|
---|
| 2667 |
|
---|
| 2668 | The escape character is not allowed as last character in an input stream.
|
---|
| 2669 |
|
---|
| 2670 | =item *
|
---|
| 2671 | 2025 "EIQ - Loose unescaped escape"
|
---|
| 2672 |
|
---|
| 2673 | An escape character should escape only characters that need escaping.
|
---|
| 2674 |
|
---|
| 2675 | Allowing the escape for other characters is possible with the attribute
|
---|
| 2676 | L</allow_loose_escape>.
|
---|
| 2677 |
|
---|
| 2678 | =item *
|
---|
| 2679 | 2026 "EIQ - Binary character inside quoted field, binary off"
|
---|
| 2680 |
|
---|
| 2681 | Binary characters are not allowed by default. Exceptions are fields that
|
---|
| 2682 | contain valid UTF-8, that will automatically be upgraded if the content is
|
---|
| 2683 | valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
|
---|
| 2684 |
|
---|
| 2685 | =item *
|
---|
| 2686 | 2027 "EIQ - Quoted field not terminated"
|
---|
| 2687 |
|
---|
| 2688 | When parsing a field that started with a quotation character, the field is
|
---|
| 2689 | expected to be closed with a quotation character. When the parsed line is
|
---|
| 2690 | exhausted before the quote is found, that field is not terminated.
|
---|
| 2691 |
|
---|
| 2692 | =item *
|
---|
| 2693 | 2030 "EIF - NL char inside unquoted verbatim, binary off"
|
---|
| 2694 |
|
---|
| 2695 | =item *
|
---|
| 2696 | 2031 "EIF - CR char is first char of field, not part of EOL"
|
---|
| 2697 |
|
---|
| 2698 | =item *
|
---|
| 2699 | 2032 "EIF - CR char inside unquoted, not part of EOL"
|
---|
| 2700 |
|
---|
| 2701 | =item *
|
---|
| 2702 | 2034 "EIF - Loose unescaped quote"
|
---|
| 2703 |
|
---|
| 2704 | =item *
|
---|
| 2705 | 2035 "EIF - Escaped EOF in unquoted field"
|
---|
| 2706 |
|
---|
| 2707 | =item *
|
---|
| 2708 | 2036 "EIF - ESC error"
|
---|
| 2709 |
|
---|
| 2710 | =item *
|
---|
| 2711 | 2037 "EIF - Binary character in unquoted field, binary off"
|
---|
| 2712 |
|
---|
| 2713 | =item *
|
---|
| 2714 | 2110 "ECB - Binary character in Combine, binary off"
|
---|
| 2715 |
|
---|
| 2716 | =item *
|
---|
| 2717 | 2200 "EIO - print to IO failed. See errno"
|
---|
| 2718 |
|
---|
| 2719 | =item *
|
---|
| 2720 | 3001 "EHR - Unsupported syntax for column_names ()"
|
---|
| 2721 |
|
---|
| 2722 | =item *
|
---|
| 2723 | 3002 "EHR - getline_hr () called before column_names ()"
|
---|
| 2724 |
|
---|
| 2725 | =item *
|
---|
| 2726 | 3003 "EHR - bind_columns () and column_names () fields count mismatch"
|
---|
| 2727 |
|
---|
| 2728 | =item *
|
---|
| 2729 | 3004 "EHR - bind_columns () only accepts refs to scalars"
|
---|
| 2730 |
|
---|
| 2731 | =item *
|
---|
| 2732 | 3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
|
---|
| 2733 |
|
---|
| 2734 | =item *
|
---|
| 2735 | 3007 "EHR - bind_columns needs refs to writable scalars"
|
---|
| 2736 |
|
---|
| 2737 | =item *
|
---|
| 2738 | 3008 "EHR - unexpected error in bound fields"
|
---|
| 2739 |
|
---|
| 2740 | =item *
|
---|
| 2741 | 3009 "EHR - print_hr () called before column_names ()"
|
---|
| 2742 |
|
---|
| 2743 | =item *
|
---|
| 2744 | 3010 "EHR - print_hr () called with invalid arguments"
|
---|
| 2745 |
|
---|
| 2746 | =back
|
---|
| 2747 |
|
---|
| 2748 | =head1 SEE ALSO
|
---|
| 2749 |
|
---|
| 2750 | L<Text::CSV_PP>, L<Text::CSV_XS> and L<Text::CSV::Encoded>.
|
---|
| 2751 |
|
---|
| 2752 |
|
---|
| 2753 | =head1 AUTHORS and MAINTAINERS
|
---|
| 2754 |
|
---|
| 2755 | Alan Citterman F<E<lt>alan[at]mfgrtl.comE<gt>> wrote the original Perl
|
---|
| 2756 | module. Please don't send mail concerning Text::CSV to Alan, as
|
---|
| 2757 | he's not a present maintainer.
|
---|
| 2758 |
|
---|
| 2759 | Jochen Wiedmann F<E<lt>joe[at]ispsoft.deE<gt>> rewrote the encoding and
|
---|
| 2760 | decoding in C by implementing a simple finite-state machine and added
|
---|
| 2761 | the variable quote, escape and separator characters, the binary mode
|
---|
| 2762 | and the print and getline methods. See ChangeLog releases 0.10 through
|
---|
| 2763 | 0.23.
|
---|
| 2764 |
|
---|
| 2765 | H.Merijn Brand F<E<lt>h.m.brand[at]xs4all.nlE<gt>> cleaned up the code,
|
---|
| 2766 | added the field flags methods, wrote the major part of the test suite,
|
---|
| 2767 | completed the documentation, fixed some RT bugs. See ChangeLog releases
|
---|
| 2768 | 0.25 and on.
|
---|
| 2769 |
|
---|
| 2770 | Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> wrote Text::CSV_PP
|
---|
| 2771 | which is the pure-Perl version of Text::CSV_XS.
|
---|
| 2772 |
|
---|
| 2773 | New Text::CSV (since 0.99) is maintained by Makamaka, and Kenichi Ishigaki
|
---|
| 2774 | since 1.91.
|
---|
| 2775 |
|
---|
| 2776 |
|
---|
| 2777 | =head1 COPYRIGHT AND LICENSE
|
---|
| 2778 |
|
---|
| 2779 | Text::CSV
|
---|
| 2780 |
|
---|
| 2781 | Copyright (C) 1997 Alan Citterman. All rights reserved.
|
---|
| 2782 | Copyright (C) 2007-2015 Makamaka Hannyaharamitu.
|
---|
| 2783 | Copyright (C) 2017- Kenichi Ishigaki
|
---|
| 2784 | A large portion of the doc is taken from Text::CSV_XS. See below.
|
---|
| 2785 |
|
---|
| 2786 | Text::CSV_PP:
|
---|
| 2787 |
|
---|
| 2788 | Copyright (C) 2005-2015 Makamaka Hannyaharamitu.
|
---|
| 2789 | Copyright (C) 2017- Kenichi Ishigaki
|
---|
| 2790 | A large portion of the code/doc are also taken from Text::CSV_XS. See below.
|
---|
| 2791 |
|
---|
| 2792 | Text::CSV_XS:
|
---|
| 2793 |
|
---|
| 2794 | Copyright (C) 2007-2016 H.Merijn Brand for PROCURA B.V.
|
---|
| 2795 | Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
|
---|
| 2796 | Portions Copyright (C) 1997 Alan Citterman. All rights reserved.
|
---|
| 2797 |
|
---|
| 2798 |
|
---|
| 2799 | This library is free software; you can redistribute it and/or modify
|
---|
| 2800 | it under the same terms as Perl itself.
|
---|
| 2801 |
|
---|
| 2802 | =cut
|
---|