source: main/trunk/greenstone2/perllib/cpan/Text/CSV_PP.pm@ 33235

Last change on this file since 33235 was 33235, checked in by davidb, 5 years ago

CPAN module for processing CSV files

File size: 168.2 KB
Line 
1package Text::CSV_PP;
2
3################################################################################
4#
5# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
6#
7################################################################################
8require 5.006001;
9
10use strict;
11use Exporter ();
12use vars qw($VERSION @ISA @EXPORT_OK);
13use Carp;
14
15$VERSION = '1.99';
16@ISA = qw(Exporter);
17@EXPORT_OK = qw(csv);
18
19sub PV { 0 }
20sub IV { 1 }
21sub NV { 2 }
22
23sub IS_QUOTED () { 0x0001; }
24sub IS_BINARY () { 0x0002; }
25sub IS_ERROR () { 0x0004; }
26sub IS_MISSING () { 0x0010; }
27
28sub HOOK_ERROR () { 0x0001; }
29sub HOOK_AFTER_PARSE () { 0x0002; }
30sub HOOK_BEFORE_PRINT () { 0x0004; }
31
32sub useIO_EOF () { 0x0010; }
33
34my $ERRORS = {
35 # Generic errors
36 1000 => "INI - constructor failed",
37 1001 => "INI - sep_char is equal to quote_char or escape_char",
38 1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
39 1003 => "INI - \\r or \\n in main attr not allowed",
40 1004 => "INI - callbacks should be undef or a hashref",
41 1005 => "INI - EOL too long",
42 1006 => "INI - SEP too long",
43 1007 => "INI - QUOTE too long",
44 1008 => "INI - SEP undefined",
45
46 1010 => "INI - the header is empty",
47 1011 => "INI - the header contains more than one valid separator",
48 1012 => "INI - the header contains an empty field",
49 1013 => "INI - the header contains nun-unique fields",
50 1014 => "INI - header called on undefined stream",
51
52 # Syntax errors
53 1500 => "PRM - Invalid/unsupported arguments(s)",
54 1501 => "PRM - The key attribute is passed as an unsupported type",
55 1502 => "PRM - The value attribute is passed without the key attribute",
56 1503 => "PRM - The value attribute is passed as an unsupported type",
57
58 # Parse errors
59 2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
60 2011 => "ECR - Characters after end of quoted field",
61 2012 => "EOF - End of data in parsing input stream",
62 2013 => "ESP - Specification error for fragments RFC7111",
63 2014 => "ENF - Inconsistent number of fields",
64
65 # EIQ - Error Inside Quotes
66 2021 => "EIQ - NL char inside quotes, binary off",
67 2022 => "EIQ - CR char inside quotes, binary off",
68 2023 => "EIQ - QUO character not allowed",
69 2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
70 2025 => "EIQ - Loose unescaped escape",
71 2026 => "EIQ - Binary character inside quoted field, binary off",
72 2027 => "EIQ - Quoted field not terminated",
73
74 # EIF - Error Inside Field
75 2030 => "EIF - NL char inside unquoted verbatim, binary off",
76 2031 => "EIF - CR char is first char of field, not part of EOL",
77 2032 => "EIF - CR char inside unquoted, not part of EOL",
78 2034 => "EIF - Loose unescaped quote",
79 2035 => "EIF - Escaped EOF in unquoted field",
80 2036 => "EIF - ESC error",
81 2037 => "EIF - Binary character in unquoted field, binary off",
82
83 # Combine errors
84 2110 => "ECB - Binary character in Combine, binary off",
85
86 # IO errors
87 2200 => "EIO - print to IO failed. See errno",
88
89 # Hash-Ref errors
90 3001 => "EHR - Unsupported syntax for column_names ()",
91 3002 => "EHR - getline_hr () called before column_names ()",
92 3003 => "EHR - bind_columns () and column_names () fields count mismatch",
93 3004 => "EHR - bind_columns () only accepts refs to scalars",
94 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
95 3007 => "EHR - bind_columns needs refs to writable scalars",
96 3008 => "EHR - unexpected error in bound fields",
97 3009 => "EHR - print_hr () called before column_names ()",
98 3010 => "EHR - print_hr () called with invalid arguments",
99
100 4001 => "PRM - The key does not exist as field in the data",
101
102 0 => "",
103};
104
105BEGIN {
106 if ( $] < 5.006 ) {
107 $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
108 no strict 'refs';
109 *{"utf8::is_utf8"} = sub { 0; };
110 *{"utf8::decode"} = sub { };
111 }
112 elsif ( $] < 5.008 ) {
113 no strict 'refs';
114 *{"utf8::is_utf8"} = sub { 0; };
115 *{"utf8::decode"} = sub { };
116 *{"utf8::encode"} = sub { };
117 }
118 elsif ( !defined &utf8::is_utf8 ) {
119 require Encode;
120 *utf8::is_utf8 = *Encode::is_utf8;
121 }
122
123 eval q| require Scalar::Util |;
124 if ( $@ ) {
125 eval q| require B |;
126 if ( $@ ) {
127 Carp::croak $@;
128 }
129 else {
130 my %tmap = qw(
131 B::NULL SCALAR
132 B::HV HASH
133 B::AV ARRAY
134 B::CV CODE
135 B::IO IO
136 B::GV GLOB
137 B::REGEXP REGEXP
138 );
139 *Scalar::Util::reftype = sub (\$) {
140 my $r = shift;
141 return undef unless length(ref($r));
142 my $t = ref(B::svref_2object($r));
143 return
144 exists $tmap{$t} ? $tmap{$t}
145 : length(ref($$r)) ? 'REF'
146 : 'SCALAR';
147 };
148 *Scalar::Util::readonly = sub (\$) {
149 my $b = B::svref_2object( $_[0] );
150 $b->FLAGS & 0x00800000; # SVf_READONLY?
151 };
152 }
153 }
154}
155
156################################################################################
157#
158# Common pure perl methods, taken almost directly from Text::CSV_XS.
159# (These should be moved into a common class eventually, so that
160# both XS and PP don't need to apply the same changes.)
161#
162################################################################################
163
164################################################################################
165# version
166################################################################################
167
168sub version {
169 return $VERSION;
170}
171
172################################################################################
173# new
174################################################################################
175
176my %def_attr = (
177 eol => '',
178 sep_char => ',',
179 quote_char => '"',
180 escape_char => '"',
181 binary => 0,
182 decode_utf8 => 1,
183 auto_diag => 0,
184 diag_verbose => 0,
185 strict => 0,
186 blank_is_undef => 0,
187 empty_is_undef => 0,
188 allow_whitespace => 0,
189 allow_loose_quotes => 0,
190 allow_loose_escapes => 0,
191 allow_unquoted_escape => 0,
192 always_quote => 0,
193 quote_empty => 0,
194 quote_space => 1,
195 quote_binary => 1,
196 escape_null => 1,
197 keep_meta_info => 0,
198 verbatim => 0,
199 formula => 0,
200 undef_str => undef,
201 types => undef,
202 callbacks => undef,
203
204 _EOF => 0,
205 _RECNO => 0,
206 _STATUS => undef,
207 _FIELDS => undef,
208 _FFLAGS => undef,
209 _STRING => undef,
210 _ERROR_INPUT => undef,
211 _COLUMN_NAMES => undef,
212 _BOUND_COLUMNS => undef,
213 _AHEAD => undef,
214
215 ENCODING => undef,
216);
217
218my %attr_alias = (
219 quote_always => "always_quote",
220 verbose_diag => "diag_verbose",
221 quote_null => "escape_null",
222 escape => "escape_char",
223 );
224
225my $last_new_error = Text::CSV_PP->SetDiag(0);
226my $last_error;
227
228# NOT a method: is also used before bless
229sub _unhealthy_whitespace {
230 my ($self, $aw) = @_;
231 $aw or return 0; # no checks needed without allow_whitespace
232
233 my $quo = $self->{quote};
234 defined $quo && length ($quo) or $quo = $self->{quote_char};
235 my $esc = $self->{escape_char};
236
237 defined $quo && $quo =~ m/^[ \t]/ and return 1002;
238 defined $esc && $esc =~ m/^[ \t]/ and return 1002;
239
240 return 0;
241 }
242
243sub _check_sanity {
244 my $self = shift;
245
246 my $eol = $self->{eol};
247 my $sep = $self->{sep};
248 defined $sep && length ($sep) or $sep = $self->{sep_char};
249 my $quo = $self->{quote};
250 defined $quo && length ($quo) or $quo = $self->{quote_char};
251 my $esc = $self->{escape_char};
252
253# use DP;::diag ("SEP: '", DPeek ($sep),
254# "', QUO: '", DPeek ($quo),
255# "', ESC: '", DPeek ($esc),"'");
256
257 # sep_char should not be undefined
258 $sep ne "" or return 1008;
259 length ($sep) > 16 and return 1006;
260 $sep =~ m/[\r\n]/ and return 1003;
261
262 if (defined $quo) {
263 $quo eq $sep and return 1001;
264 length ($quo) > 16 and return 1007;
265 $quo =~ m/[\r\n]/ and return 1003;
266 }
267 if (defined $esc) {
268 $esc eq $sep and return 1001;
269 $esc =~ m/[\r\n]/ and return 1003;
270 }
271 if (defined $eol) {
272 length ($eol) > 16 and return 1005;
273 }
274
275 return _unhealthy_whitespace ($self, $self->{allow_whitespace});
276 }
277
278sub known_attributes {
279 sort grep !m/^_/ => "sep", "quote", keys %def_attr;
280 }
281
282sub new {
283 $last_new_error = Text::CSV_PP->SetDiag(1000,
284 'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);');
285
286 my $proto = shift;
287 my $class = ref ($proto) || $proto or return;
288 @_ > 0 && ref $_[0] ne "HASH" and return;
289 my $attr = shift || {};
290 my %attr = map {
291 my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
292 exists $attr_alias{$k} and $k = $attr_alias{$k};
293 $k => $attr->{$_};
294 } keys %$attr;
295
296 my $sep_aliased = 0;
297 if (exists $attr{sep}) {
298 $attr{sep_char} = delete $attr{sep};
299 $sep_aliased = 1;
300 }
301 my $quote_aliased = 0;
302 if (exists $attr{quote}) {
303 $attr{quote_char} = delete $attr{quote};
304 $quote_aliased = 1;
305 }
306 exists $attr{formula_handling} and
307 $attr{formula} = delete $attr{formula_handling};
308 exists $attr{formula} and
309 $attr{formula} = _supported_formula (undef, $attr{formula});
310 for (keys %attr) {
311 if (m/^[a-z]/ && exists $def_attr{$_}) {
312 # uncoverable condition false
313 defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
314 next;
315 }
316# croak?
317 $last_new_error = Text::CSV_PP->SetDiag(1000, "INI - Unknown attribute '$_'");
318 $attr{auto_diag} and error_diag ();
319 return;
320 }
321 if ($sep_aliased) {
322 my @b = unpack "U0C*", $attr{sep_char};
323 if (@b > 1) {
324 $attr{sep} = $attr{sep_char};
325 $attr{sep_char} = "\0";
326 }
327 else {
328 $attr{sep} = undef;
329 }
330 }
331 if ($quote_aliased and defined $attr{quote_char}) {
332 my @b = unpack "U0C*", $attr{quote_char};
333 if (@b > 1) {
334 $attr{quote} = $attr{quote_char};
335 $attr{quote_char} = "\0";
336 }
337 else {
338 $attr{quote} = undef;
339 }
340 }
341
342 my $self = { %def_attr, %attr };
343 if (my $ec = _check_sanity ($self)) {
344 $last_new_error = Text::CSV_PP->SetDiag($ec);
345 $attr{auto_diag} and error_diag ();
346 return;
347 }
348 if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
349 Carp::carp "The 'callbacks' attribute is set but is not a hash: ignored\n";
350 $self->{callbacks} = undef;
351 }
352
353 $last_new_error = Text::CSV_PP->SetDiag(0);
354 defined $\ && !exists $attr{eol} and $self->{eol} = $\;
355 bless $self, $class;
356 defined $self->{types} and $self->types ($self->{types});
357 $self;
358}
359
360# Keep in sync with XS!
361my %_cache_id = ( # Only expose what is accessed from within PM
362 quote_char => 0,
363 escape_char => 1,
364 sep_char => 2,
365 sep => 39, # 39 .. 55
366 binary => 3,
367 keep_meta_info => 4,
368 always_quote => 5,
369 allow_loose_quotes => 6,
370 allow_loose_escapes => 7,
371 allow_unquoted_escape => 8,
372 allow_whitespace => 9,
373 blank_is_undef => 10,
374 eol => 11,
375 quote => 15,
376 verbatim => 22,
377 empty_is_undef => 23,
378 auto_diag => 24,
379 diag_verbose => 33,
380 quote_space => 25,
381 quote_empty => 37,
382 quote_binary => 32,
383 escape_null => 31,
384 decode_utf8 => 35,
385 _has_ahead => 30,
386 _has_hooks => 36,
387 _is_bound => 26, # 26 .. 29
388 formula => 38,
389 strict => 42,
390 undef_str => 46,
391 );
392
393my %_hidden_cache_id = qw(
394 sep_len 38
395 eol_len 12
396 eol_is_cr 13
397 quo_len 16
398 has_error_input 34
399);
400
401my %_reverse_cache_id = (
402 map({$_cache_id{$_} => $_} keys %_cache_id),
403 map({$_hidden_cache_id{$_} => $_} keys %_hidden_cache_id),
404);
405
406# A `character'
407sub _set_attr_C {
408 my ($self, $name, $val, $ec) = @_;
409 defined $val or $val = 0;
410 utf8::decode ($val);
411 $self->{$name} = $val;
412 $ec = _check_sanity ($self) and croak ($self->SetDiag ($ec));
413 $self->_cache_set ($_cache_id{$name}, $val);
414 }
415
416# A flag
417sub _set_attr_X {
418 my ($self, $name, $val) = @_;
419 defined $val or $val = 0;
420 $self->{$name} = $val;
421 $self->_cache_set ($_cache_id{$name}, 0 + $val);
422 }
423
424# A number
425sub _set_attr_N {
426 my ($self, $name, $val) = @_;
427 $self->{$name} = $val;
428 $self->_cache_set ($_cache_id{$name}, 0 + $val);
429 }
430
431# Accessor methods.
432# It is unwise to change them halfway through a single file!
433sub quote_char {
434 my $self = shift;
435 if (@_) {
436 $self->_set_attr_C ("quote_char", shift);
437 $self->_cache_set ($_cache_id{quote}, "");
438 }
439 $self->{quote_char};
440 }
441
442sub quote {
443 my $self = shift;
444 if (@_) {
445 my $quote = shift;
446 defined $quote or $quote = "";
447 utf8::decode ($quote);
448 my @b = unpack "U0C*", $quote;
449 if (@b > 1) {
450 @b > 16 and croak ($self->SetDiag (1007));
451 $self->quote_char ("\0");
452 }
453 else {
454 $self->quote_char ($quote);
455 $quote = "";
456 }
457 $self->{quote} = $quote;
458
459 my $ec = _check_sanity ($self);
460 $ec and croak ($self->SetDiag ($ec));
461
462 $self->_cache_set ($_cache_id{quote}, $quote);
463 }
464 my $quote = $self->{quote};
465 defined $quote && length ($quote) ? $quote : $self->{quote_char};
466 }
467
468sub escape_char {
469 my $self = shift;
470 if (@_) {
471 my $ec = shift;
472 $self->_set_attr_C ("escape_char", $ec);
473 $ec or $self->_set_attr_X ("escape_null", 0);
474 }
475 $self->{escape_char};
476 }
477
478sub sep_char {
479 my $self = shift;
480 if (@_) {
481 $self->_set_attr_C ("sep_char", shift);
482 $self->_cache_set ($_cache_id{sep}, "");
483 }
484 $self->{sep_char};
485}
486
487sub sep {
488 my $self = shift;
489 if (@_) {
490 my $sep = shift;
491 defined $sep or $sep = "";
492 utf8::decode ($sep);
493 my @b = unpack "U0C*", $sep;
494 if (@b > 1) {
495 @b > 16 and croak ($self->SetDiag (1006));
496 $self->sep_char ("\0");
497 }
498 else {
499 $self->sep_char ($sep);
500 $sep = "";
501 }
502 $self->{sep} = $sep;
503
504 my $ec = _check_sanity ($self);
505 $ec and croak ($self->SetDiag ($ec));
506
507 $self->_cache_set ($_cache_id{sep}, $sep);
508 }
509 my $sep = $self->{sep};
510 defined $sep && length ($sep) ? $sep : $self->{sep_char};
511 }
512
513sub eol {
514 my $self = shift;
515 if (@_) {
516 my $eol = shift;
517 defined $eol or $eol = "";
518 length ($eol) > 16 and croak ($self->SetDiag (1005));
519 $self->{eol} = $eol;
520 $self->_cache_set ($_cache_id{eol}, $eol);
521 }
522 $self->{eol};
523 }
524
525sub always_quote {
526 my $self = shift;
527 @_ and $self->_set_attr_X ("always_quote", shift);
528 $self->{always_quote};
529 }
530
531sub quote_space {
532 my $self = shift;
533 @_ and $self->_set_attr_X ("quote_space", shift);
534 $self->{quote_space};
535 }
536
537sub quote_empty {
538 my $self = shift;
539 @_ and $self->_set_attr_X ("quote_empty", shift);
540 $self->{quote_empty};
541 }
542
543sub escape_null {
544 my $self = shift;
545 @_ and $self->_set_attr_X ("escape_null", shift);
546 $self->{escape_null};
547 }
548
549sub quote_null { goto &escape_null; }
550
551sub quote_binary {
552 my $self = shift;
553 @_ and $self->_set_attr_X ("quote_binary", shift);
554 $self->{quote_binary};
555 }
556
557sub binary {
558 my $self = shift;
559 @_ and $self->_set_attr_X ("binary", shift);
560 $self->{binary};
561 }
562
563sub strict {
564 my $self = shift;
565 @_ and $self->_set_attr_X ("strict", shift);
566 $self->{strict};
567 }
568
569sub _SetDiagInfo {
570 my ($self, $err, $msg) = @_;
571 $self->SetDiag ($err);
572 my $em = $self->error_diag;
573 $em =~ s/^\d+$// and $msg =~ s/^/# /;
574 my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": ";
575 join $sep => grep m/\S\S\S/ => $em, $msg;
576 }
577
578sub _supported_formula {
579 my ($self, $f) = @_;
580 defined $f or return 5;
581 $f =~ m/^(?: 0 | none )$/xi ? 0 :
582 $f =~ m/^(?: 1 | die )$/xi ? 1 :
583 $f =~ m/^(?: 2 | croak )$/xi ? 2 :
584 $f =~ m/^(?: 3 | diag )$/xi ? 3 :
585 $f =~ m/^(?: 4 | empty | )$/xi ? 4 :
586 $f =~ m/^(?: 5 | undef )$/xi ? 5 : do {
587 $self ||= "Text::CSV_PP";
588 croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
589 };
590 }
591
592sub formula {
593 my $self = shift;
594 @_ and $self->_set_attr_N ("formula", _supported_formula ($self, shift));
595 [qw( none die croak diag empty undef )]->[_supported_formula ($self, $self->{formula})];
596 }
597sub formula_handling {
598 my $self = shift;
599 $self->formula (@_);
600 }
601
602sub decode_utf8 {
603 my $self = shift;
604 @_ and $self->_set_attr_X ("decode_utf8", shift);
605 $self->{decode_utf8};
606}
607
608sub keep_meta_info {
609 my $self = shift;
610 if (@_) {
611 my $v = shift;
612 !defined $v || $v eq "" and $v = 0;
613 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
614 $self->_set_attr_X ("keep_meta_info", $v);
615 }
616 $self->{keep_meta_info};
617 }
618
619sub allow_loose_quotes {
620 my $self = shift;
621 @_ and $self->_set_attr_X ("allow_loose_quotes", shift);
622 $self->{allow_loose_quotes};
623 }
624
625sub allow_loose_escapes {
626 my $self = shift;
627 @_ and $self->_set_attr_X ("allow_loose_escapes", shift);
628 $self->{allow_loose_escapes};
629 }
630
631sub allow_whitespace {
632 my $self = shift;
633 if (@_) {
634 my $aw = shift;
635 _unhealthy_whitespace ($self, $aw) and
636 croak ($self->SetDiag (1002));
637 $self->_set_attr_X ("allow_whitespace", $aw);
638 }
639 $self->{allow_whitespace};
640 }
641
642sub allow_unquoted_escape {
643 my $self = shift;
644 @_ and $self->_set_attr_X ("allow_unquoted_escape", shift);
645 $self->{allow_unquoted_escape};
646 }
647
648sub blank_is_undef {
649 my $self = shift;
650 @_ and $self->_set_attr_X ("blank_is_undef", shift);
651 $self->{blank_is_undef};
652 }
653
654sub empty_is_undef {
655 my $self = shift;
656 @_ and $self->_set_attr_X ("empty_is_undef", shift);
657 $self->{empty_is_undef};
658 }
659
660sub verbatim {
661 my $self = shift;
662 @_ and $self->_set_attr_X ("verbatim", shift);
663 $self->{verbatim};
664 }
665
666sub undef_str {
667 my $self = shift;
668 if (@_) {
669 my $v = shift;
670 $self->{undef_str} = defined $v ? "$v" : undef;
671 $self->_cache_set ($_cache_id{undef_str}, $self->{undef_str});
672 }
673 $self->{undef_str};
674 }
675
676sub auto_diag {
677 my $self = shift;
678 if (@_) {
679 my $v = shift;
680 !defined $v || $v eq "" and $v = 0;
681 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
682 $self->_set_attr_X ("auto_diag", $v);
683 }
684 $self->{auto_diag};
685 }
686
687sub diag_verbose {
688 my $self = shift;
689 if (@_) {
690 my $v = shift;
691 !defined $v || $v eq "" and $v = 0;
692 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
693 $self->_set_attr_X ("diag_verbose", $v);
694 }
695 $self->{diag_verbose};
696 }
697
698################################################################################
699# status
700################################################################################
701
702sub status {
703 $_[0]->{_STATUS};
704}
705
706sub eof {
707 $_[0]->{_EOF};
708}
709
710sub types {
711 my $self = shift;
712
713 if (@_) {
714 if (my $types = shift) {
715 $self->{'_types'} = join("", map{ chr($_) } @$types);
716 $self->{'types'} = $types;
717 }
718 else {
719 delete $self->{'types'};
720 delete $self->{'_types'};
721 undef;
722 }
723 }
724 else {
725 $self->{'types'};
726 }
727}
728
729sub callbacks {
730 my $self = shift;
731 if (@_) {
732 my $cb;
733 my $hf = 0x00;
734 if (defined $_[0]) {
735 grep { !defined } @_ and croak ($self->SetDiag (1004));
736 $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
737 : @_ % 2 == 0 ? { @_ }
738 : croak ($self->SetDiag (1004));
739 foreach my $cbk (keys %$cb) {
740 # A key cannot be a ref. That would be stored as the *string
741 # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
742 $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
743 croak ($self->SetDiag (1004));
744 }
745 exists $cb->{error} and $hf |= 0x01;
746 exists $cb->{after_parse} and $hf |= 0x02;
747 exists $cb->{before_print} and $hf |= 0x04;
748 }
749 elsif (@_ > 1) {
750 # (undef, whatever)
751 croak ($self->SetDiag (1004));
752 }
753 $self->_set_attr_X ("_has_hooks", $hf);
754 $self->{callbacks} = $cb;
755 }
756 $self->{callbacks};
757 }
758
759################################################################################
760# error_diag
761################################################################################
762
763sub error_diag {
764 my $self = shift;
765 my @diag = (0 + $last_new_error, $last_new_error, 0, 0, 0);
766
767 # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call an
768 # overridden isa method in any class. Well, that is exacly what I want here
769 if ($self && ref $self && # Not a class method or direct call
770 UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{_ERROR_DIAG}) {
771 $diag[0] = 0 + $self->{_ERROR_DIAG};
772 $diag[1] = $self->{_ERROR_DIAG};
773 $diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
774 $diag[3] = $self->{_RECNO};
775 $diag[4] = $self->{_ERROR_FLD} if exists $self->{_ERROR_FLD};
776
777 $diag[0] && $self->{callbacks} && $self->{callbacks}{error} and
778 return $self->{callbacks}{error}->(@diag);
779 }
780
781 my $context = wantarray;
782
783 unless (defined $context) { # Void context, auto-diag
784 if ($diag[0] && $diag[0] != 2012) {
785 my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
786 $diag[4] and $msg =~ s/$/ field $diag[4]/;
787
788 unless ($self && ref $self) { # auto_diag
789 # called without args in void context
790 warn $msg;
791 return;
792 }
793
794 if ($self->{diag_verbose} and $self->{_ERROR_INPUT}) {
795 $msg .= "$self->{_ERROR_INPUT}'\n";
796 $msg .= " " x ($diag[2] - 1);
797 $msg .= "^\n";
798 }
799
800 my $lvl = $self->{auto_diag};
801 if ($lvl < 2) {
802 my @c = caller (2);
803 if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
804 my $hints = $c[10];
805 (exists $hints->{autodie} && $hints->{autodie} or
806 exists $hints->{"guard Fatal"} &&
807 !exists $hints->{"no Fatal"}) and
808 $lvl++;
809 # Future releases of autodie will probably set $^H{autodie}
810 # to "autodie @args", like "autodie :all" or "autodie open"
811 # so we can/should check for "open" or "new"
812 }
813 }
814 $lvl > 1 ? die $msg : warn $msg;
815 }
816 return;
817 }
818
819 return $context ? @diag : $diag[1];
820}
821
822sub record_number {
823 return shift->{_RECNO};
824}
825
826################################################################################
827# string
828################################################################################
829
830*string = \&_string;
831sub _string {
832 defined $_[0]->{_STRING} ? ${ $_[0]->{_STRING} } : undef;
833}
834
835################################################################################
836# fields
837################################################################################
838
839*fields = \&_fields;
840sub _fields {
841 ref($_[0]->{_FIELDS}) ? @{$_[0]->{_FIELDS}} : undef;
842}
843
844################################################################################
845# meta_info
846################################################################################
847
848sub meta_info {
849 $_[0]->{_FFLAGS} ? @{ $_[0]->{_FFLAGS} } : undef;
850}
851
852sub is_quoted {
853 return unless (defined $_[0]->{_FFLAGS});
854 return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } );
855
856 $_[0]->{_FFLAGS}->[$_[1]] & IS_QUOTED ? 1 : 0;
857}
858
859sub is_binary {
860 return unless (defined $_[0]->{_FFLAGS});
861 return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } );
862 $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0;
863}
864
865sub is_missing {
866 my ($self, $idx, $val) = @_;
867 return unless $self->{keep_meta_info}; # FIXME
868 $idx < 0 || !ref $self->{_FFLAGS} and return;
869 $idx >= @{$self->{_FFLAGS}} and return 1;
870 $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0;
871}
872
873################################################################################
874# combine
875################################################################################
876*combine = \&_combine;
877sub _combine {
878 my ($self, @fields) = @_;
879 my $str = "";
880 $self->{_FIELDS} = \@fields;
881 $self->{_STATUS} = (@fields > 0) && $self->__combine(\$str, \@fields, 0);
882 $self->{_STRING} = \$str;
883 $self->{_STATUS};
884 }
885
886################################################################################
887# parse
888################################################################################
889*parse = \&_parse;
890sub _parse {
891 my ($self, $str) = @_;
892
893 ref $str and croak ($self->SetDiag (1500));
894
895 my $fields = [];
896 my $fflags = [];
897 $self->{_STRING} = \$str;
898 if (defined $str && $self->__parse ($fields, $fflags, $str, 0)) {
899 $self->{_FIELDS} = $fields;
900 $self->{_FFLAGS} = $fflags;
901 $self->{_STATUS} = 1;
902 }
903 else {
904 $self->{_FIELDS} = undef;
905 $self->{_FFLAGS} = undef;
906 $self->{_STATUS} = 0;
907 }
908 $self->{_STATUS};
909 }
910
911sub column_names {
912 my ( $self, @columns ) = @_;
913
914 @columns or return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
915 @columns == 1 && ! defined $columns[0] and return $self->{_COLUMN_NAMES} = undef;
916
917 if ( @columns == 1 && ref $columns[0] eq "ARRAY" ) {
918 @columns = @{ $columns[0] };
919 }
920 elsif ( join "", map { defined $_ ? ref $_ : "" } @columns ) {
921 croak $self->SetDiag( 3001 );
922 }
923
924 if ( $self->{_BOUND_COLUMNS} && @columns != @{$self->{_BOUND_COLUMNS}} ) {
925 croak $self->SetDiag( 3003 );
926 }
927
928 $self->{_COLUMN_NAMES} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns ];
929 @{ $self->{_COLUMN_NAMES} };
930}
931
932sub header {
933 my ($self, $fh, @args) = @_;
934
935 $fh or croak ($self->SetDiag (1014));
936
937 my (@seps, %args);
938 for (@args) {
939 if (ref $_ eq "ARRAY") {
940 push @seps, @$_;
941 next;
942 }
943 if (ref $_ eq "HASH") {
944 %args = %$_;
945 next;
946 }
947 croak (q{usage: $csv->header ($fh, [ seps ], { options })});
948 }
949
950 defined $args{munge} && !defined $args{munge_column_names} and
951 $args{munge_column_names} = $args{munge}; # munge as alias
952 defined $args{detect_bom} or $args{detect_bom} = 1;
953 defined $args{set_column_names} or $args{set_column_names} = 1;
954 defined $args{munge_column_names} or $args{munge_column_names} = "lc";
955
956 # Reset any previous leftovers
957 $self->{_RECNO} = 0;
958 $self->{_AHEAD} = undef;
959 $self->{_COLUMN_NAMES} = undef if $args{set_column_names};
960 $self->{_BOUND_COLUMNS} = undef if $args{set_column_names};
961 $self->_cache_set($_cache_id{'_has_ahead'}, 0);
962
963 if (defined $args{sep_set}) {
964 ref $args{sep_set} eq "ARRAY" or
965 croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
966 @seps = @{$args{sep_set}};
967 }
968
969 $^O eq "MSWin32" and binmode $fh;
970 my $hdr = <$fh>;
971 # check if $hdr can be empty here, I don't think so
972 defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));
973
974 my %sep;
975 @seps or @seps = (",", ";");
976 foreach my $sep (@seps) {
977 index ($hdr, $sep) >= 0 and $sep{$sep}++;
978 }
979
980 keys %sep >= 2 and croak ($self->SetDiag (1011));
981
982 $self->sep (keys %sep);
983 my $enc = "";
984 if ($args{detect_bom}) { # UTF-7 is not supported
985 if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be" }
986 elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le" }
987 elsif ($hdr =~ s/^\xfe\xff//) { $enc = "utf-16be" }
988 elsif ($hdr =~ s/^\xff\xfe//) { $enc = "utf-16le" }
989 elsif ($hdr =~ s/^\xef\xbb\xbf//) { $enc = "utf-8" }
990 elsif ($hdr =~ s/^\xf7\x64\x4c//) { $enc = "utf-1" }
991 elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
992 elsif ($hdr =~ s/^\x0e\xfe\xff//) { $enc = "scsu" }
993 elsif ($hdr =~ s/^\xfb\xee\x28//) { $enc = "bocu-1" }
994 elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030" }
995 elsif ($hdr =~ s/^\x{feff}//) { $enc = "" }
996
997 $self->{ENCODING} = uc $enc;
998
999 $hdr eq "" and croak ($self->SetDiag (1010));
1000
1001 if ($enc) {
1002 if ($enc =~ m/([13]).le$/) {
1003 my $l = 0 + $1;
1004 my $x;
1005 $hdr .= "\0" x $l;
1006 read $fh, $x, $l;
1007 }
1008 if ($enc ne "utf-8") {
1009 require Encode;
1010 $hdr = Encode::decode ($enc, $hdr);
1011 }
1012 binmode $fh, ":encoding($enc)";
1013 }
1014 }
1015
1016 my ($ahead, $eol);
1017 if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
1018 $eol = $2;
1019 $ahead = $3;
1020 }
1021
1022 $args{munge_column_names} eq "lc" and $hdr = lc $hdr;
1023 $args{munge_column_names} eq "uc" and $hdr = uc $hdr;
1024
1025 my $hr = \$hdr; # Will cause croak on perl-5.6.x
1026 open my $h, "<", $hr or croak ($self->SetDiag (1010));
1027
1028 my $row = $self->getline ($h) or croak;
1029 close $h;
1030
1031 if ($ahead) { # Must be after getline, which creates the cache
1032 $self->_cache_set ($_cache_id{_has_ahead}, 1);
1033 $self->{_AHEAD} = $ahead;
1034 $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
1035 }
1036
1037 my @hdr = @$row;
1038 ref $args{munge_column_names} eq "CODE" and
1039 @hdr = map { $args{munge_column_names}->($_) } @hdr;
1040 ref $args{munge_column_names} eq "HASH" and
1041 @hdr = map { $args{munge_column_names}->{$_} || $_ } @hdr;
1042 my %hdr; $hdr{$_}++ for @hdr;
1043 exists $hdr{""} and croak ($self->SetDiag (1012));
1044 unless (keys %hdr == @hdr) {
1045 croak ($self->_SetDiagInfo (1013, join ", " =>
1046 map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
1047 }
1048 $args{set_column_names} and $self->column_names (@hdr);
1049 wantarray ? @hdr : $self;
1050 }
1051
1052sub bind_columns {
1053 my ( $self, @refs ) = @_;
1054
1055 @refs or return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
1056 @refs == 1 && ! defined $refs[0] and return $self->{_BOUND_COLUMNS} = undef;
1057
1058 if ( $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} ) {
1059 croak $self->SetDiag( 3003 );
1060 }
1061
1062 if ( grep { ref $_ ne "SCALAR" } @refs ) { # why don't use grep?
1063 croak $self->SetDiag( 3004 );
1064 }
1065
1066 $self->_set_attr_N("_is_bound", scalar @refs);
1067 $self->{_BOUND_COLUMNS} = [ @refs ];
1068 @refs;
1069}
1070
1071sub getline_hr {
1072 my ($self, @args, %hr) = @_;
1073 $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
1074 my $fr = $self->getline (@args) or return;
1075 if (ref $self->{_FFLAGS}) { # missing
1076 $self->{_FFLAGS}[$_] = IS_MISSING
1077 for (@$fr ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
1078 @$fr == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
1079 $self->{_FFLAGS}[0] ||= IS_MISSING;
1080 }
1081 @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
1082 \%hr;
1083}
1084
1085sub getline_hr_all {
1086 my ( $self, $io, @args ) = @_;
1087
1088 unless ( $self->{_COLUMN_NAMES} ) {
1089 croak $self->SetDiag( 3002 );
1090 }
1091
1092 my @cn = @{$self->{_COLUMN_NAMES}};
1093
1094 return [ map { my %h; @h{ @cn } = @$_; \%h } @{ $self->getline_all( $io, @args ) } ];
1095}
1096
1097sub say {
1098 my ($self, $io, @f) = @_;
1099 my $eol = $self->eol;
1100 $eol eq "" and $self->eol ($\ || $/);
1101 # say ($fh, undef) does not propage actual undef to print ()
1102 my $state = $self->print ($io, @f == 1 && !defined $f[0] ? undef : @f);
1103 $self->eol ($eol);
1104 return $state;
1105 }
1106
1107sub print_hr {
1108 my ($self, $io, $hr) = @_;
1109 $self->{_COLUMN_NAMES} or croak($self->SetDiag(3009));
1110 ref $hr eq "HASH" or croak($self->SetDiag(3010));
1111 $self->print ($io, [ map { $hr->{$_} } $self->column_names ]);
1112}
1113
1114sub fragment {
1115 my ($self, $io, $spec) = @_;
1116
1117 my $qd = qr{\s* [0-9]+ \s* }x; # digit
1118 my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x; # digit or star
1119 my $qr = qr{$qd (?: - $qs )?}x; # range
1120 my $qc = qr{$qr (?: ; $qr )*}x; # list
1121 defined $spec && $spec =~ m{^ \s*
1122 \x23 ? \s* # optional leading #
1123 ( row | col | cell ) \s* =
1124 ( $qc # for row and col
1125 | $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
1126 (?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
1127 ) \s* $}xi or croak ($self->SetDiag (2013));
1128 my ($type, $range) = (lc $1, $2);
1129
1130 my @h = $self->column_names ();
1131
1132 my @c;
1133 if ($type eq "cell") {
1134 my @spec;
1135 my $min_row;
1136 my $max_row = 0;
1137 for (split m/\s*;\s*/ => $range) {
1138 my ($tlr, $tlc, $brr, $brc) = (m{
1139 ^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
1140 (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
1141 $}x) or croak ($self->SetDiag (2013));
1142 defined $brr or ($brr, $brc) = ($tlr, $tlc);
1143 $tlr == 0 || $tlc == 0 ||
1144 ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
1145 ($brc ne "*" && ($brc == 0 || $brc < $tlc))
1146 and croak ($self->SetDiag (2013));
1147 $tlc--;
1148 $brc-- unless $brc eq "*";
1149 defined $min_row or $min_row = $tlr;
1150 $tlr < $min_row and $min_row = $tlr;
1151 $brr eq "*" || $brr > $max_row and
1152 $max_row = $brr;
1153 push @spec, [ $tlr, $tlc, $brr, $brc ];
1154 }
1155 my $r = 0;
1156 while (my $row = $self->getline ($io)) {
1157 ++$r < $min_row and next;
1158 my %row;
1159 my $lc;
1160 foreach my $s (@spec) {
1161 my ($tlr, $tlc, $brr, $brc) = @$s;
1162 $r < $tlr || ($brr ne "*" && $r > $brr) and next;
1163 !defined $lc || $tlc < $lc and $lc = $tlc;
1164 my $rr = $brc eq "*" ? $#$row : $brc;
1165 $row{$_} = $row->[$_] for $tlc .. $rr;
1166 }
1167 push @c, [ @row{sort { $a <=> $b } keys %row } ];
1168 if (@h) {
1169 my %h; @h{@h} = @{$c[-1]};
1170 $c[-1] = \%h;
1171 }
1172 $max_row ne "*" && $r == $max_row and last;
1173 }
1174 return \@c;
1175 }
1176
1177 # row or col
1178 my @r;
1179 my $eod = 0;
1180 for (split m/\s*;\s*/ => $range) {
1181 my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
1182 or croak ($self->SetDiag (2013));
1183 $to ||= $from;
1184 $to eq "*" and ($to, $eod) = ($from, 1);
1185 # $to cannot be <= 0 due to regex and ||=
1186 $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
1187 $r[$_] = 1 for $from .. $to;
1188 }
1189
1190 my $r = 0;
1191 $type eq "col" and shift @r;
1192 $_ ||= 0 for @r;
1193 while (my $row = $self->getline ($io)) {
1194 $r++;
1195 if ($type eq "row") {
1196 if (($r > $#r && $eod) || $r[$r]) {
1197 push @c, $row;
1198 if (@h) {
1199 my %h; @h{@h} = @{$c[-1]};
1200 $c[-1] = \%h;
1201 }
1202 }
1203 next;
1204 }
1205 push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#$row ];
1206 if (@h) {
1207 my %h; @h{@h} = @{$c[-1]};
1208 $c[-1] = \%h;
1209 }
1210 }
1211
1212 return \@c;
1213 }
1214
1215my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1216
1217sub _csv_attr {
1218 my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;
1219
1220 $attr{binary} = 1;
1221
1222 my $enc = delete $attr{enc} || delete $attr{encoding} || "";
1223 $enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
1224 $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
1225
1226 my $fh;
1227 my $sink = 0;
1228 my $cls = 0; # If I open a file, I have to close it
1229 my $in = delete $attr{in} || delete $attr{file} or croak $csv_usage;
1230 my $out = exists $attr{out} && !$attr{out} ? \"skip"
1231 : delete $attr{out} || delete $attr{file};
1232
1233 ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
1234
1235 $in && $out && !ref $in && !ref $out and croak join "\n" =>
1236 qq{Cannot use a string for both in and out. Instead use:},
1237 qq{ csv (in => csv (in => "$in"), out => "$out");\n};
1238
1239 if ($out) {
1240 if ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
1241 $fh = $out;
1242 }
1243 elsif (ref $out and "SCALAR" eq ref $out and defined $$out and $$out eq "skip") {
1244 delete $attr{out};
1245 $sink = 1;
1246 }
1247 else {
1248 open $fh, ">", $out or croak "$out: $!";
1249 $cls = 1;
1250 }
1251 if ($fh) {
1252 $enc and binmode $fh, $enc;
1253 unless (defined $attr{eol}) {
1254 my @layers = eval { PerlIO::get_layers ($fh) };
1255 $attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
1256 }
1257 }
1258 }
1259
1260 if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
1261 # All done
1262 }
1263 elsif (ref $in eq "SCALAR") {
1264 # Strings with code points over 0xFF may not be mapped into in-memory file handles
1265 # "<$enc" does not change that :(
1266 open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
1267 $cls = 1;
1268 }
1269 elsif (ref $in or "GLOB" eq ref \$in) {
1270 if (!ref $in && $] < 5.008005) {
1271 $fh = \*$in; # uncoverable statement ancient perl version required
1272 }
1273 else {
1274 $fh = $in;
1275 }
1276 }
1277 else {
1278 open $fh, "<$enc", $in or croak "$in: $!";
1279 $cls = 1;
1280 }
1281 $fh || $sink or croak qq{No valid source passed. "in" is required};
1282
1283 my $hdrs = delete $attr{headers};
1284 my $frag = delete $attr{fragment};
1285 my $key = delete $attr{key};
1286 my $val = delete $attr{value};
1287 my $kh = delete $attr{keep_headers} ||
1288 delete $attr{keep_column_names} ||
1289 delete $attr{kh};
1290
1291 my $cbai = delete $attr{callbacks}{after_in} ||
1292 delete $attr{after_in} ||
1293 delete $attr{callbacks}{after_parse} ||
1294 delete $attr{after_parse};
1295 my $cbbo = delete $attr{callbacks}{before_out} ||
1296 delete $attr{before_out};
1297 my $cboi = delete $attr{callbacks}{on_in} ||
1298 delete $attr{on_in};
1299
1300 my $hd_s = delete $attr{sep_set} ||
1301 delete $attr{seps};
1302 my $hd_b = delete $attr{detect_bom} ||
1303 delete $attr{bom};
1304 my $hd_m = delete $attr{munge} ||
1305 delete $attr{munge_column_names};
1306 my $hd_c = delete $attr{set_column_names};
1307
1308 for ([ quo => "quote" ],
1309 [ esc => "escape" ],
1310 [ escape => "escape_char" ],
1311 ) {
1312 my ($f, $t) = @$_;
1313 exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
1314 }
1315
1316 my $fltr = delete $attr{filter};
1317 my %fltr = (
1318 not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
1319 not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
1320 filled => sub { grep { defined && m/\S/ } @{$_[1]} },
1321 );
1322 defined $fltr && !ref $fltr && exists $fltr{$fltr} and
1323 $fltr = { 0 => $fltr{$fltr} };
1324 ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
1325 ref $fltr eq "HASH" or $fltr = undef;
1326
1327 exists $attr{formula} and
1328 $attr{formula} = _supported_formula (undef, $attr{formula});
1329
1330 defined $attr{auto_diag} or $attr{auto_diag} = 1;
1331 defined $attr{escape_null} or $attr{escape_null} = 0;
1332 my $csv = delete $attr{csv} || Text::CSV_PP->new (\%attr)
1333 or croak $last_new_error;
1334
1335 return {
1336 csv => $csv,
1337 attr => { %attr },
1338 fh => $fh,
1339 cls => $cls,
1340 in => $in,
1341 sink => $sink,
1342 out => $out,
1343 enc => $enc,
1344 hdrs => $hdrs,
1345 key => $key,
1346 val => $val,
1347 kh => $kh,
1348 frag => $frag,
1349 fltr => $fltr,
1350 cbai => $cbai,
1351 cbbo => $cbbo,
1352 cboi => $cboi,
1353 hd_s => $hd_s,
1354 hd_b => $hd_b,
1355 hd_m => $hd_m,
1356 hd_c => $hd_c,
1357 };
1358 }
1359
1360sub csv {
1361 @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
1362 @_ or croak $csv_usage;
1363
1364 my $c = _csv_attr (@_);
1365
1366 my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
1367 my %hdr;
1368 if (ref $hdrs eq "HASH") {
1369 %hdr = %$hdrs;
1370 $hdrs = "auto";
1371 }
1372
1373 if ($c->{out} && !$c->{sink}) {
1374 if (ref $in eq "CODE") {
1375 my $hdr = 1;
1376 while (my $row = $in->($csv)) {
1377 if (ref $row eq "ARRAY") {
1378 $csv->print ($fh, $row);
1379 next;
1380 }
1381 if (ref $row eq "HASH") {
1382 if ($hdr) {
1383 $hdrs ||= [ map { $hdr{$_} || $_ } keys %$row ];
1384 $csv->print ($fh, $hdrs);
1385 $hdr = 0;
1386 }
1387 $csv->print ($fh, [ @{$row}{@$hdrs} ]);
1388 }
1389 }
1390 }
1391 elsif (ref $in->[0] eq "ARRAY") { # aoa
1392 ref $hdrs and $csv->print ($fh, $hdrs);
1393 for (@{$in}) {
1394 $c->{cboi} and $c->{cboi}->($csv, $_);
1395 $c->{cbbo} and $c->{cbbo}->($csv, $_);
1396 $csv->print ($fh, $_);
1397 }
1398 }
1399 else { # aoh
1400 my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
1401 defined $hdrs or $hdrs = "auto";
1402 ref $hdrs || $hdrs eq "auto" and
1403 $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
1404 for (@{$in}) {
1405 local %_;
1406 *_ = $_;
1407 $c->{cboi} and $c->{cboi}->($csv, $_);
1408 $c->{cbbo} and $c->{cbbo}->($csv, $_);
1409 $csv->print ($fh, [ @{$_}{@hdrs} ]);
1410 }
1411 }
1412
1413 $c->{cls} and close $fh;
1414 return 1;
1415 }
1416
1417 my @row1;
1418 if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
1419 my %harg;
1420 defined $c->{hd_s} and $harg{set_set} = $c->{hd_s};
1421 defined $c->{hd_d} and $harg{detect_bom} = $c->{hd_b};
1422 defined $c->{hd_m} and $harg{munge_column_names} = $hdrs ? "none" : $c->{hd_m};
1423 defined $c->{hd_c} and $harg{set_column_names} = $hdrs ? 0 : $c->{hd_c};
1424 @row1 = $csv->header ($fh, \%harg);
1425 my @hdr = $csv->column_names;
1426 @hdr and $hdrs ||= \@hdr;
1427 }
1428
1429 if ($c->{kh}) {
1430 ref $c->{kh} eq "ARRAY" or croak ($csv->SetDiag (1501));
1431 $hdrs ||= "auto";
1432 }
1433
1434 my $key = $c->{key};
1435 if ($key) {
1436 !ref $key or ref $key eq "ARRAY" && @$key > 1 or croak ($csv->SetDiag (1501));
1437 $hdrs ||= "auto";
1438 }
1439 my $val = $c->{val};
1440 if ($val) {
1441 $key or croak ($csv->SetDiag (1502));
1442 !ref $val or ref $val eq "ARRAY" && @$val > 0 or croak ($csv->SetDiag (1503));
1443 }
1444
1445 $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
1446 if (defined $hdrs) {
1447 if (!ref $hdrs) {
1448 if ($hdrs eq "skip") {
1449 $csv->getline ($fh); # discard;
1450 }
1451 elsif ($hdrs eq "auto") {
1452 my $h = $csv->getline ($fh) or return;
1453 $hdrs = [ map { $hdr{$_} || $_ } @$h ];
1454 }
1455 elsif ($hdrs eq "lc") {
1456 my $h = $csv->getline ($fh) or return;
1457 $hdrs = [ map { lc ($hdr{$_} || $_) } @$h ];
1458 }
1459 elsif ($hdrs eq "uc") {
1460 my $h = $csv->getline ($fh) or return;
1461 $hdrs = [ map { uc ($hdr{$_} || $_) } @$h ];
1462 }
1463 }
1464 elsif (ref $hdrs eq "CODE") {
1465 my $h = $csv->getline ($fh) or return;
1466 my $cr = $hdrs;
1467 $hdrs = [ map { $cr->($hdr{$_} || $_) } @$h ];
1468 }
1469 $c->{kh} and $hdrs and @{$c->{kh}} = @$hdrs;
1470 }
1471
1472 if ($c->{fltr}) {
1473 my %f = %{$c->{fltr}};
1474 # convert headers to index
1475 my @hdr;
1476 if (ref $hdrs) {
1477 @hdr = @{$hdrs};
1478 for (0 .. $#hdr) {
1479 exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
1480 }
1481 }
1482 $csv->callbacks (after_parse => sub {
1483 my ($CSV, $ROW) = @_; # lexical sub-variables in caps
1484 foreach my $FLD (sort keys %f) {
1485 local $_ = $ROW->[$FLD - 1];
1486 local %_;
1487 @hdr and @_{@hdr} = @$ROW;
1488 $f{$FLD}->($CSV, $ROW) or return \"skip";
1489 $ROW->[$FLD - 1] = $_;
1490 }
1491 });
1492 }
1493
1494 my $frag = $c->{frag};
1495 my $ref = ref $hdrs
1496 ? # aoh
1497 do {
1498 my @h = $csv->column_names ($hdrs);
1499 my %h; $h{$_}++ for @h;
1500 exists $h{""} and croak ($csv->SetDiag (1012));
1501 unless (keys %h == @h) {
1502 croak ($csv->_SetDiagInfo (1013, join ", " =>
1503 map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
1504 }
1505 $frag ? $csv->fragment ($fh, $frag) :
1506 $key ? do {
1507 my ($k, $j, @f) = ref $key ? (undef, @$key) : ($key);
1508 if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
1509 croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
1510 }
1511 +{ map {
1512 my $r = $_;
1513 my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
1514 ( $K => (
1515 $val
1516 ? ref $val
1517 ? { map { $_ => $r->{$_} } @$val }
1518 : $r->{$val}
1519 : $r ));
1520 } @{$csv->getline_hr_all ($fh)} }
1521 }
1522 : $csv->getline_hr_all ($fh);
1523 }
1524 : # aoa
1525 $frag ? $csv->fragment ($fh, $frag)
1526 : $csv->getline_all ($fh);
1527 if ($ref) {
1528 @row1 && !$c->{hd_c} && !ref $hdrs and unshift @$ref, \@row1;
1529 }
1530 else {
1531 Text::CSV_PP->auto_diag;
1532 }
1533 $c->{cls} and close $fh;
1534 if ($ref and $c->{cbai} || $c->{cboi}) {
1535 # Default is ARRAYref, but with key =>, you'll get a hashref
1536 foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
1537 local %_;
1538 ref $r eq "HASH" and *_ = $r;
1539 $c->{cbai} and $c->{cbai}->($csv, $r);
1540 $c->{cboi} and $c->{cboi}->($csv, $r);
1541 }
1542 }
1543
1544 $c->{sink} and return;
1545
1546 defined wantarray or
1547 return csv (%{$c->{attr}}, in => $ref, headers => $hdrs, %{$c->{attr}});
1548
1549 return $ref;
1550 }
1551
1552# The end of the common pure perl part.
1553
1554################################################################################
1555#
1556# The following are methods implemented in XS in Text::CSV_XS or
1557# helper methods for Text::CSV_PP only
1558#
1559################################################################################
1560
1561sub _setup_ctx {
1562 my $self = shift;
1563
1564 $last_error = undef;
1565
1566 my $ctx;
1567 if ($self->{_CACHE}) {
1568 %$ctx = %{$self->{_CACHE}};
1569 } else {
1570 $ctx->{sep} = ',';
1571 if (defined $self->{sep_char}) {
1572 $ctx->{sep} = $self->{sep_char};
1573 }
1574 if (defined $self->{sep} and $self->{sep} ne '') {
1575 use bytes;
1576 $ctx->{sep} = $self->{sep};
1577 my $sep_len = length($ctx->{sep});
1578 $ctx->{sep_len} = $sep_len if $sep_len > 1;
1579 }
1580
1581 $ctx->{quo} = '"';
1582 if (exists $self->{quote_char}) {
1583 my $quote_char = $self->{quote_char};
1584 if (defined $quote_char and length $quote_char) {
1585 $ctx->{quo} = $quote_char;
1586 } else {
1587 $ctx->{quo} = "\0";
1588 }
1589 }
1590 if (defined $self->{quote} and $self->{quote} ne '') {
1591 use bytes;
1592 $ctx->{quo} = $self->{quote};
1593 my $quote_len = length($ctx->{quo});
1594 $ctx->{quo_len} = $quote_len if $quote_len > 1;
1595 }
1596
1597 $ctx->{escape_char} = '"';
1598 if (exists $self->{escape_char}) {
1599 my $escape_char = $self->{escape_char};
1600 if (defined $escape_char and length $escape_char) {
1601 $ctx->{escape_char} = $escape_char;
1602 } else {
1603 $ctx->{escape_char} = "\0";
1604 }
1605 }
1606
1607 if (defined $self->{eol}) {
1608 my $eol = $self->{eol};
1609 my $eol_len = length($eol);
1610 $ctx->{eol} = $eol;
1611 $ctx->{eol_len} = $eol_len;
1612 if ($eol_len == 1 and $eol eq "\015") {
1613 $ctx->{eol_is_cr} = 1;
1614 }
1615 }
1616
1617 $ctx->{undef_flg} = 0;
1618 if (defined $self->{undef_str}) {
1619 $ctx->{undef_str} = $self->{undef_str};
1620 $ctx->{undef_flg} = 3 if utf8::is_utf8($self->{undef_str});
1621 } else {
1622 $ctx->{undef_str} = undef;
1623 }
1624
1625 if (defined $self->{_types}) {
1626 $ctx->{types} = $self->{_types};
1627 $ctx->{types_len} = length($ctx->{types});
1628 }
1629
1630 if (defined $self->{_is_bound}) {
1631 $ctx->{is_bound} = $self->{_is_bound};
1632 }
1633
1634 if (defined $self->{callbacks}) {
1635 my $cb = $self->{callbacks};
1636 $ctx->{has_hooks} = 0;
1637 if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
1638 $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
1639 }
1640 if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
1641 $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
1642 }
1643 }
1644
1645 for (qw/
1646 binary decode_utf8 always_quote strict quote_empty
1647 allow_loose_quotes allow_loose_escapes
1648 allow_unquoted_escape allow_whitespace blank_is_undef
1649 empty_is_undef verbatim auto_diag diag_verbose
1650 keep_meta_info formula
1651 /) {
1652 $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
1653 }
1654 for (qw/quote_space escape_null quote_binary/) {
1655 $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
1656 }
1657 if ($ctx->{escape_char} eq "\0") {
1658 $ctx->{escape_null} = 0;
1659 }
1660
1661 # FIXME: readonly
1662 %{$self->{_CACHE}} = %$ctx;
1663 }
1664
1665 $ctx->{utf8} = 0;
1666 $ctx->{size} = 0;
1667 $ctx->{used} = 0;
1668
1669 if ($ctx->{is_bound}) {
1670 my $bound = $self->{_BOUND_COLUMNS};
1671 if ($bound and ref $bound eq 'ARRAY') {
1672 $ctx->{bound} = $bound;
1673 } else {
1674 $ctx->{is_bound} = 0;
1675 }
1676 }
1677
1678 $ctx->{eol_pos} = -1;
1679 $ctx->{eolx} = $ctx->{eol_len}
1680 ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
1681 ? 1
1682 : $ctx->{eol} =~ /\A[\015|\012]/ ? 0 : 1
1683 : 0;
1684
1685 if ($ctx->{sep_len} and $ctx->{sep_len} > 1 and _is_valid_utf8($ctx->{sep})) {
1686 $ctx->{utf8} = 1;
1687 }
1688 if ($ctx->{quo_len} and $ctx->{quo_len} > 1 and _is_valid_utf8($ctx->{quo})) {
1689 $ctx->{utf8} = 1;
1690 }
1691
1692 $ctx;
1693}
1694
1695sub _cache_set {
1696 my ($self, $idx, $value) = @_;
1697 return unless exists $self->{_CACHE};
1698 my $cache = $self->{_CACHE};
1699
1700 my $key = $_reverse_cache_id{$idx};
1701 if (!defined $key) {
1702 warn (sprintf "Unknown cache index %d ignored\n", $idx);
1703 } elsif ($key eq 'sep_char') {
1704 $cache->{sep} = $value;
1705 $cache->{sep_len} = 0;
1706 }
1707 elsif ($key eq 'quote_char') {
1708 $cache->{quo} = $value;
1709 $cache->{quo_len} = 0;
1710 }
1711 elsif ($key eq '_has_ahead') {
1712 $cache->{has_ahead} = $value;
1713 }
1714 elsif ($key eq '_has_hooks') {
1715 $cache->{has_hooks} = $value;
1716 }
1717 elsif ($key eq '_is_bound') {
1718 $cache->{is_bound} = $value;
1719 }
1720 elsif ($key eq 'sep') {
1721 use bytes;
1722 my $len = bytes::length($value);
1723 $cache->{sep} = $value if $len;
1724 $cache->{sep_len} = $len == 1 ? 0 : $len;
1725 }
1726 elsif ($key eq 'quote') {
1727 use bytes;
1728 my $len = bytes::length($value);
1729 $cache->{quo} = $value if $len;
1730 $cache->{quo_len} = $len == 1 ? 0 : $len;
1731 }
1732 elsif ($key eq 'eol') {
1733 $cache->{eol} = $value if defined($value);
1734 $cache->{eol_is_cr} = $value eq "\015" ? 1 : 0;
1735 }
1736 elsif ($key eq 'undef_str') {
1737 if (defined $value) {
1738 $cache->{undef_str} = $value;
1739 $cache->{undef_flg} = 3 if utf8::is_utf8($value);
1740 } else {
1741 $cache->{undef_str} = undef;
1742 $cache->{undef_flg} = 0;
1743 }
1744 }
1745 else {
1746 $cache->{$key} = $value;
1747 }
1748 return 1;
1749}
1750
1751sub _cache_diag {
1752 my $self = shift;
1753 unless (exists $self->{_CACHE}) {
1754 warn ("CACHE: invalid\n");
1755 return;
1756 }
1757
1758 my $cache = $self->{_CACHE};
1759 warn ("CACHE:\n");
1760 $self->__cache_show_char(quote_char => $cache->{quo});
1761 $self->__cache_show_char(escape_char => $cache->{escape_char});
1762 $self->__cache_show_char(sep_char => $cache->{sep});
1763 for (qw/
1764 binary decode_utf8 allow_loose_escapes allow_loose_quotes allow_unquoted_escape
1765 allow_whitespace always_quote quote_empty quote_space
1766 escape_null quote_binary auto_diag diag_verbose formula strict
1767 has_error_input blank_is_undef empty_is_undef has_ahead
1768 keep_meta_info verbatim has_hooks eol_is_cr eol_len
1769 /) {
1770 $self->__cache_show_byte($_ => $cache->{$_});
1771 }
1772 $self->__cache_show_str(eol => $cache->{eol_len}, $cache->{eol});
1773 $self->__cache_show_byte(sep_len => $cache->{sep_len});
1774 if ($cache->{sep_len} and $cache->{sep_len} > 1) {
1775 $self->__cache_show_str(sep => $cache->{sep_len}, $cache->{sep});
1776 }
1777 $self->__cache_show_byte(quo_len => $cache->{quo_len});
1778 if ($cache->{quo_len} and $cache->{quo_len} > 1) {
1779 $self->__cache_show_str(quote => $cache->{quo_len}, $cache->{quo});
1780 }
1781}
1782
1783sub __cache_show_byte {
1784 my ($self, $key, $value) = @_;
1785 warn (sprintf " %-21s %02x:%3d\n", $key, defined $value ? ord($value) : 0, defined $value ? $value : 0);
1786}
1787
1788sub __cache_show_char {
1789 my ($self, $key, $value) = @_;
1790 my $v = $value;
1791 if (defined $value) {
1792 my @b = unpack "U0C*", $value;
1793 $v = pack "U*", $b[0];
1794 }
1795 warn (sprintf " %-21s %02x:%s\n", $key, defined $v ? ord($v) : 0, $self->__pretty_str($v, 1));
1796}
1797
1798sub __cache_show_str {
1799 my ($self, $key, $len, $value) = @_;
1800 warn (sprintf " %-21s %02d:%s\n", $key, $len, $self->__pretty_str($value, $len));
1801}
1802
1803sub __pretty_str { # FIXME
1804 my ($self, $str, $len) = @_;
1805 return '' unless defined $str;
1806 $str = substr($str, 0, $len);
1807 $str =~ s/"/\\"/g;
1808 $str =~ s/([^\x09\x20-\x7e])/sprintf '\\x{%x}', ord($1)/eg;
1809 qq{"$str"};
1810}
1811
1812sub _hook {
1813 my ($self, $name, $fields) = @_;
1814 return 0 unless $self->{callbacks};
1815
1816 my $cb = $self->{callbacks}{$name};
1817 return 0 unless $cb && ref $cb eq 'CODE';
1818
1819 my (@res) = $cb->($self, $fields);
1820 if (@res) {
1821 return 0 if ref $res[0] eq 'SCALAR' and ${$res[0]} eq "skip";
1822 }
1823 scalar @res;
1824}
1825
1826################################################################################
1827# methods for combine
1828################################################################################
1829
1830sub __combine {
1831 my ($self, $dst, $fields, $useIO) = @_;
1832
1833 my $ctx = $self->_setup_ctx;
1834
1835 my ($binary, $quot, $sep, $esc, $quote_space) = @{$ctx}{qw/binary quo sep escape_char quote_space/};
1836
1837 if(!defined $quot or $quot eq "\0"){ $quot = ''; }
1838
1839 my $re_esc;
1840 if ($esc ne '' and $esc ne "\0") {
1841 if ($quot ne '') {
1842 $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
1843 } else {
1844 $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
1845 }
1846 }
1847
1848 my $bound = 0;
1849 my $n = @$fields - 1;
1850 if ($n < 0 and $ctx->{is_bound}) {
1851 $n = $ctx->{is_bound} - 1;
1852 $bound = 1;
1853 }
1854
1855 my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;
1856
1857 my $must_be_quoted;
1858 my @results;
1859 for(my $i = 0; $i <= $n; $i++) {
1860 my $v_ref;
1861 if ($bound) {
1862 $v_ref = $self->__bound_field($ctx, $i, 1);
1863 } else {
1864 if (@$fields > $i) {
1865 $v_ref = \($fields->[$i]);
1866 }
1867 }
1868 next unless $v_ref;
1869
1870 my $value = $$v_ref;
1871
1872 if (!defined $value) {
1873 if ($ctx->{undef_str}) {
1874 if ($ctx->{undef_flg}) {
1875 $ctx->{utf8} = 1;
1876 $ctx->{binary} = 1;
1877 }
1878 push @results, $ctx->{undef_str};
1879 } else {
1880 push @results, '';
1881 }
1882 next;
1883 }
1884
1885 if ( substr($value, 0, 1) eq '=' && $ctx->{formula} ) {
1886 $value = $self->_formula($ctx, $value, $i);
1887 if (!defined $value) {
1888 push @results, '';
1889 next;
1890 }
1891 }
1892
1893 $must_be_quoted = $ctx->{always_quote} ? 1 : 0;
1894 if ($value eq '') {
1895 $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
1896 }
1897 else {
1898
1899 if (utf8::is_utf8 $value) {
1900 $ctx->{utf8} = 1;
1901 $ctx->{binary} = 1;
1902 }
1903
1904 $must_be_quoted++ if $check_meta && $self->is_quoted($i);
1905
1906 if (!$must_be_quoted and $quot ne '') {
1907 use bytes;
1908 $must_be_quoted++ if
1909 ($value =~ /\Q$quot\E/) ||
1910 ($sep ne '' and $sep ne "\0" and $value =~ /\Q$sep\E/) ||
1911 ($esc ne '' and $esc ne "\0" and $value =~ /\Q$esc\E/) ||
1912 ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
1913 ($ctx->{quote_space} && $value =~ /[\x09\x20]/);
1914 }
1915
1916 if (!$ctx->{binary} and $value =~ /[^\x09\x20-\x7E]/) {
1917 # an argument contained an invalid character...
1918 $self->{_ERROR_INPUT} = $value;
1919 $self->SetDiag(2110);
1920 return 0;
1921 }
1922
1923 if ($re_esc) {
1924 $value =~ s/($re_esc)/$esc$1/g;
1925 }
1926 if ($ctx->{escape_null}) {
1927 $value =~ s/\0/${esc}0/g;
1928 }
1929 }
1930
1931 if ($must_be_quoted) {
1932 $value = $quot . $value . $quot;
1933 }
1934 push @results, $value;
1935 }
1936
1937 $$dst = join($sep, @results) . ( defined $ctx->{eol} ? $ctx->{eol} : '' );
1938
1939 return 1;
1940}
1941
1942sub _formula {
1943 my ($self, $ctx, $value, $i) = @_;
1944
1945 my $fa = $ctx->{formula} or return;
1946 if ($fa == 1) { die "Formulas are forbidden\n" }
1947 if ($fa == 2) { die "Formulas are forbidden\n" } # XS croak behaves like PP's "die"
1948
1949 if ($fa == 3) {
1950 my $rec = '';
1951 if ($ctx->{recno}) {
1952 $rec = sprintf " in record %lu", $ctx->{recno} + 1;
1953 }
1954 my $field = '';
1955 my $column_names = $self->{_COLUMN_NAMES};
1956 if (ref $column_names eq 'ARRAY' and @$column_names >= $i - 1) {
1957 my $column_name = $column_names->[$i - 1];
1958 $field = sprintf " (column: '%.100s')", $column_name if defined $column_name;
1959 }
1960 warn sprintf("Field %d%s%s contains formula '%s'\n", $i, $field, $rec, $value);
1961 return $value;
1962 }
1963
1964 if ($fa == 4) {
1965 return '';
1966 }
1967 if ($fa == 5) {
1968 return undef;
1969 }
1970 return;
1971}
1972
1973sub print {
1974 my ($self, $io, $fields) = @_;
1975
1976 require IO::Handle;
1977
1978 if (!defined $fields) {
1979 $fields = [];
1980 } elsif(ref($fields) ne 'ARRAY'){
1981 Carp::croak("Expected fields to be an array ref");
1982 }
1983
1984 $self->_hook(before_print => $fields);
1985
1986 my $str = "";
1987 $self->__combine(\$str, $fields, 1) or return '';
1988
1989 local $\ = '';
1990
1991 $io->print( $str ) or $self->_set_error_diag(2200);
1992}
1993
1994################################################################################
1995# methods for parse
1996################################################################################
1997
1998
1999sub __parse { # cx_xsParse
2000 my ($self, $fields, $fflags, $src, $useIO) = @_;
2001
2002 my $ctx = $self->_setup_ctx;
2003
2004 my $state = $self->___parse($ctx, $fields, $fflags, $src, $useIO);
2005 if ($state and ($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
2006 $self->_hook(after_parse => $fields);
2007 }
2008 return $state || !$last_error;
2009}
2010
2011sub ___parse { # cx_c_xsParse
2012 my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;
2013
2014 local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};
2015
2016 if ($ctx->{useIO} = $useIO) {
2017 require IO::Handle;
2018
2019 $ctx->{tmp} = undef;
2020 if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
2021 $ctx->{tmp} = $self->{_AHEAD};
2022 $ctx->{size} = length $ctx->{tmp};
2023 $ctx->{used} = 0;
2024 }
2025 } else {
2026 $ctx->{tmp} = $src;
2027 $ctx->{size} = length $src;
2028 $ctx->{used} = 0;
2029 $ctx->{utf8} = utf8::is_utf8($src);
2030 }
2031 if ($ctx->{has_error_input}) {
2032 $self->{_ERROR_INPUT} = undef;
2033 $ctx->{has_error_input} = 0;
2034 }
2035
2036 my $result = $self->____parse($ctx, $src, $fields, $fflags);
2037 $self->{_RECNO} = ++($ctx->{recno});
2038 $self->{_EOF} = '';
2039
2040 if ($ctx->{strict}) {
2041 $ctx->{strict_n} ||= $ctx->{fld_idx};
2042 if ($ctx->{strict_n} != $ctx->{fld_idx}) {
2043 $self->__parse_error($ctx, 2014, $ctx->{used});
2044 return;
2045 }
2046 }
2047
2048 if ($ctx->{useIO}) {
2049 if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
2050 $self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
2051 } else {
2052 $ctx->{has_ahead} = 0;
2053 if ($ctx->{useIO} & useIO_EOF) {
2054 $self->{_EOF} = 1;
2055 }
2056 }
2057 %{$self->{_CACHE}} = %$ctx;
2058
2059 if ($fflags) {
2060 if ($ctx->{keep_meta_info}) {
2061 $self->{_FFLAGS} = $fflags;
2062 } else {
2063 undef $fflags;
2064 }
2065 }
2066 } else {
2067 %{$self->{_CACHE}} = %$ctx;
2068 }
2069
2070 if ($result and $ctx->{types}) {
2071 my $len = @$fields;
2072 for(my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
2073 my $value = $fields->[$i];
2074 next unless defined $value;
2075 my $type = ord(substr($ctx->{types}, $i, 1));
2076 if ($type == IV) {
2077 $fields->[$i] = int($value);
2078 } elsif ($type == NV) {
2079 $fields->[$i] = $value + 0.0;
2080 }
2081 }
2082 }
2083
2084 $result;
2085}
2086
2087sub ____parse { # cx_Parse
2088 my ($self, $ctx, $src, $fields, $fflags) = @_;
2089
2090 my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
2091
2092 utf8::encode($sep) if !$ctx->{utf8} and $ctx->{sep_len};
2093 utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
2094 utf8::encode($eol) if !$ctx->{utf8} and $ctx->{eol_len};
2095
2096 my $seenSomething = 0;
2097 my $waitingForField = 1;
2098 my ($value, $v_ref);
2099 $ctx->{fld_idx} = my $fnum = 0;
2100 $ctx->{flag} = 0;
2101
2102 my $re_str = join '|', map({$_ eq "\0" ? '[\\0]' : quotemeta($_)} sort {length $b <=> length $a} grep {defined $_ and $_ ne ''} $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
2103 $ctx->{_re} = qr/$re_str/;
2104 my $re = qr/$re_str|[^\x09\x20-\x7E]|$/;
2105
2106LOOP:
2107 while($self->__get_from_src($ctx, $src)) {
2108 while($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
2109 my ($hit, $c) = ($1, $2);
2110 $ctx->{used} = pos($ctx->{tmp});
2111 if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
2112 $self->{_AHEAD} = $hit;
2113 $ctx->{has_ahead} = 1;
2114 $ctx->{has_leftover} = 1;
2115 last;
2116 }
2117 last if $seenSomething and $hit eq '' and $c eq ''; # EOF
2118
2119 # new field
2120 if (!$v_ref) {
2121 if ($ctx->{is_bound}) {
2122 $v_ref = $self->__bound_field($ctx, $fnum, 0);
2123 } else {
2124 $value = '';
2125 $v_ref = \$value;
2126 }
2127 $fnum++;
2128 return unless $v_ref;
2129 $ctx->{flag} = 0;
2130 $ctx->{fld_idx}++;
2131 }
2132
2133 $seenSomething = 1;
2134
2135 if (defined $hit and $hit ne '') {
2136 if ($waitingForField) {
2137 $waitingForField = 0;
2138 }
2139 if ($hit =~ /[^\x09\x20-\x7E]/) {
2140 $ctx->{flag} |= IS_BINARY;
2141 }
2142 $$v_ref .= $hit;
2143 }
2144
2145RESTART:
2146 if (defined $c and defined $sep and $c eq $sep) {
2147 if ($waitingForField) {
2148 # ,1,"foo, 3",,bar,
2149 # ^ ^
2150 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2151 $$v_ref = undef;
2152 } else {
2153 $$v_ref = "";
2154 }
2155 unless ($ctx->{is_bound}) {
2156 push @$fields, $$v_ref;
2157 }
2158 $v_ref = undef;
2159 if ($ctx->{keep_meta_info} and $fflags) {
2160 push @$fflags, $ctx->{flag};
2161 }
2162 } elsif ($ctx->{flag} & IS_QUOTED) {
2163 # ,1,"foo, 3",,bar,
2164 # ^
2165 $$v_ref .= $c;
2166 } else {
2167 # ,1,"foo, 3",,bar,
2168 # ^ ^ ^
2169 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2170 $v_ref = undef;
2171 $waitingForField = 1;
2172 }
2173 }
2174 elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
2175 if ($waitingForField) {
2176 # ,1,"foo, 3",,bar,\r\n
2177 # ^
2178 $ctx->{flag} |= IS_QUOTED;
2179 $waitingForField = 0;
2180 next;
2181 }
2182 if ($ctx->{flag} & IS_QUOTED) {
2183 # ,1,"foo, 3",,bar,\r\n
2184 # ^
2185 my $quoesc = 0;
2186 my $c2 = $self->__get($ctx);
2187
2188 if ($ctx->{allow_whitespace}) {
2189 # , 1 , "foo, 3" , , bar , \r\n
2190 # ^
2191 while($self->__is_whitespace($ctx, $c2)) {
2192 if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
2193 $$v_ref .= $c;
2194 $c = $c2;
2195 }
2196 $c2 = $self->__get($ctx);
2197 }
2198 }
2199
2200 if (!defined $c2) { # EOF
2201 # ,1,"foo, 3"
2202 # ^
2203 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2204 return 1;
2205 }
2206
2207 if (defined $c2 and defined $sep and $c2 eq $sep) {
2208 # ,1,"foo, 3",,bar,\r\n
2209 # ^
2210 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2211 $v_ref = undef;
2212 $waitingForField = 1;
2213 next;
2214 }
2215 if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
2216 # ,1,"foo, 3",,"bar"\n
2217 # ^
2218 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2219 return 1;
2220 }
2221
2222 if (defined $esc and $c eq $esc) {
2223 $quoesc = 1;
2224 if (defined $c2 and $c2 eq '0') {
2225 # ,1,"foo, 3"056",,bar,\r\n
2226 # ^
2227 $$v_ref .= "\0";
2228 next;
2229 }
2230 if (defined $c2 and defined $quot and $c2 eq $quot) {
2231 # ,1,"foo, 3""56",,bar,\r\n
2232 # ^
2233 if ($ctx->{utf8}) {
2234 $ctx->{flag} |= IS_BINARY;
2235 }
2236 $$v_ref .= $c2;
2237 next;
2238 }
2239 if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
2240 # ,1,"foo, 3"56",,bar,\r\n
2241 # ^
2242 $$v_ref .= $c;
2243 $c = $c2;
2244 goto RESTART;
2245 }
2246 }
2247 if (defined $c2 and $c2 eq "\015") {
2248 if ($ctx->{eol_is_cr}) {
2249 # ,1,"foo, 3"\r
2250 # ^
2251 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2252 return 1;
2253 }
2254
2255 my $c3 = $self->__get($ctx);
2256 if (defined $c3 and $c3 eq "\012") {
2257 # ,1,"foo, 3"\r\n
2258 # ^
2259 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2260 return 1;
2261 }
2262
2263 if ($ctx->{useIO} and !$ctx->{eol_len} and $c3 !~ /[^\x09\x20-\x7E]/) {
2264 # ,1,"foo\n 3",,"bar"\r
2265 # baz,4
2266 # ^
2267 $self->__set_eol_is_cr($ctx);
2268 $ctx->{used}--;
2269 $ctx->{has_ahead} = 1;
2270 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2271 return 1;
2272 }
2273
2274 $self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
2275 return;
2276 }
2277
2278 if ($ctx->{allow_loose_quotes} and !$quoesc) {
2279 # ,1,"foo, 3"456",,bar,\r\n
2280 # ^
2281 $$v_ref .= $c;
2282 $c = $c2;
2283 goto RESTART;
2284 }
2285 # 1,"foo" ",3
2286 # ^
2287 if ($quoesc) {
2288 $ctx->{used}--;
2289 $self->__error_inside_quotes($ctx, 2023);
2290 return;
2291 }
2292 $self->__error_inside_quotes($ctx, 2011);
2293 return;
2294 }
2295 # !waitingForField, !InsideQuotes
2296 if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
2297 $ctx->{flag} |= IS_ERROR;
2298 $$v_ref .= $c;
2299 } else {
2300 $self->__error_inside_field($ctx, 2034);
2301 return;
2302 }
2303 }
2304 elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
2305 # This means quote_char != escape_char
2306 if ($waitingForField) {
2307 $waitingForField = 0;
2308 if ($ctx->{allow_unquoted_escape}) {
2309 # The escape character is the first character of an
2310 # unquoted field
2311 # ... get and store next character
2312 my $c2 = $self->__get($ctx);
2313 $$v_ref = "";
2314
2315 if (!defined $c2) { # EOF
2316 $ctx->{used}--;
2317 $self->__error_inside_field($ctx, 2035);
2318 return;
2319 }
2320 if ($c2 eq '0') {
2321 $$v_ref .= "\0";
2322 }
2323 elsif (
2324 (defined $quot and $c2 eq $quot) or
2325 (defined $sep and $c2 eq $sep) or
2326 (defined $esc and $c2 eq $esc) or
2327 $ctx->{allow_loose_escapes}
2328 ) {
2329 if ($ctx->{utf8}) {
2330 $ctx->{flag} |= IS_BINARY;
2331 }
2332 $$v_ref .= $c2;
2333 } else {
2334 $self->__parse_inside_quotes($ctx, 2025);
2335 return;
2336 }
2337 }
2338 }
2339 elsif ($ctx->{flag} & IS_QUOTED) {
2340 my $c2 = $self->__get($ctx);
2341 if (!defined $c2) { # EOF
2342 $ctx->{used}--;
2343 $self->__error_inside_quotes($ctx, 2024);
2344 return;
2345 }
2346 if ($c2 eq '0') {
2347 $$v_ref .= "\0";
2348 }
2349 elsif (
2350 (defined $quot and $c2 eq $quot) or
2351 (defined $sep and $c2 eq $sep) or
2352 (defined $esc and $c2 eq $esc) or
2353 $ctx->{allow_loose_escapes}
2354 ) {
2355 if ($ctx->{utf8}) {
2356 $ctx->{flag} |= IS_BINARY;
2357 }
2358 $$v_ref .= $c2;
2359 } else {
2360 $ctx->{used}--;
2361 $self->__error_inside_quotes($ctx, 2025);
2362 return;
2363 }
2364 }
2365 elsif ($v_ref) {
2366 my $c2 = $self->__get($ctx);
2367 if (!defined $c2) { # EOF
2368 $ctx->{used}--;
2369 $self->__error_inside_field($ctx, 2035);
2370 return;
2371 }
2372 $$v_ref .= $c2;
2373 }
2374 else {
2375 $self->__error_inside_field($ctx, 2036);
2376 return;
2377 }
2378 }
2379 elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
2380 EOLX:
2381 if ($waitingForField) {
2382 # ,1,"foo, 3",,bar,
2383 # ^
2384 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2385 $$v_ref = undef;
2386 } else {
2387 $$v_ref = "";
2388 }
2389 unless ($ctx->{is_bound}) {
2390 push @$fields, $$v_ref;
2391 }
2392 if ($ctx->{keep_meta_info} and $fflags) {
2393 push @$fflags, $ctx->{flag};
2394 }
2395 return 1;
2396 }
2397 if ($ctx->{flag} & IS_QUOTED) {
2398 # ,1,"foo\n 3",,bar,
2399 # ^
2400 $ctx->{flag} |= IS_BINARY;
2401 unless ($ctx->{binary}) {
2402 $self->__error_inside_quotes($ctx, 2021);
2403 return;
2404 }
2405 $$v_ref .= $c;
2406 }
2407 elsif ($ctx->{verbatim}) {
2408 # ,1,foo\n 3,,bar,
2409 # This feature should be deprecated
2410 $ctx->{flag} |= IS_BINARY;
2411 unless ($ctx->{binary}) {
2412 $self->__error_inside_field($ctx, 2030);
2413 return;
2414 }
2415 $$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
2416 }
2417 else {
2418 # sep=,
2419 # ^
2420 if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
2421 $ctx->{sep} = $1;
2422 use bytes;
2423 my $len = length $ctx->{sep};
2424 if ($len <= 16) {
2425 $ctx->{sep_len} = $len == 1 ? 0 : $len;
2426 return $self->____parse($ctx, $src, $fields, $fflags);
2427 }
2428 }
2429
2430 # ,1,"foo\n 3",,bar
2431 # ^
2432 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2433 return 1;
2434 }
2435 }
2436 elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
2437 if ($waitingForField) {
2438 $waitingForField = 0;
2439 if ($ctx->{eol_is_cr}) {
2440 # ,1,"foo\n 3",,bar,\r
2441 # ^
2442 $c = "\012";
2443 goto RESTART;
2444 }
2445
2446 my $c2 = $self->__get($ctx);
2447 if (!defined $c2) { # EOF
2448 # ,1,"foo\n 3",,bar,\r
2449 # ^
2450 $c = undef;
2451 goto RESTART;
2452 }
2453 if ($c2 eq "\012") { # \r is not optional before EOLX!
2454 # ,1,"foo\n 3",,bar,\r\n
2455 # ^
2456 $c = $c2;
2457 goto RESTART;
2458 }
2459
2460 if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2461 # ,1,"foo\n 3",,bar,\r
2462 # baz,4
2463 # ^
2464 $self->__set_eol_is_cr($ctx);
2465 $ctx->{used}--;
2466 $ctx->{has_ahead} = 1;
2467 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2468 return 1;
2469 }
2470
2471 # ,1,"foo\n 3",,bar,\r\t
2472 # ^
2473 $ctx->{used}--;
2474 $self->__error_inside_field($ctx, 2031);
2475 return;
2476 }
2477 if ($ctx->{flag} & IS_QUOTED) {
2478 # ,1,"foo\r 3",,bar,\r\t
2479 # ^
2480 $ctx->{flag} |= IS_BINARY;
2481 unless ($ctx->{binary}) {
2482 $self->__error_inside_quotes($ctx, 2022);
2483 return;
2484 }
2485 $$v_ref .= $c;
2486 }
2487 else {
2488 if ($ctx->{eol_is_cr}) {
2489 # ,1,"foo\n 3",,bar\r
2490 # ^
2491 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2492 return 1;
2493 }
2494
2495 my $c2 = $self->__get($ctx);
2496 if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
2497 # ,1,"foo\n 3",,bar\r\n
2498 # ^
2499 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2500 return 1;
2501 }
2502
2503 if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2504 # ,1,"foo\n 3",,bar\r
2505 # baz,4
2506 # ^
2507 $self->__set_eol_is_cr($ctx);
2508 $ctx->{used}--;
2509 $ctx->{has_ahead} = 1;
2510 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2511 return 1;
2512 }
2513
2514 # ,1,"foo\n 3",,bar\r\t
2515 # ^
2516 $self->__error_inside_field($ctx, 2032);
2517 return;
2518 }
2519 }
2520 else {
2521 if ($ctx->{eolx} and $c eq $eol) {
2522 $c = '';
2523 goto EOLX;
2524 }
2525
2526 if ($waitingForField) {
2527 if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
2528 do {
2529 $c = $self->__get($ctx);
2530 last if !defined $c;
2531 } while $self->__is_whitespace($ctx, $c);
2532 goto RESTART;
2533 }
2534 $waitingForField = 0;
2535 goto RESTART;
2536 }
2537 if ($ctx->{flag} & IS_QUOTED) {
2538 if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2539 $ctx->{flag} |= IS_BINARY;
2540 unless ($ctx->{binary} or $ctx->{utf8}) {
2541 $self->__error_inside_quotes($ctx, 2026);
2542 return;
2543 }
2544 }
2545 $$v_ref .= $c;
2546 } else {
2547 if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2548 $ctx->{flag} |= IS_BINARY;
2549 unless ($ctx->{binary} or $ctx->{utf8}) {
2550 $self->__error_inside_field($ctx, 2037);
2551 return;
2552 }
2553 }
2554 $$v_ref .= $c;
2555 }
2556 }
2557 last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
2558 }
2559 }
2560
2561 if ($waitingForField) {
2562 if ($seenSomething or !$ctx->{useIO}) {
2563 # new field
2564 if (!$v_ref) {
2565 if ($ctx->{is_bound}) {
2566 $v_ref = $self->__bound_field($ctx, $fnum, 0);
2567 } else {
2568 $value = '';
2569 $v_ref = \$value;
2570 }
2571 $fnum++;
2572 return unless $v_ref;
2573 $ctx->{flag} = 0;
2574 $ctx->{fld_idx}++;
2575 }
2576 if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2577 $$v_ref = undef;
2578 } else {
2579 $$v_ref = "";
2580 }
2581 unless ($ctx->{is_bound}) {
2582 push @$fields, $$v_ref;
2583 }
2584 if ($ctx->{keep_meta_info} and $fflags) {
2585 push @$fflags, $ctx->{flag};
2586 }
2587 return 1;
2588 }
2589 $self->SetDiag(2012);
2590 return;
2591 }
2592
2593 if ($ctx->{flag} & IS_QUOTED) {
2594 $self->__error_inside_quotes($ctx, 2027);
2595 return;
2596 }
2597
2598 if ($v_ref) {
2599 $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2600 }
2601 return 1;
2602}
2603
2604sub __get_from_src {
2605 my ($self, $ctx, $src) = @_;
2606 return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
2607 return 1 if $ctx->{used} < $ctx->{size};
2608 return unless $ctx->{useIO};
2609 my $res = $src->getline;
2610 if (defined $res) {
2611 if ($ctx->{has_ahead}) {
2612 $ctx->{tmp} = $self->{_AHEAD};
2613 $ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
2614 $ctx->{tmp} .= $res;
2615 $ctx->{has_ahead} = 0;
2616 } else {
2617 $ctx->{tmp} = $res;
2618 }
2619 if ($ctx->{size} = length $ctx->{tmp}) {
2620 $ctx->{used} = -1;
2621 $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2622 pos($ctx->{tmp}) = 0;
2623 return 1;
2624 }
2625 } elsif (delete $ctx->{has_leftover}) {
2626 $ctx->{tmp} = $self->{_AHEAD};
2627 $ctx->{has_ahead} = 0;
2628 $ctx->{useIO} |= useIO_EOF;
2629 if ($ctx->{size} = length $ctx->{tmp}) {
2630 $ctx->{used} = -1;
2631 $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2632 pos($ctx->{tmp}) = 0;
2633 return 1;
2634 }
2635 }
2636 $ctx->{tmp} = '' unless defined $ctx->{tmp};
2637 $ctx->{useIO} |= useIO_EOF;
2638 return;
2639}
2640
2641sub __set_eol_is_cr {
2642 my ($self, $ctx) = @_;
2643 $ctx->{eol} = "\015";
2644 $ctx->{eol_is_cr} = 1;
2645 $ctx->{eol_len} = 1;
2646 %{$self->{_CACHE}} = %$ctx;
2647
2648 $self->{eol} = $ctx->{eol};
2649}
2650
2651sub __bound_field {
2652 my ($self, $ctx, $i, $keep) = @_;
2653 if ($i >= $ctx->{is_bound}) {
2654 $self->SetDiag(3006);
2655 return;
2656 }
2657 if (ref $ctx->{bound} eq 'ARRAY') {
2658 my $ref = $ctx->{bound}[$i];
2659 if (ref $ref) {
2660 if ($keep) {
2661 return $ref;
2662 }
2663 unless (Scalar::Util::readonly($$ref)) {
2664 $$ref = "";
2665 return $ref;
2666 }
2667 }
2668 }
2669 $self->SetDiag(3008);
2670 return;
2671}
2672
2673sub __get {
2674 my ($self, $ctx) = @_;
2675 return unless defined $ctx->{used};
2676 return if $ctx->{used} >= $ctx->{size};
2677 my $pos = pos($ctx->{tmp});
2678 if ($ctx->{tmp} =~ /\G($ctx->{_re}|.)/gs) {
2679 my $c = $1;
2680 if ($c =~ /[^\x09\x20-\x7e]/) {
2681 $ctx->{flag} |= IS_BINARY;
2682 }
2683 $ctx->{used} = pos($ctx->{tmp});
2684 return $c;
2685 } else {
2686 pos($ctx->{tmp}) = $pos;
2687 return;
2688 }
2689}
2690
2691sub __error_inside_quotes {
2692 my ($self, $ctx, $error) = @_;
2693 $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2694}
2695
2696sub __error_inside_field {
2697 my ($self, $ctx, $error) = @_;
2698 $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2699}
2700
2701sub __parse_error {
2702 my ($self, $ctx, $error, $pos) = @_;
2703 $self->{_ERROR_POS} = $pos;
2704 $self->{_ERROR_FLD} = $ctx->{fld_idx};
2705 $self->{_ERROR_INPUT} = $ctx->{tmp} if $ctx->{tmp};
2706 $self->SetDiag($error);
2707 return;
2708}
2709
2710sub __is_whitespace {
2711 my ($self, $ctx, $c) = @_;
2712 return unless defined $c;
2713 return (
2714 (!defined $ctx->{sep} or $c ne $ctx->{sep}) &&
2715 (!defined $ctx->{quo} or $c ne $ctx->{quo}) &&
2716 (!defined $ctx->{escape_char} or $c ne $ctx->{escape_char}) &&
2717 ($c eq " " or $c eq "\t")
2718 );
2719}
2720
2721sub __push_value { # AV_PUSH (part of)
2722 my ($self, $ctx, $v_ref, $fields, $fflags, $flag, $fnum) = @_;
2723 utf8::encode($$v_ref) if $ctx->{utf8};
2724 if ($ctx->{formula} && $$v_ref && substr($$v_ref, 0, 1) eq '=') {
2725 my $value = $self->_formula($ctx, $$v_ref, $fnum);
2726 push @$fields, defined $value ? $value : undef;
2727 return;
2728 }
2729 if (
2730 (!defined $$v_ref or $$v_ref eq '') and
2731 ($ctx->{empty_is_undef} or (!($flag & IS_QUOTED) and $ctx->{blank_is_undef}))
2732 ) {
2733 $$v_ref = undef;
2734 } else {
2735 if ($ctx->{allow_whitespace} && !($flag & IS_QUOTED)) {
2736 $$v_ref =~ s/[ \t]+$//;
2737 }
2738 if ($flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} || _is_valid_utf8($$v_ref))) {
2739 utf8::decode($$v_ref);
2740 }
2741 }
2742 unless ($ctx->{is_bound}) {
2743 push @$fields, $$v_ref;
2744 }
2745 if ($ctx->{keep_meta_info} and $fflags) {
2746 push @$fflags, $flag;
2747 }
2748}
2749
2750sub getline {
2751 my ($self, $io) = @_;
2752
2753 my (@fields, @fflags);
2754 my $res = $self->__parse(\@fields, \@fflags, $io, 1);
2755 $res ? \@fields : undef;
2756}
2757
2758sub getline_all {
2759 my ( $self, $io, $offset, $len ) = @_;
2760
2761 my $ctx = $self->_setup_ctx;
2762
2763 my $tail = 0;
2764 my $n = 0;
2765 $offset ||= 0;
2766
2767 if ( $offset < 0 ) {
2768 $tail = -$offset;
2769 $offset = -1;
2770 }
2771
2772 my (@row, @list);
2773 while ($self->___parse($ctx, \@row, undef, $io, 1)) {
2774 $ctx = $self->_setup_ctx;
2775
2776 if ($offset > 0) {
2777 $offset--;
2778 @row = ();
2779 next;
2780 }
2781 if ($n++ >= $tail and $tail) {
2782 shift @list;
2783 $n--;
2784 }
2785 if (($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
2786 unless ($self->_hook(after_parse => \@row)) {
2787 @row = ();
2788 next;
2789 }
2790 }
2791 push @list, [@row];
2792 @row = ();
2793
2794 last if defined $len && $n >= $len and $offset >= 0; # exceeds limit size
2795 }
2796
2797 if ( defined $len && $n > $len ) {
2798 @list = splice( @list, 0, $len);
2799 }
2800
2801 return \@list;
2802}
2803
2804sub _is_valid_utf8 {
2805 return ( $_[0] =~ /^(?:
2806 [\x00-\x7F]
2807 |[\xC2-\xDF][\x80-\xBF]
2808 |[\xE0][\xA0-\xBF][\x80-\xBF]
2809 |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
2810 |[\xED][\x80-\x9F][\x80-\xBF]
2811 |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
2812 |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
2813 |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
2814 |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
2815 )+$/x ) ? 1 : 0;
2816}
2817
2818################################################################################
2819# methods for errors
2820################################################################################
2821
2822sub _set_error_diag {
2823 my ( $self, $error, $pos ) = @_;
2824
2825 $self->SetDiag($error);
2826
2827 if (defined $pos) {
2828 $_[0]->{_ERROR_POS} = $pos;
2829 }
2830
2831 return;
2832}
2833
2834sub error_input {
2835 my $self = shift;
2836 if ($self and ((Scalar::Util::reftype($self) || '') eq 'HASH' or (ref $self) =~ /^Text::CSV/)) {
2837 return $self->{_ERROR_INPUT};
2838 }
2839 return;
2840}
2841
2842sub _sv_diag {
2843 my ($self, $error) = @_;
2844 bless [$error, $ERRORS->{$error}], 'Text::CSV::ErrorDiag';
2845}
2846
2847sub _set_diag {
2848 my ($self, $ctx, $error) = @_;
2849
2850 $last_error = $self->_sv_diag($error);
2851 $self->{_ERROR_DIAG} = $last_error;
2852 if ($error == 0) {
2853 $self->{_ERROR_POS} = 0;
2854 $self->{_ERROR_FLD} = 0;
2855 $self->{_ERROR_INPUT} = undef;
2856 $ctx->{has_error_input} = 0;
2857 }
2858 if ($error == 2012) { # EOF
2859 $self->{_EOF} = 1;
2860 }
2861 if ($ctx->{auto_diag}) {
2862 $self->error_diag;
2863 }
2864 return $last_error;
2865}
2866
2867sub SetDiag {
2868 my ($self, $error, $errstr) = @_;
2869 my $res;
2870 if (ref $self) {
2871 my $ctx = $self->_setup_ctx;
2872 $res = $self->_set_diag($ctx, $error);
2873
2874 } else {
2875 $res = $self->_sv_diag($error);
2876 }
2877 if (defined $errstr) {
2878 $res->[1] = $errstr;
2879 }
2880 $res;
2881}
2882
2883################################################################################
2884package Text::CSV::ErrorDiag;
2885
2886use strict;
2887use overload (
2888 '""' => \&stringify,
2889 '+' => \&numeric,
2890 '-' => \&numeric,
2891 '*' => \&numeric,
2892 '/' => \&numeric,
2893 fallback => 1,
2894);
2895
2896
2897sub numeric {
2898 my ($left, $right) = @_;
2899 return ref $left ? $left->[0] : $right->[0];
2900}
2901
2902
2903sub stringify {
2904 $_[0]->[1];
2905}
2906################################################################################
29071;
2908__END__
2909
2910=head1 NAME
2911
2912Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
2913
2914
2915=head1 SYNOPSIS
2916
2917This section is taken from Text::CSV_XS.
2918
2919 # Functional interface
2920 use Text::CSV_PP qw( csv );
2921
2922 # Read whole file in memory
2923 my $aoa = csv (in => "data.csv"); # as array of array
2924 my $aoh = csv (in => "data.csv",
2925 headers => "auto"); # as array of hash
2926
2927 # Write array of arrays as csv file
2928 csv (in => $aoa, out => "file.csv", sep_char=> ";");
2929
2930 # Only show lines where "code" is odd
2931 csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
2932
2933 # Object interface
2934 use Text::CSV_PP;
2935
2936 my @rows;
2937 # Read/parse CSV
2938 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
2939 open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
2940 while (my $row = $csv->getline ($fh)) {
2941 $row->[2] =~ m/pattern/ or next; # 3rd field should match
2942 push @rows, $row;
2943 }
2944 close $fh;
2945
2946 # and write as CSV
2947 open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
2948 $csv->say ($fh, $_) for @rows;
2949 close $fh or die "new.csv: $!";
2950
2951=head1 DESCRIPTION
2952
2953Text::CSV_PP is a pure-perl module that provides facilities for the
2954composition and decomposition of comma-separated values. This is
2955(almost) compatible with much faster L<Text::CSV_XS>, and mainly
2956used as its fallback module when you use L<Text::CSV> module without
2957having installed Text::CSV_XS. If you don't have any reason to use
2958this module directly, use Text::CSV for speed boost and portability
2959(or maybe Text::CSV_XS when you write an one-off script and don't need
2960to care about portability).
2961
2962The following caveats are taken from the doc of Text::CSV_XS.
2963
2964=head2 Embedded newlines
2965
2966B<Important Note>: The default behavior is to accept only ASCII characters
2967in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
2968fields can not contain newlines. If your data contains newlines embedded in
2969fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
2970set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
2971parsing options, you will always want to set binary.
2972
2973But you still have the problem that you have to pass a correct line to the
2974L</parse> method, which is more complicated from the usual point of usage:
2975
2976 my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
2977 while (<>) { # WRONG!
2978 $csv->parse ($_);
2979 my @fields = $csv->fields ();
2980 }
2981
2982this will break, as the C<while> might read broken lines: it does not care
2983about the quoting. If you need to support embedded newlines, the way to go
2984is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
2985B<and> C<\r\n> by default) and then
2986
2987 my $csv = Text::CSV_PP->new ({ binary => 1 });
2988 open my $fh, "<", $file or die "$file: $!";
2989 while (my $row = $csv->getline ($fh)) {
2990 my @fields = @$row;
2991 }
2992
2993The old(er) way of using global file handles is still supported
2994
2995 while (my $row = $csv->getline (*ARGV)) { ... }
2996
2997=head2 Unicode
2998
2999Unicode is only tested to work with perl-5.8.2 and up.
3000
3001See also L</BOM>.
3002
3003The simplest way to ensure the correct encoding is used for in- and output
3004is by either setting layers on the filehandles, or setting the L</encoding>
3005argument for L</csv>.
3006
3007 open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
3008or
3009 my $aoa = csv (in => "in.csv", encoding => "UTF-8");
3010
3011 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
3012or
3013 csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
3014
3015On parsing (both for L</getline> and L</parse>), if the source is marked
3016being UTF8, then all fields that are marked binary will also be marked UTF8.
3017
3018On combining (L</print> and L</combine>): if any of the combining fields
3019was marked UTF8, the resulting string will be marked as UTF8. Note however
3020that all fields I<before> the first field marked UTF8 and contained 8-bit
3021characters that were not upgraded to UTF8, these will be C<bytes> in the
3022resulting string too, possibly causing unexpected errors. If you pass data
3023of different encoding, or you don't know if there is different encoding,
3024force it to be upgraded before you pass them on:
3025
3026 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
3027
3028For complete control over encoding, please use L<Text::CSV::Encoded>:
3029
3030 use Text::CSV::Encoded;
3031 my $csv = Text::CSV::Encoded->new ({
3032 encoding_in => "iso-8859-1", # the encoding comes into Perl
3033 encoding_out => "cp1252", # the encoding comes out of Perl
3034 });
3035
3036 $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
3037 # combine () and print () accept *literally* utf8 encoded data
3038 # parse () and getline () return *literally* utf8 encoded data
3039
3040 $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
3041 # combine () and print () accept UTF8 marked data
3042 # parse () and getline () return UTF8 marked data
3043
3044=head2 BOM
3045
3046BOM (or Byte Order Mark) handling is available only inside the L</header>
3047method. This method supports the following encodings: C<utf-8>, C<utf-1>,
3048C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
3049C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>.
3050
3051If a file has a BOM, the easiest way to deal with that is
3052
3053 my $aoh = csv (in => $file, detect_bom => 1);
3054
3055All records will be encoded based on the detected BOM.
3056
3057This implies a call to the L</header> method, which defaults to also set
3058the L</column_names>. So this is B<not> the same as
3059
3060 my $aoh = csv (in => $file, headers => "auto");
3061
3062which only reads the first record to set L</column_names> but ignores any
3063meaning of possible present BOM.
3064
3065=head1 METHODS
3066
3067This section is also taken from Text::CSV_XS.
3068
3069=head2 version
3070
3071(Class method) Returns the current module version.
3072
3073=head2 new
3074
3075(Class method) Returns a new instance of class Text::CSV_PP. The attributes
3076are described by the (optional) hash ref C<\%attr>.
3077
3078 my $csv = Text::CSV_PP->new ({ attributes ... });
3079
3080The following attributes are available:
3081
3082=head3 eol
3083
3084 my $csv = Text::CSV_PP->new ({ eol => $/ });
3085 $csv->eol (undef);
3086 my $eol = $csv->eol;
3087
3088The end-of-line string to add to rows for L</print> or the record separator
3089for L</getline>.
3090
3091When not passed in a B<parser> instance, the default behavior is to accept
3092C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
3093all. Passing C<undef> or the empty string behave the same.
3094
3095When not passed in a B<generating> instance, records are not terminated at
3096all, so it is probably wise to pass something you expect. A safe choice for
3097C<eol> on output is either C<$/> or C<\r\n>.
3098
3099Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
3100(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
3101Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
3102
3103If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
3104only a Carriage Return without Line Feed, will be L</parse>d correct.
3105
3106=head3 sep_char
3107
3108 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
3109 $csv->sep_char (";");
3110 my $c = $csv->sep_char;
3111
3112The char used to separate fields, by default a comma. (C<,>). Limited to a
3113single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
3114(tilde). When longer sequences are required, use L<C<sep>|/sep>.
3115
3116The separation character can not be equal to the quote character or to the
3117escape character.
3118
3119=head3 sep
3120
3121 my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
3122 $csv->sep (";");
3123 my $sep = $csv->sep;
3124
3125The chars used to separate fields, by default undefined. Limited to 8 bytes.
3126
3127When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
3128acts as an alias to L<C<sep_char>|/sep_char>.
3129
3130=head3 quote_char
3131
3132 my $csv = Text::CSV_PP->new ({ quote_char => "'" });
3133 $csv->quote_char (undef);
3134 my $c = $csv->quote_char;
3135
3136The character to quote fields containing blanks or binary data, by default
3137the double quote character (C<">). A value of undef suppresses quote chars
3138(for simple cases only). Limited to a single-byte character, usually in the
3139range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
3140required, use L<C<quote>|/quote>.
3141
3142C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
3143
3144=head3 quote
3145
3146 my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
3147 $csv->quote ("'");
3148 my $quote = $csv->quote;
3149
3150The chars used to quote fields, by default undefined. Limited to 8 bytes.
3151
3152When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
3153it acts as an alias to L<C<quote_char>|/quote_char>.
3154
3155=head3 escape_char
3156
3157 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3158 $csv->escape_char (":");
3159 my $c = $csv->escape_char;
3160
3161The character to escape certain characters inside quoted fields. This is
3162limited to a single-byte character, usually in the range from C<0x20>
3163(space) to C<0x7E> (tilde).
3164
3165The C<escape_char> defaults to being the double-quote mark (C<">). In other
3166words the same as the default L<C<quote_char>|/quote_char>. This means that
3167doubling the quote mark in a field escapes it:
3168
3169 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
3170
3171If you change the L<C<quote_char>|/quote_char> without changing the
3172C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
3173If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
3174it you will need to also change the C<escape_char> to be the same as what
3175you have changed the L<C<quote_char>|/quote_char> to.
3176
3177Setting C<escape_char> to <undef> or C<""> will disable escaping completely
3178and is greatly discouraged. This will also disable C<escape_null>.
3179
3180The escape character can not be equal to the separation character.
3181
3182=head3 binary
3183
3184 my $csv = Text::CSV_PP->new ({ binary => 1 });
3185 $csv->binary (0);
3186 my $f = $csv->binary;
3187
3188If this attribute is C<1>, you may use binary characters in quoted fields,
3189including line feeds, carriage returns and C<NULL> bytes. (The latter could
3190be escaped as C<"0>.) By default this feature is off.
3191
3192If a string is marked UTF8, C<binary> will be turned on automatically when
3193binary characters other than C<CR> and C<NL> are encountered. Note that a
3194simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
3195so setting C<< { binary => 1 } >> is still a wise option.
3196
3197=head3 strict
3198
3199 my $csv = Text::CSV_PP->new ({ strict => 1 });
3200 $csv->strict (0);
3201 my $f = $csv->strict;
3202
3203If this attribute is set to C<1>, any row that parses to a different number
3204of fields than the previous row will cause the parser to throw error 2014.
3205
3206=head3 formula_handling
3207
3208=head3 formula
3209
3210 my $csv = Text::CSV_PP->new ({ formula => "none" });
3211 $csv->formula ("none");
3212 my $f = $csv->formula;
3213
3214This defines the behavior of fields containing I<formulas>. As formulas are
3215considered dangerous in spreadsheets, this attribute can define an optional
3216action to be taken if a field starts with an equal sign (C<=>).
3217
3218For purpose of code-readability, this can also be written as
3219
3220 my $csv = Text::CSV_PP->new ({ formula_handling => "none" });
3221 $csv->formula_handling ("none");
3222 my $f = $csv->formula_handling;
3223
3224Possible values for this attribute are
3225
3226=over 2
3227
3228=item none
3229
3230Take no specific action. This is the default.
3231
3232 $csv->formula ("none");
3233
3234=item die
3235
3236Cause the process to C<die> whenever a leading C<=> is encountered.
3237
3238 $csv->formula ("die");
3239
3240=item croak
3241
3242Cause the process to C<croak> whenever a leading C<=> is encountered. (See
3243L<Carp>)
3244
3245 $csv->formula ("croak");
3246
3247=item diag
3248
3249Report position and content of the field whenever a leading C<=> is found.
3250The value of the field is unchanged.
3251
3252 $csv->formula ("diag");
3253
3254=item empty
3255
3256Replace the content of fields that start with a C<=> with the empty string.
3257
3258 $csv->formula ("empty");
3259 $csv->formula ("");
3260
3261=item undef
3262
3263Replace the content of fields that start with a C<=> with C<undef>.
3264
3265 $csv->formula ("undef");
3266 $csv->formula (undef);
3267
3268=back
3269
3270All other values will give a warning and then fallback to C<diag>.
3271
3272=head3 decode_utf8
3273
3274 my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
3275 $csv->decode_utf8 (0);
3276 my $f = $csv->decode_utf8;
3277
3278This attributes defaults to TRUE.
3279
3280While I<parsing>, fields that are valid UTF-8, are automatically set to be
3281UTF-8, so that
3282
3283 $csv->parse ("\xC4\xA8\n");
3284
3285results in
3286
3287 PV("\304\250"\0) [UTF8 "\x{128}"]
3288
3289Sometimes it might not be a desired action. To prevent those upgrades, set
3290this attribute to false, and the result will be
3291
3292 PV("\304\250"\0)
3293
3294=head3 auto_diag
3295
3296 my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
3297 $csv->auto_diag (2);
3298 my $l = $csv->auto_diag;
3299
3300Set this attribute to a number between C<1> and C<9> causes L</error_diag>
3301to be automatically called in void context upon errors.
3302
3303In case of error C<2012 - EOF>, this call will be void.
3304
3305If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
3306on errors instead of C<warn>. If set to anything unrecognized, it will be
3307silently ignored.
3308
3309Future extensions to this feature will include more reliable auto-detection
3310of C<autodie> being active in the scope of which the error occurred which
3311will increment the value of C<auto_diag> with C<1> the moment the error is
3312detected.
3313
3314=head3 diag_verbose
3315
3316 my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
3317 $csv->diag_verbose (2);
3318 my $l = $csv->diag_verbose;
3319
3320Set the verbosity of the output triggered by C<auto_diag>. Currently only
3321adds the current input-record-number (if known) to the diagnostic output
3322with an indication of the position of the error.
3323
3324=head3 blank_is_undef
3325
3326 my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
3327 $csv->blank_is_undef (0);
3328 my $f = $csv->blank_is_undef;
3329
3330Under normal circumstances, C<CSV> data makes no distinction between quoted-
3331and unquoted empty fields. These both end up in an empty string field once
3332read, thus
3333
3334 1,"",," ",2
3335
3336is read as
3337
3338 ("1", "", "", " ", "2")
3339
3340When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote>
3341or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the
3342result of an undefined value. To enable this distinction when I<reading>
3343C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
3344fields to be set to C<undef>, causing the above to be parsed as
3345
3346 ("1", "", undef, " ", "2")
3347
3348note that this is specifically important when loading C<CSV> fields into a
3349database that allows C<NULL> values, as the perl equivalent for C<NULL> is
3350C<undef> in L<DBI> land.
3351
3352=head3 empty_is_undef
3353
3354 my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
3355 $csv->empty_is_undef (0);
3356 my $f = $csv->empty_is_undef;
3357
3358Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
3359attribute converts all empty fields to C<undef>, so
3360
3361 1,"",," ",2
3362
3363is read as
3364
3365 (1, undef, undef, " ", 2)
3366
3367Note that this effects only fields that are originally empty, not fields
3368that are empty after stripping allowed whitespace. YMMV.
3369
3370=head3 allow_whitespace
3371
3372 my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
3373 $csv->allow_whitespace (0);
3374 my $f = $csv->allow_whitespace;
3375
3376When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
3377surrounding the separation character is removed when parsing. If either
3378C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
3379L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
3380be considered whitespace.
3381
3382Now lines like:
3383
3384 1 , "foo" , bar , 3 , zapp
3385
3386are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
3387
3388Note that B<all> whitespace is stripped from both start and end of each
3389field. That would make it I<more> than a I<feature> to enable parsing bad
3390C<CSV> lines, as
3391
3392 1, 2.0, 3, ape , monkey
3393
3394will now be parsed as
3395
3396 ("1", "2.0", "3", "ape", "monkey")
3397
3398even if the original line was perfectly acceptable C<CSV>.
3399
3400=head3 allow_loose_quotes
3401
3402 my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
3403 $csv->allow_loose_quotes (0);
3404 my $f = $csv->allow_loose_quotes;
3405
3406By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
3407characters like
3408
3409 1,foo "bar" baz,42
3410
3411would result in parse error 2034. Though it is still bad practice to allow
3412this format, we cannot help the fact that some vendors make their
3413applications spit out lines styled this way.
3414
3415If there is B<really> bad C<CSV> data, like
3416
3417 1,"foo "bar" baz",42
3418
3419or
3420
3421 1,""foo bar baz"",42
3422
3423there is a way to get this data-line parsed and leave the quotes inside the
3424quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
3425B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
3426to L<C<quote_char>|/quote_char>.
3427
3428=head3 allow_loose_escapes
3429
3430 my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
3431 $csv->allow_loose_escapes (0);
3432 my $f = $csv->allow_loose_escapes;
3433
3434Parsing fields that have L<C<escape_char>|/escape_char> characters that
3435escape characters that do not need to be escaped, like:
3436
3437 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3438 $csv->parse (qq{1,"my bar\'s",baz,42});
3439
3440would result in parse error 2025. Though it is bad practice to allow this
3441format, this attribute enables you to treat all escape character sequences
3442equal.
3443
3444=head3 allow_unquoted_escape
3445
3446 my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
3447 $csv->allow_unquoted_escape (0);
3448 my $f = $csv->allow_unquoted_escape;
3449
3450A backward compatibility issue where L<C<escape_char>|/escape_char> differs
3451from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
3452to be in the first position of a field. If L<C<quote_char>|/quote_char> is
3453equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
3454this would be illegal:
3455
3456 1,\0,2
3457
3458Setting this attribute to C<1> might help to overcome issues with backward
3459compatibility and allow this style.
3460
3461=head3 always_quote
3462
3463 my $csv = Text::CSV_PP->new ({ always_quote => 1 });
3464 $csv->always_quote (0);
3465 my $f = $csv->always_quote;
3466
3467By default the generated fields are quoted only if they I<need> to be. For
3468example, if they contain the separator character. If you set this attribute
3469to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
3470quoted, see L</blank_is_undef>). This makes it quite often easier to handle
3471exported data in external applications.
3472
3473=head3 quote_space
3474
3475 my $csv = Text::CSV_PP->new ({ quote_space => 1 });
3476 $csv->quote_space (0);
3477 my $f = $csv->quote_space;
3478
3479By default, a space in a field would trigger quotation. As no rule exists
3480this to be forced in C<CSV>, nor any for the opposite, the default is true
3481for safety. You can exclude the space from this trigger by setting this
3482attribute to 0.
3483
3484=head3 quote_empty
3485
3486 my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
3487 $csv->quote_empty (0);
3488 my $f = $csv->quote_empty;
3489
3490By default the generated fields are quoted only if they I<need> to be. An
3491empty (defined) field does not need quotation. If you set this attribute to
3492C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
3493quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
3494
3495=head3 quote_binary
3496
3497 my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
3498 $csv->quote_binary (0);
3499 my $f = $csv->quote_binary;
3500
3501By default, all "unsafe" bytes inside a string cause the combined field to
3502be quoted. By setting this attribute to C<0>, you can disable that trigger
3503for bytes >= C<0x7F>.
3504
3505=head3 escape_null
3506
3507 my $csv = Text::CSV_PP->new ({ escape_null => 1 });
3508 $csv->escape_null (0);
3509 my $f = $csv->escape_null;
3510
3511By default, a C<NULL> byte in a field would be escaped. This option enables
3512you to treat the C<NULL> byte as a simple binary character in binary mode
3513(the C<< { binary => 1 } >> is set). The default is true. You can prevent
3514C<NULL> escapes by setting this attribute to C<0>.
3515
3516When the C<escape_char> attribute is set to undefined, this attribute will
3517be set to false.
3518
3519The default setting will encode "=\x00=" as
3520
3521 "="0="
3522
3523With C<escape_null> set, this will result in
3524
3525 "=\x00="
3526
3527The default when using the C<csv> function is C<false>.
3528
3529For backward compatibility reasons, the deprecated old name C<quote_null>
3530is still recognized.
3531
3532=head3 keep_meta_info
3533
3534 my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
3535 $csv->keep_meta_info (0);
3536 my $f = $csv->keep_meta_info;
3537
3538By default, the parsing of input records is as simple and fast as possible.
3539However, some parsing information - like quotation of the original field -
3540is lost in that process. Setting this flag to true enables retrieving that
3541information after parsing with the methods L</meta_info>, L</is_quoted>,
3542and L</is_binary> described below. Default is false for performance.
3543
3544If you set this attribute to a value greater than 9, than you can control
3545output quotation style like it was used in the input of the the last parsed
3546record (unless quotation was added because of other reasons).
3547
3548 my $csv = Text::CSV_PP->new ({
3549 binary => 1,
3550 keep_meta_info => 1,
3551 quote_space => 0,
3552 });
3553
3554 my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
3555
3556 $csv->print (*STDOUT, \@row);
3557 # 1,,, , ,f,g,"h""h",help,help
3558 $csv->keep_meta_info (11);
3559 $csv->print (*STDOUT, \@row);
3560 # 1,,"", ," ",f,"g","h""h",help,"help"
3561
3562=head3 undef_str
3563
3564 my $csv = Text::CSV_PP->new ({ undef_str => "\\N" });
3565 $csv->undef_str (undef);
3566 my $s = $csv->undef_str;
3567
3568This attribute optionally defines the output of undefined fields. The value
3569passed is not changed at all, so if it needs quotation, the quotation needs
3570to be included in the value of the attribute. Use with caution, as passing
3571a value like C<",",,,,"""> will for sure mess up your output. The default
3572for this attribute is C<undef>, meaning no special treatment.
3573
3574This attribute is useful when exporting CSV data to be imported in custom
3575loaders, like for MySQL, that recognize special sequences for C<NULL> data.
3576
3577This attribute has no meaning when parsing CSV data.
3578
3579=head3 verbatim
3580
3581 my $csv = Text::CSV_PP->new ({ verbatim => 1 });
3582 $csv->verbatim (0);
3583 my $f = $csv->verbatim;
3584
3585This is a quite controversial attribute to set, but makes some hard things
3586possible.
3587
3588The rationale behind this attribute is to tell the parser that the normally
3589special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
3590special when this flag is set, and be dealt with as being ordinary binary
3591characters. This will ease working with data with embedded newlines.
3592
3593When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
3594every line.
3595
3596Imagine a file format like
3597
3598 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
3599
3600where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
3601C<^> (caret). None of the fields is quoted, but embedded binary data is
3602likely to be present. With the specific line ending, this should not be too
3603hard to detect.
3604
3605By default, Text::CSV_PP' parse function is instructed to only know about
3606C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
3607embedded newline as a real C<end-of-line>, so it can scan the next line if
3608binary is true, and the newline is inside a quoted field. With this option,
3609we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
3610a binary character.
3611
3612For L</parse> this means that the parser has no more idea about line ending
3613and L</getline> C<chomp>s line endings on reading.
3614
3615=head3 types
3616
3617A set of column types; the attribute is immediately passed to the L</types>
3618method.
3619
3620=head3 callbacks
3621
3622See the L</Callbacks> section below.
3623
3624=head3 accessors
3625
3626To sum it up,
3627
3628 $csv = Text::CSV_PP->new ();
3629
3630is equivalent to
3631
3632 $csv = Text::CSV_PP->new ({
3633 eol => undef, # \r, \n, or \r\n
3634 sep_char => ',',
3635 sep => undef,
3636 quote_char => '"',
3637 quote => undef,
3638 escape_char => '"',
3639 binary => 0,
3640 decode_utf8 => 1,
3641 auto_diag => 0,
3642 diag_verbose => 0,
3643 blank_is_undef => 0,
3644 empty_is_undef => 0,
3645 allow_whitespace => 0,
3646 allow_loose_quotes => 0,
3647 allow_loose_escapes => 0,
3648 allow_unquoted_escape => 0,
3649 always_quote => 0,
3650 quote_empty => 0,
3651 quote_space => 1,
3652 escape_null => 1,
3653 quote_binary => 1,
3654 keep_meta_info => 0,
3655 strict => 0,
3656 formula => 0,
3657 verbatim => 0,
3658 undef_str => undef,
3659 types => undef,
3660 callbacks => undef,
3661 });
3662
3663For all of the above mentioned flags, an accessor method is available where
3664you can inquire the current value, or change the value
3665
3666 my $quote = $csv->quote_char;
3667 $csv->binary (1);
3668
3669It is not wise to change these settings halfway through writing C<CSV> data
3670to a stream. If however you want to create a new stream using the available
3671C<CSV> object, there is no harm in changing them.
3672
3673If the L</new> constructor call fails, it returns C<undef>, and makes the
3674fail reason available through the L</error_diag> method.
3675
3676 $csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
3677 die "".Text::CSV_PP->error_diag ();
3678
3679L</error_diag> will return a string like
3680
3681 "INI - Unknown attribute 'ecs_char'"
3682
3683=head2 known_attributes
3684
3685 @attr = Text::CSV_PP->known_attributes;
3686 @attr = Text::CSV_PP::known_attributes;
3687 @attr = $csv->known_attributes;
3688
3689This method will return an ordered list of all the supported attributes as
3690described above. This can be useful for knowing what attributes are valid
3691in classes that use or extend Text::CSV_PP.
3692
3693=head2 print
3694
3695 $status = $csv->print ($fh, $colref);
3696
3697Similar to L</combine> + L</string> + L</print>, but much more efficient.
3698It expects an array ref as input (not an array!) and the resulting string
3699is not really created, but immediately written to the C<$fh> object,
3700typically an IO handle or any other object that offers a L</print> method.
3701
3702For performance reasons C<print> does not create a result string, so all
3703L</string>, L</status>, L</fields>, and L</error_input> methods will return
3704undefined information after executing this method.
3705
3706If C<$colref> is C<undef> (explicit, not through a variable argument) and
3707L</bind_columns> was used to specify fields to be printed, it is possible
3708to make performance improvements, as otherwise data would have to be copied
3709as arguments to the method call:
3710
3711 $csv->bind_columns (\($foo, $bar));
3712 $status = $csv->print ($fh, undef);
3713
3714A short benchmark
3715
3716 my @data = ("aa" .. "zz");
3717 $csv->bind_columns (\(@data));
3718
3719 $csv->print ($fh, [ @data ]); # 11800 recs/sec
3720 $csv->print ($fh, \@data ); # 57600 recs/sec
3721 $csv->print ($fh, undef ); # 48500 recs/sec
3722
3723=head2 say
3724
3725 $status = $csv->say ($fh, $colref);
3726
3727Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
3728
3729=head2 print_hr
3730
3731 $csv->print_hr ($fh, $ref);
3732
3733Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
3734provided the column names are set with L</column_names>.
3735
3736It is just a wrapper method with basic parameter checks over
3737
3738 $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
3739
3740=head2 combine
3741
3742 $status = $csv->combine (@fields);
3743
3744This method constructs a C<CSV> record from C<@fields>, returning success
3745or failure. Failure can result from lack of arguments or an argument that
3746contains an invalid character. Upon success, L</string> can be called to
3747retrieve the resultant C<CSV> string. Upon failure, the value returned by
3748L</string> is undefined and L</error_input> could be called to retrieve the
3749invalid argument.
3750
3751=head2 string
3752
3753 $line = $csv->string ();
3754
3755This method returns the input to L</parse> or the resultant C<CSV> string
3756of L</combine>, whichever was called more recently.
3757
3758=head2 getline
3759
3760 $colref = $csv->getline ($fh);
3761
3762This is the counterpart to L</print>, as L</parse> is the counterpart to
3763L</combine>: it parses a row from the C<$fh> handle using the L</getline>
3764method associated with C<$fh> and parses this row into an array ref. This
3765array ref is returned by the function or C<undef> for failure. When C<$fh>
3766does not support C<getline>, you are likely to hit errors.
3767
3768When fields are bound with L</bind_columns> the return value is a reference
3769to an empty list.
3770
3771The L</string>, L</fields>, and L</status> methods are meaningless again.
3772
3773=head2 getline_all
3774
3775 $arrayref = $csv->getline_all ($fh);
3776 $arrayref = $csv->getline_all ($fh, $offset);
3777 $arrayref = $csv->getline_all ($fh, $offset, $length);
3778
3779This will return a reference to a list of L<getline ($fh)|/getline> results.
3780In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
3781with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
3782into consideration.
3783
3784Given a CSV file with 10 lines:
3785
3786 lines call
3787 ----- ---------------------------------------------------------
3788 0..9 $csv->getline_all ($fh) # all
3789 0..9 $csv->getline_all ($fh, 0) # all
3790 8..9 $csv->getline_all ($fh, 8) # start at 8
3791 - $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
3792 0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
3793 4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
3794 8..9 $csv->getline_all ($fh, -2) # last 2 rows
3795 6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
3796
3797=head2 getline_hr
3798
3799The L</getline_hr> and L</column_names> methods work together to allow you
3800to have rows returned as hashrefs. You must call L</column_names> first to
3801declare your column names.
3802
3803 $csv->column_names (qw( code name price description ));
3804 $hr = $csv->getline_hr ($fh);
3805 print "Price for $hr->{name} is $hr->{price} EUR\n";
3806
3807L</getline_hr> will croak if called before L</column_names>.
3808
3809Note that L</getline_hr> creates a hashref for every row and will be much
3810slower than the combined use of L</bind_columns> and L</getline> but still
3811offering the same ease of use hashref inside the loop:
3812
3813 my @cols = @{$csv->getline ($fh)};
3814 $csv->column_names (@cols);
3815 while (my $row = $csv->getline_hr ($fh)) {
3816 print $row->{price};
3817 }
3818
3819Could easily be rewritten to the much faster:
3820
3821 my @cols = @{$csv->getline ($fh)};
3822 my $row = {};
3823 $csv->bind_columns (\@{$row}{@cols});
3824 while ($csv->getline ($fh)) {
3825 print $row->{price};
3826 }
3827
3828Your mileage may vary for the size of the data and the number of rows. With
3829perl-5.14.2 the comparison for a 100_000 line file with 14 rows:
3830
3831 Rate hashrefs getlines
3832 hashrefs 1.00/s -- -76%
3833 getlines 4.15/s 313% --
3834
3835=head2 getline_hr_all
3836
3837 $arrayref = $csv->getline_hr_all ($fh);
3838 $arrayref = $csv->getline_hr_all ($fh, $offset);
3839 $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
3840
3841This will return a reference to a list of L<getline_hr ($fh)|/getline_hr>
3842results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
3843
3844=head2 parse
3845
3846 $status = $csv->parse ($line);
3847
3848This method decomposes a C<CSV> string into fields, returning success or
3849failure. Failure can result from a lack of argument or the given C<CSV>
3850string is improperly formatted. Upon success, L</fields> can be called to
3851retrieve the decomposed fields. Upon failure calling L</fields> will return
3852undefined data and L</error_input> can be called to retrieve the invalid
3853argument.
3854
3855You may use the L</types> method for setting column types. See L</types>'
3856description below.
3857
3858The C<$line> argument is supposed to be a simple scalar. Everything else is
3859supposed to croak and set error 1500.
3860
3861=head2 fragment
3862
3863This function tries to implement RFC7111 (URI Fragment Identifiers for the
3864text/csv Media Type) - http://tools.ietf.org/html/rfc7111
3865
3866 my $AoA = $csv->fragment ($fh, $spec);
3867
3868In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
3869to indicate a range. All indices are C<1>-based: the first row or column
3870has index C<1>. Selections can be combined with the semi-colon (C<;>).
3871
3872When using this method in combination with L</column_names>, the returned
3873reference will point to a list of hashes instead of a list of lists. A
3874disjointed cell-based combined selection might return rows with different
3875number of columns making the use of hashes unpredictable.
3876
3877 $csv->column_names ("Name", "Age");
3878 my $AoH = $csv->fragment ($fh, "col=3;8");
3879
3880If the L</after_parse> callback is active, it is also called on every line
3881parsed and skipped before the fragment.
3882
3883=over 2
3884
3885=item row
3886
3887 row=4
3888 row=5-7
3889 row=6-*
3890 row=1-2;4;6-*
3891
3892=item col
3893
3894 col=2
3895 col=1-3
3896 col=4-*
3897 col=1-2;4;7-*
3898
3899=item cell
3900
3901In cell-based selection, the comma (C<,>) is used to pair row and column
3902
3903 cell=4,1
3904
3905The range operator (C<->) using C<cell>s can be used to define top-left and
3906bottom-right C<cell> location
3907
3908 cell=3,1-4,6
3909
3910The C<*> is only allowed in the second part of a pair
3911
3912 cell=3,2-*,2 # row 3 till end, only column 2
3913 cell=3,2-3,* # column 2 till end, only row 3
3914 cell=3,2-*,* # strip row 1 and 2, and column 1
3915
3916Cells and cell ranges may be combined with C<;>, possibly resulting in rows
3917with different number of columns
3918
3919 cell=1,1-2,2;3,3-4,4;1,4;4,1
3920
3921Disjointed selections will only return selected cells. The cells that are
3922not specified will not be included in the returned set, not even as
3923C<undef>. As an example given a C<CSV> like
3924
3925 11,12,13,...19
3926 21,22,...28,29
3927 : :
3928 91,...97,98,99
3929
3930with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
3931
3932 11,12,14
3933 21,22
3934 33,34
3935 41,43,44
3936
3937Overlapping cell-specs will return those cells only once, So
3938C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
3939
3940 11,12,13
3941 21,22,23,24
3942 31,32,33,34
3943 42,43,44
3944
3945=back
3946
3947L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different
3948types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
3949Passing an invalid fragment specification will croak and set error 2013.
3950
3951=head2 column_names
3952
3953Set the "keys" that will be used in the L</getline_hr> calls. If no keys
3954(column names) are passed, it will return the current setting as a list.
3955
3956L</column_names> accepts a list of scalars (the column names) or a single
3957array_ref, so you can pass the return value from L</getline> too:
3958
3959 $csv->column_names ($csv->getline ($fh));
3960
3961L</column_names> does B<no> checking on duplicates at all, which might lead
3962to unexpected results. Undefined entries will be replaced with the string
3963C<"\cAUNDEF\cA">, so
3964
3965 $csv->column_names (undef, "", "name", "name");
3966 $hr = $csv->getline_hr ($fh);
3967
3968Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
3969the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
3970field.
3971
3972L</column_names> croaks on invalid arguments.
3973
3974=head2 header
3975
3976This method does NOT work in perl-5.6.x
3977
3978Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
3979
3980 my @hdr = $csv->header ($fh);
3981 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3982 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3983
3984The first argument should be a file handle.
3985
3986This method resets some object properties, as it is supposed to be invoked
3987only once per file or stream. It will leave attributes C<column_names> and
3988C<bound_columns> alone of setting column names is disabled. Reading headers
3989on previously process objects might fail on perl-5.8.0 and older.
3990
3991Assuming that the file opened for parsing has a header, and the header does
3992not contain problematic characters like embedded newlines, read the first
3993line from the open handle then auto-detect whether the header separates the
3994column names with a character from the allowed separator list.
3995
3996If any of the allowed separators matches, and none of the I<other> allowed
3997separators match, set L<C<sep>|/sep> to that separator for the current
3998CSV_PP instance and use it to parse the first line, map those to lowercase,
3999and use that to set the instance L</column_names>:
4000
4001 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4002 open my $fh, "<", "file.csv";
4003 binmode $fh; # for Windows
4004 $csv->header ($fh);
4005 while (my $row = $csv->getline_hr ($fh)) {
4006 ...
4007 }
4008
4009If the header is empty, contains more than one unique separator out of the
4010allowed set, contains empty fields, or contains identical fields (after
4011folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
4012
4013If the header contains embedded newlines or is not valid CSV in any other
4014way, this method will croak and leave the parse error untouched.
4015
4016A successful call to C<header> will always set the L<C<sep>|/sep> of the
4017C<$csv> object. This behavior can not be disabled.
4018
4019=head3 return value
4020
4021On error this method will croak.
4022
4023In list context, the headers will be returned whether they are used to set
4024L</column_names> or not.
4025
4026In scalar context, the instance itself is returned. B<Note>: the values as
4027found in the header will effectively be B<lost> if C<set_column_names> is
4028false.
4029
4030=head3 Options
4031
4032=over 2
4033
4034=item sep_set
4035
4036 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
4037
4038The list of legal separators defaults to C<[ ";", "," ]> and can be changed
4039by this option. As this is probably the most often used option, it can be
4040passed on its own as an unnamed argument:
4041
4042 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
4043
4044Multi-byte sequences are allowed, both multi-character and Unicode. See
4045L<C<sep>|/sep>.
4046
4047=item detect_bom
4048
4049 $csv->header ($fh, { detect_bom => 1 });
4050
4051The default behavior is to detect if the header line starts with a BOM. If
4052the header has a BOM, use that to set the encoding of C<$fh>. This default
4053behavior can be disabled by passing a false value to C<detect_bom>.
4054
4055Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
4056UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
4057but L<Encode> does not (yet). UTF-7 is not supported.
4058
4059If a supported BOM was detected as start of the stream, it is stored in the
4060abject attribute C<ENCODING>.
4061
4062 my $enc = $csv->{ENCODING};
4063
4064The encoding is used with C<binmode> on C<$fh>.
4065
4066If the handle was opened in a (correct) encoding, this method will B<not>
4067alter the encoding, as it checks the leading B<bytes> of the first line. In
4068case the stream starts with a decode BOM (C<U+FEFF>), C<{ENCODING}> will be
4069C<""> (empty) instead of the default C<undef>.
4070
4071=item munge_column_names
4072
4073This option offers the means to modify the column names into something that
4074is most useful to the application. The default is to map all column names
4075to lower case.
4076
4077 $csv->header ($fh, { munge_column_names => "lc" });
4078
4079The following values are available:
4080
4081 lc - lower case
4082 uc - upper case
4083 none - do not change
4084 \%hash - supply a mapping
4085 \&cb - supply a callback
4086
4087Literal:
4088
4089 $csv->header ($fh, { munge_column_names => "none" });
4090
4091Hash:
4092
4093 $csv->header ($fh, { munge_column_names => { foo => "sombrero" });
4094
4095if a value does not exist, the original value is used unchanged
4096
4097Callback:
4098
4099 $csv->header ($fh, { munge_column_names => sub { fc } });
4100 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
4101 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
4102
4103As this callback is called in a C<map>, you can use C<$_> directly.
4104
4105=item set_column_names
4106
4107 $csv->header ($fh, { set_column_names => 1 });
4108
4109The default is to set the instances column names using L</column_names> if
4110the method is successful, so subsequent calls to L</getline_hr> can return
4111a hash. Disable setting the header can be forced by using a false value for
4112this option.
4113
4114As described in L</return value> above, content is lost in scalar context.
4115
4116=back
4117
4118=head3 Validation
4119
4120When receiving CSV files from external sources, this method can be used to
4121protect against changes in the layout by restricting to known headers (and
4122typos in the header fields).
4123
4124 my %known = (
4125 "record key" => "c_rec",
4126 "rec id" => "c_rec",
4127 "id_rec" => "c_rec",
4128 "kode" => "code",
4129 "code" => "code",
4130 "vaule" => "value",
4131 "value" => "value",
4132 );
4133 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4134 open my $fh, "<", $source or die "$source: $!";
4135 $csv->header ($fh, { munge_column_names => sub {
4136 s/\s+$//;
4137 s/^\s+//;
4138 $known{lc $_} or die "Unknown column '$_' in $source";
4139 }});
4140 while (my $row = $csv->getline_hr ($fh)) {
4141 say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
4142 }
4143
4144=head2 bind_columns
4145
4146Takes a list of scalar references to be used for output with L</print> or
4147to store in the fields fetched by L</getline>. When you do not pass enough
4148references to store the fetched fields in, L</getline> will fail with error
4149C<3006>. If you pass more than there are fields to return, the content of
4150the remaining references is left untouched.
4151
4152 $csv->bind_columns (\$code, \$name, \$price, \$description);
4153 while ($csv->getline ($fh)) {
4154 print "The price of a $name is \x{20ac} $price\n";
4155 }
4156
4157To reset or clear all column binding, call L</bind_columns> with the single
4158argument C<undef>. This will also clear column names.
4159
4160 $csv->bind_columns (undef);
4161
4162If no arguments are passed at all, L</bind_columns> will return the list of
4163current bindings or C<undef> if no binds are active.
4164
4165Note that in parsing with C<bind_columns>, the fields are set on the fly.
4166That implies that if the third field of a row causes an error (or this row
4167has just two fields where the previous row had more), the first two fields
4168already have been assigned the values of the current row, while the rest of
4169the fields will still hold the values of the previous row. If you want the
4170parser to fail in these cases, use the L<C<strict>|/strict> attribute.
4171
4172=head2 eof
4173
4174 $eof = $csv->eof ();
4175
4176If L</parse> or L</getline> was used with an IO stream, this method will
4177return true (1) if the last call hit end of file, otherwise it will return
4178false (''). This is useful to see the difference between a failure and end
4179of file.
4180
4181Note that if the parsing of the last line caused an error, C<eof> is still
4182true. That means that if you are I<not> using L</auto_diag>, an idiom like
4183
4184 while (my $row = $csv->getline ($fh)) {
4185 # ...
4186 }
4187 $csv->eof or $csv->error_diag;
4188
4189will I<not> report the error. You would have to change that to
4190
4191 while (my $row = $csv->getline ($fh)) {
4192 # ...
4193 }
4194 +$csv->error_diag and $csv->error_diag;
4195
4196=head2 types
4197
4198 $csv->types (\@tref);
4199
4200This method is used to force that (all) columns are of a given type. For
4201example, if you have an integer column, two columns with doubles and a
4202string column, then you might do a
4203
4204 $csv->types ([Text::CSV_PP::IV (),
4205 Text::CSV_PP::NV (),
4206 Text::CSV_PP::NV (),
4207 Text::CSV_PP::PV ()]);
4208
4209Column types are used only for I<decoding> columns while parsing, in other
4210words by the L</parse> and L</getline> methods.
4211
4212You can unset column types by doing a
4213
4214 $csv->types (undef);
4215
4216or fetch the current type settings with
4217
4218 $types = $csv->types ();
4219
4220=over 4
4221
4222=item IV
4223
4224Set field type to integer.
4225
4226=item NV
4227
4228Set field type to numeric/float.
4229
4230=item PV
4231
4232Set field type to string.
4233
4234=back
4235
4236=head2 fields
4237
4238 @columns = $csv->fields ();
4239
4240This method returns the input to L</combine> or the resultant decomposed
4241fields of a successful L</parse>, whichever was called more recently.
4242
4243Note that the return value is undefined after using L</getline>, which does
4244not fill the data structures returned by L</parse>.
4245
4246=head2 meta_info
4247
4248 @flags = $csv->meta_info ();
4249
4250This method returns the "flags" of the input to L</combine> or the flags of
4251the resultant decomposed fields of L</parse>, whichever was called more
4252recently.
4253
4254For each field, a meta_info field will hold flags that inform something
4255about the field returned by the L</fields> method or passed to the
4256L</combine> method. The flags are bit-wise-C<or>'d like:
4257
4258=over 2
4259
4260=item C< >0x0001
4261
4262The field was quoted.
4263
4264=item C< >0x0002
4265
4266The field was binary.
4267
4268=back
4269
4270See the C<is_***> methods below.
4271
4272=head2 is_quoted
4273
4274 my $quoted = $csv->is_quoted ($column_idx);
4275
4276Where C<$column_idx> is the (zero-based) index of the column in the last
4277result of L</parse>.
4278
4279This returns a true value if the data in the indicated column was enclosed
4280in L<C<quote_char>|/quote_char> quotes. This might be important for fields
4281where content C<,20070108,> is to be treated as a numeric value, and where
4282C<,"20070108",> is explicitly marked as character string data.
4283
4284This method is only valid when L</keep_meta_info> is set to a true value.
4285
4286=head2 is_binary
4287
4288 my $binary = $csv->is_binary ($column_idx);
4289
4290Where C<$column_idx> is the (zero-based) index of the column in the last
4291result of L</parse>.
4292
4293This returns a true value if the data in the indicated column contained any
4294byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
4295
4296This method is only valid when L</keep_meta_info> is set to a true value.
4297
4298=head2 is_missing
4299
4300 my $missing = $csv->is_missing ($column_idx);
4301
4302Where C<$column_idx> is the (zero-based) index of the column in the last
4303result of L</getline_hr>.
4304
4305 $csv->keep_meta_info (1);
4306 while (my $hr = $csv->getline_hr ($fh)) {
4307 $csv->is_missing (0) and next; # This was an empty line
4308 }
4309
4310When using L</getline_hr>, it is impossible to tell if the parsed fields
4311are C<undef> because they where not filled in the C<CSV> stream or because
4312they were not read at all, as B<all> the fields defined by L</column_names>
4313are set in the hash-ref. If you still need to know if all fields in each
4314row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
4315you can check the flags.
4316
4317If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
4318always return C<undef>, regardless of C<$column_idx> being valid or not. If
4319this attribute is C<true> it will return either C<0> (the field is present)
4320or C<1> (the field is missing).
4321
4322A special case is the empty line. If the line is completely empty - after
4323dealing with the flags - this is still a valid CSV line: it is a record of
4324just one single empty field. However, if C<keep_meta_info> is set, invoking
4325C<is_missing> with index C<0> will now return true.
4326
4327=head2 status
4328
4329 $status = $csv->status ();
4330
4331This method returns the status of the last invoked L</combine> or L</parse>
4332call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
4333
4334=head2 error_input
4335
4336 $bad_argument = $csv->error_input ();
4337
4338This method returns the erroneous argument (if it exists) of L</combine> or
4339L</parse>, whichever was called more recently. If the last invocation was
4340successful, C<error_input> will return C<undef>.
4341
4342=head2 error_diag
4343
4344 Text::CSV_PP->error_diag ();
4345 $csv->error_diag ();
4346 $error_code = 0 + $csv->error_diag ();
4347 $error_str = "" . $csv->error_diag ();
4348 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
4349
4350If (and only if) an error occurred, this function returns the diagnostics
4351of that error.
4352
4353If called in void context, this will print the internal error code and the
4354associated error message to STDERR.
4355
4356If called in list context, this will return the error code and the error
4357message in that order. If the last error was from parsing, the rest of the
4358values returned are a best guess at the location within the line that was
4359being parsed. Their values are 1-based. The position currently is index of
4360the byte at which the parsing failed in the current record. It might change
4361to be the index of the current character in a later release. The records is
4362the index of the record parsed by the csv instance. The field number is the
4363index of the field the parser thinks it is currently trying to parse. See
4364F<examples/csv-check> for how this can be used.
4365
4366If called in scalar context, it will return the diagnostics in a single
4367scalar, a-la C<$!>. It will contain the error code in numeric context, and
4368the diagnostics message in string context.
4369
4370When called as a class method or a direct function call, the diagnostics
4371are that of the last L</new> call.
4372
4373=head2 record_number
4374
4375 $recno = $csv->record_number ();
4376
4377Returns the records parsed by this csv instance. This value should be more
4378accurate than C<$.> when embedded newlines come in play. Records written by
4379this instance are not counted.
4380
4381=head2 SetDiag
4382
4383 $csv->SetDiag (0);
4384
4385Use to reset the diagnostics if you are dealing with errors.
4386
4387=head1 FUNCTIONS
4388
4389This section is also taken from Text::CSV_XS.
4390
4391=head2 csv
4392
4393This function is not exported by default and should be explicitly requested:
4394
4395 use Text::CSV_PP qw( csv );
4396
4397This is an high-level function that aims at simple (user) interfaces. This
4398can be used to read/parse a C<CSV> file or stream (the default behavior) or
4399to produce a file or write to a stream (define the C<out> attribute). It
4400returns an array- or hash-reference on parsing (or C<undef> on fail) or the
4401numeric value of L</error_diag> on writing. When this function fails you
4402can get to the error using the class call to L</error_diag>
4403
4404 my $aoa = csv (in => "test.csv") or
4405 die Text::CSV_PP->error_diag;
4406
4407This function takes the arguments as key-value pairs. This can be passed as
4408a list or as an anonymous hash:
4409
4410 my $aoa = csv ( in => "test.csv", sep_char => ";");
4411 my $aoh = csv ({ in => $fh, headers => "auto" });
4412
4413The arguments passed consist of two parts: the arguments to L</csv> itself
4414and the optional attributes to the C<CSV> object used inside the function
4415as enumerated and explained in L</new>.
4416
4417If not overridden, the default option used for CSV is
4418
4419 auto_diag => 1
4420 escape_null => 0
4421
4422The option that is always set and cannot be altered is
4423
4424 binary => 1
4425
4426As this function will likely be used in one-liners, it allows C<quote> to
4427be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
4428or C<escape>.
4429
4430Alternative invocations:
4431
4432 my $aoa = Text::CSV_PP::csv (in => "file.csv");
4433
4434 my $csv = Text::CSV_PP->new ();
4435 my $aoa = $csv->csv (in => "file.csv");
4436
4437In the latter case, the object attributes are used from the existing object
4438and the attribute arguments in the function call are ignored:
4439
4440 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
4441 my $aoh = $csv->csv (in => "file.csv", bom => 1);
4442
4443will parse using C<;> as C<sep_char>, not C<,>.
4444
4445=head3 in
4446
4447Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
4448which will be opened for reading and closed when finished, a file handle
4449(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
4450itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
4451
4452When used with L</out>, C<in> should be a reference to a CSV structure (AoA
4453or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
4454The code-ref will be invoked with no arguments.
4455
4456 my $aoa = csv (in => "file.csv");
4457
4458 open my $fh, "<", "file.csv";
4459 my $aoa = csv (in => $fh);
4460
4461 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
4462 my $err = csv (in => $csv, out => "file.csv");
4463
4464If called in void context without the L</out> attribute, the resulting ref
4465will be used as input to a subsequent call to csv:
4466
4467 csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
4468
4469will be a shortcut to
4470
4471 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
4472
4473where, in the absence of the C<out> attribute, this is a shortcut to
4474
4475 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
4476 out => *STDOUT)
4477
4478=head3 out
4479
4480 csv (in => $aoa, out => "file.csv");
4481 csv (in => $aoa, out => $fh);
4482 csv (in => $aoa, out => STDOUT);
4483 csv (in => $aoa, out => *STDOUT);
4484 csv (in => $aoa, out => \*STDOUT);
4485 csv (in => $aoa, out => \my $data);
4486 csv (in => $aoa, out => undef);
4487 csv (in => $aoa, out => \"skip");
4488
4489In output mode, the default CSV options when producing CSV are
4490
4491 eol => "\r\n"
4492
4493The L</fragment> attribute is ignored in output mode.
4494
4495C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
4496writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
4497reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
4498or a reference to a scalar (e.g. C<\my $data>).
4499
4500 csv (in => sub { $sth->fetch }, out => "dump.csv");
4501 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
4502 headers => $sth->{NAME_lc});
4503
4504When a code-ref is used for C<in>, the output is generated per invocation,
4505so no buffering is involved. This implies that there is no size restriction
4506on the number of records. The C<csv> function ends when the coderef returns
4507a false value.
4508
4509If C<out> is set to a reference of the literal string C<"skip">, the output
4510will be suppressed completely, which might be useful in combination with a
4511filter for side effects only.
4512
4513 my %cache;
4514 csv (in => "dump.csv",
4515 out => \"skip",
4516 on_in => sub { $cache{$_[1][1]}++ });
4517
4518Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
4519equivalent to C<\"skip">.
4520
4521=head3 encoding
4522
4523If passed, it should be an encoding accepted by the C<:encoding()> option
4524to C<open>. There is no default value. This attribute does not work in perl
45255.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
4526line invocations.
4527
4528If C<encoding> is set to the literal value C<"auto">, the method L</header>
4529will be invoked on the opened stream to check if there is a BOM and set the
4530encoding accordingly. This is equal to passing a true value in the option
4531L<C<detect_bom>|/detect_bom>.
4532
4533=head3 detect_bom
4534
4535If C<detect_bom> is given, the method L</header> will be invoked on the
4536opened stream to check if there is a BOM and set the encoding accordingly.
4537
4538C<detect_bom> can be abbreviated to C<bom>.
4539
4540This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
4541
4542Note that as the method L</header> is invoked, its default is to also set
4543the headers.
4544
4545=head3 headers
4546
4547If this attribute is not given, the default behavior is to produce an array
4548of arrays.
4549
4550If C<headers> is supplied, it should be an anonymous list of column names,
4551an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
4552or C<skip>.
4553
4554=over 2
4555
4556=item skip
4557
4558When C<skip> is used, the header will not be included in the output.
4559
4560 my $aoa = csv (in => $fh, headers => "skip");
4561
4562=item auto
4563
4564If C<auto> is used, the first line of the C<CSV> source will be read as the
4565list of field headers and used to produce an array of hashes.
4566
4567 my $aoh = csv (in => $fh, headers => "auto");
4568
4569=item lc
4570
4571If C<lc> is used, the first line of the C<CSV> source will be read as the
4572list of field headers mapped to lower case and used to produce an array of
4573hashes. This is a variation of C<auto>.
4574
4575 my $aoh = csv (in => $fh, headers => "lc");
4576
4577=item uc
4578
4579If C<uc> is used, the first line of the C<CSV> source will be read as the
4580list of field headers mapped to upper case and used to produce an array of
4581hashes. This is a variation of C<auto>.
4582
4583 my $aoh = csv (in => $fh, headers => "uc");
4584
4585=item CODE
4586
4587If a coderef is used, the first line of the C<CSV> source will be read as
4588the list of mangled field headers in which each field is passed as the only
4589argument to the coderef. This list is used to produce an array of hashes.
4590
4591 my $aoh = csv (in => $fh,
4592 headers => sub { lc ($_[0]) =~ s/kode/code/gr });
4593
4594this example is a variation of using C<lc> where all occurrences of C<kode>
4595are replaced with C<code>.
4596
4597=item ARRAY
4598
4599If C<headers> is an anonymous list, the entries in the list will be used
4600as field names. The first line is considered data instead of headers.
4601
4602 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
4603 csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
4604
4605=item HASH
4606
4607If C<headers> is an hash reference, this implies C<auto>, but header fields
4608for that exist as key in the hashref will be replaced by the value for that
4609key. Given a CSV file like
4610
4611 post-kode,city,name,id number,fubble
4612 1234AA,Duckstad,Donald,13,"X313DF"
4613
4614using
4615
4616 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
4617
4618will return an entry like
4619
4620 { pc => "1234AA",
4621 city => "Duckstad",
4622 name => "Donald",
4623 ID => "13",
4624 fubble => "X313DF",
4625 }
4626
4627=back
4628
4629See also L<C<munge_column_names>|/munge_column_names> and
4630L<C<set_column_names>|/set_column_names>.
4631
4632=head3 munge_column_names
4633
4634If C<munge_column_names> is set, the method L</header> is invoked on the
4635opened stream with all matching arguments to detect and set the headers.
4636
4637C<munge_column_names> can be abbreviated to C<munge>.
4638
4639=head3 key
4640
4641If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
4642hashref instead of an array of hashes. Allowed values are simple scalars or
4643array-references where the first element is the joiner and the rest are the
4644fields to join to combine the key.
4645
4646 my $ref = csv (in => "test.csv", key => "code");
4647 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
4648
4649with test.csv like
4650
4651 code,product,price,color
4652 1,pc,850,gray
4653 2,keyboard,12,white
4654 3,mouse,5,black
4655
4656the first example will return
4657
4658 { 1 => {
4659 code => 1,
4660 color => 'gray',
4661 price => 850,
4662 product => 'pc'
4663 },
4664 2 => {
4665 code => 2,
4666 color => 'white',
4667 price => 12,
4668 product => 'keyboard'
4669 },
4670 3 => {
4671 code => 3,
4672 color => 'black',
4673 price => 5,
4674 product => 'mouse'
4675 }
4676 }
4677
4678the second example will return
4679
4680 { "1:gray" => {
4681 code => 1,
4682 color => 'gray',
4683 price => 850,
4684 product => 'pc'
4685 },
4686 "2:white" => {
4687 code => 2,
4688 color => 'white',
4689 price => 12,
4690 product => 'keyboard'
4691 },
4692 "3:black" => {
4693 code => 3,
4694 color => 'black',
4695 price => 5,
4696 product => 'mouse'
4697 }
4698 }
4699
4700The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
4701date that has no header line, like
4702
4703 my $ref = csv (
4704 in => "foo.csv",
4705 headers => [qw( c_foo foo bar description stock )],
4706 key => "c_foo",
4707 );
4708
4709=head3 value
4710
4711Used to create key-value hashes.
4712
4713Only allowed when C<key> is valid. A C<value> can be either a single column
4714label or an anonymous list of column labels. In the first case, the value
4715will be a simple scalar value, in the latter case, it will be a hashref.
4716
4717 my $ref = csv (in => "test.csv", key => "code",
4718 value => "price");
4719 my $ref = csv (in => "test.csv", key => "code",
4720 value => [ "product", "price" ]);
4721 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
4722 value => "price");
4723 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
4724 value => [ "product", "price" ]);
4725
4726with test.csv like
4727
4728 code,product,price,color
4729 1,pc,850,gray
4730 2,keyboard,12,white
4731 3,mouse,5,black
4732
4733the first example will return
4734
4735 { 1 => 850,
4736 2 => 12,
4737 3 => 5,
4738 }
4739
4740the second example will return
4741
4742 { 1 => {
4743 price => 850,
4744 product => 'pc'
4745 },
4746 2 => {
4747 price => 12,
4748 product => 'keyboard'
4749 },
4750 3 => {
4751 price => 5,
4752 product => 'mouse'
4753 }
4754 }
4755
4756the third example will return
4757
4758 { "1:gray" => 850,
4759 "2:white" => 12,
4760 "3:black" => 5,
4761 }
4762
4763the fourth example will return
4764
4765 { "1:gray" => {
4766 price => 850,
4767 product => 'pc'
4768 },
4769 "2:white" => {
4770 price => 12,
4771 product => 'keyboard'
4772 },
4773 "3:black" => {
4774 price => 5,
4775 product => 'mouse'
4776 }
4777 }
4778
4779=head3 keep_headers
4780
4781When using hashes, keep the column names into the arrayref passed, so all
4782headers are available after the call in the original order.
4783
4784 my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
4785
4786This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
4787
4788This attribute implies a default of C<auto> for the C<headers> attribute.
4789
4790=head3 fragment
4791
4792Only output the fragment as defined in the L</fragment> method. This option
4793is ignored when I<generating> C<CSV>. See L</out>.
4794
4795Combining all of them could give something like
4796
4797 use Text::CSV_PP qw( csv );
4798 my $aoh = csv (
4799 in => "test.txt",
4800 encoding => "utf-8",
4801 headers => "auto",
4802 sep_char => "|",
4803 fragment => "row=3;6-9;15-*",
4804 );
4805 say $aoh->[15]{Foo};
4806
4807=head3 sep_set
4808
4809If C<sep_set> is set, the method L</header> is invoked on the opened stream
4810to detect and set L<C<sep_char>|/sep_char> with the given set.
4811
4812C<sep_set> can be abbreviated to C<seps>.
4813
4814Note that as the L</header> method is invoked, its default is to also set
4815the headers.
4816
4817=head3 set_column_names
4818
4819If C<set_column_names> is passed, the method L</header> is invoked on the
4820opened stream with all arguments meant for L</header>.
4821
4822If C<set_column_names> is passed as a false value, the content of the first
4823row is only preserved if the output is AoA:
4824
4825With an input-file like
4826
4827 bAr,foo
4828 1,2
4829 3,4,5
4830
4831This call
4832
4833 my $aoa = csv (in => $file, set_column_names => 0);
4834
4835will result in
4836
4837 [[ "bar", "foo" ],
4838 [ "1", "2" ],
4839 [ "3", "4", "5" ]]
4840
4841and
4842
4843 my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
4844
4845will result in
4846
4847 [[ "bAr", "foo" ],
4848 [ "1", "2" ],
4849 [ "3", "4", "5" ]]
4850
4851=head2 Callbacks
4852
4853Callbacks enable actions triggered from the I<inside> of Text::CSV_PP.
4854
4855While most of what this enables can easily be done in an unrolled loop as
4856described in the L</SYNOPSIS> callbacks can be used to meet special demands
4857or enhance the L</csv> function.
4858
4859=over 2
4860
4861=item error
4862
4863 $csv->callbacks (error => sub { $csv->SetDiag (0) });
4864
4865the C<error> callback is invoked when an error occurs, but I<only> when
4866L</auto_diag> is set to a true value. A callback is invoked with the values
4867returned by L</error_diag>:
4868
4869 my ($c, $s);
4870
4871 sub ignore3006
4872 {
4873 my ($err, $msg, $pos, $recno, $fldno) = @_;
4874 if ($err == 3006) {
4875 # ignore this error
4876 ($c, $s) = (undef, undef);
4877 Text::CSV_PP->SetDiag (0);
4878 }
4879 # Any other error
4880 return;
4881 } # ignore3006
4882
4883 $csv->callbacks (error => \&ignore3006);
4884 $csv->bind_columns (\$c, \$s);
4885 while ($csv->getline ($fh)) {
4886 # Error 3006 will not stop the loop
4887 }
4888
4889=item after_parse
4890
4891 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4892 while (my $row = $csv->getline ($fh)) {
4893 $row->[-1] eq "NEW";
4894 }
4895
4896This callback is invoked after parsing with L</getline> only if no error
4897occurred. The callback is invoked with two arguments: the current C<CSV>
4898parser object and an array reference to the fields parsed.
4899
4900The return code of the callback is ignored unless it is a reference to the
4901string "skip", in which case the record will be skipped in L</getline_all>.
4902
4903 sub add_from_db
4904 {
4905 my ($csv, $row) = @_;
4906 $sth->execute ($row->[4]);
4907 push @$row, $sth->fetchrow_array;
4908 } # add_from_db
4909
4910 my $aoa = csv (in => "file.csv", callbacks => {
4911 after_parse => \&add_from_db });
4912
4913This hook can be used for validation:
4914
4915=over 2
4916
4917=item FAIL
4918
4919Die if any of the records does not validate a rule:
4920
4921 after_parse => sub {
4922 $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4923 die "5th field does not have a valid Dutch zipcode";
4924 }
4925
4926=item DEFAULT
4927
4928Replace invalid fields with a default value:
4929
4930 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4931
4932=item SKIP
4933
4934Skip records that have invalid fields (only applies to L</getline_all>):
4935
4936 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4937
4938=back
4939
4940=item before_print
4941
4942 my $idx = 1;
4943 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4944 $csv->print (*STDOUT, [ 0, $_ ]) for @members;
4945
4946This callback is invoked before printing with L</print> only if no error
4947occurred. The callback is invoked with two arguments: the current C<CSV>
4948parser object and an array reference to the fields passed.
4949
4950The return code of the callback is ignored.
4951
4952 sub max_4_fields
4953 {
4954 my ($csv, $row) = @_;
4955 @$row > 4 and splice @$row, 4;
4956 } # max_4_fields
4957
4958 csv (in => csv (in => "file.csv"), out => *STDOUT,
4959 callbacks => { before print => \&max_4_fields });
4960
4961This callback is not active for L</combine>.
4962
4963=back
4964
4965=head3 Callbacks for csv ()
4966
4967The L</csv> allows for some callbacks that do not integrate in XS internals
4968but only feature the L</csv> function.
4969
4970 csv (in => "file.csv",
4971 callbacks => {
4972 filter => { 6 => sub { $_ > 15 } }, # first
4973 after_parse => sub { say "AFTER PARSE"; }, # first
4974 after_in => sub { say "AFTER IN"; }, # second
4975 on_in => sub { say "ON IN"; }, # third
4976 },
4977 );
4978
4979 csv (in => $aoh,
4980 out => "file.csv",
4981 callbacks => {
4982 on_in => sub { say "ON IN"; }, # first
4983 before_out => sub { say "BEFORE OUT"; }, # second
4984 before_print => sub { say "BEFORE PRINT"; }, # third
4985 },
4986 );
4987
4988=over 2
4989
4990=item filter
4991
4992This callback can be used to filter records. It is called just after a new
4993record has been scanned. The callback accepts a:
4994
4995=over 2
4996
4997=item hashref
4998
4999The keys are the index to the row (the field name or field number, 1-based)
5000and the values are subs to return a true or false value.
5001
5002 csv (in => "file.csv", filter => {
5003 3 => sub { m/a/ }, # third field should contain an "a"
5004 5 => sub { length > 4 }, # length of the 5th field minimal 5
5005 });
5006
5007 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
5008
5009If the keys to the filter hash contain any character that is not a digit it
5010will also implicitly set L</headers> to C<"auto"> unless L</headers> was
5011already passed as argument. When headers are active, returning an array of
5012hashes, the filter is not applicable to the header itself.
5013
5014All sub results should match, as in AND.
5015
5016The context of the callback sets C<$_> localized to the field indicated by
5017the filter. The two arguments are as with all other callbacks, so the other
5018fields in the current row can be seen:
5019
5020 filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
5021
5022If the context is set to return a list of hashes (L</headers> is defined),
5023the current record will also be available in the localized C<%_>:
5024
5025 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
5026
5027If the filter is used to I<alter> the content by changing C<$_>, make sure
5028that the sub returns true in order not to have that record skipped:
5029
5030 filter => { 2 => sub { $_ = uc }}
5031
5032will upper-case the second field, and then skip it if the resulting content
5033evaluates to false. To always accept, end with truth:
5034
5035 filter => { 2 => sub { $_ = uc; 1 }}
5036
5037=item coderef
5038
5039 csv (in => "file.csv", filter => sub { $n++; 0; });
5040
5041If the argument to C<filter> is a coderef, it is an alias or shortcut to a
5042filter on column 0:
5043
5044 csv (filter => sub { $n++; 0 });
5045
5046is equal to
5047
5048 csv (filter => { 0 => sub { $n++; 0 });
5049
5050=item filter-name
5051
5052 csv (in => "file.csv", filter => "not_blank");
5053 csv (in => "file.csv", filter => "not_empty");
5054 csv (in => "file.csv", filter => "filled");
5055
5056These are predefined filters
5057
5058Given a file like (line numbers prefixed for doc purpose only):
5059
5060 1:1,2,3
5061 2:
5062 3:,
5063 4:""
5064 5:,,
5065 6:, ,
5066 7:"",
5067 8:" "
5068 9:4,5,6
5069
5070=over 2
5071
5072=item not_blank
5073
5074Filter out the blank lines
5075
5076This filter is a shortcut for
5077
5078 filter => { 0 => sub { @{$_[1]} > 1 or
5079 defined $_[1][0] && $_[1][0] ne "" } }
5080
5081Due to the implementation, it is currently impossible to also filter lines
5082that consists only of a quoted empty field. These lines are also considered
5083blank lines.
5084
5085With the given example, lines 2 and 4 will be skipped.
5086
5087=item not_empty
5088
5089Filter out lines where all the fields are empty.
5090
5091This filter is a shortcut for
5092
5093 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
5094
5095A space is not regarded being empty, so given the example data, lines 2, 3,
50964, 5, and 7 are skipped.
5097
5098=item filled
5099
5100Filter out lines that have no visible data
5101
5102This filter is a shortcut for
5103
5104 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
5105
5106This filter rejects all lines that I<not> have at least one field that does
5107not evaluate to the empty string.
5108
5109With the given example data, this filter would skip lines 2 through 8.
5110
5111=back
5112
5113=back
5114
5115=item after_in
5116
5117This callback is invoked for each record after all records have been parsed
5118but before returning the reference to the caller. The hook is invoked with
5119two arguments: the current C<CSV> parser object and a reference to the
5120record. The reference can be a reference to a HASH or a reference to an
5121ARRAY as determined by the arguments.
5122
5123This callback can also be passed as an attribute without the C<callbacks>
5124wrapper.
5125
5126=item before_out
5127
5128This callback is invoked for each record before the record is printed. The
5129hook is invoked with two arguments: the current C<CSV> parser object and a
5130reference to the record. The reference can be a reference to a HASH or a
5131reference to an ARRAY as determined by the arguments.
5132
5133This callback can also be passed as an attribute without the C<callbacks>
5134wrapper.
5135
5136This callback makes the row available in C<%_> if the row is a hashref. In
5137this case C<%_> is writable and will change the original row.
5138
5139=item on_in
5140
5141This callback acts exactly as the L</after_in> or the L</before_out> hooks.
5142
5143This callback can also be passed as an attribute without the C<callbacks>
5144wrapper.
5145
5146This callback makes the row available in C<%_> if the row is a hashref. In
5147this case C<%_> is writable and will change the original row. So e.g. with
5148
5149 my $aoh = csv (
5150 in => \"foo\n1\n2\n",
5151 headers => "auto",
5152 on_in => sub { $_{bar} = 2; },
5153 );
5154
5155C<$aoh> will be:
5156
5157 [ { foo => 1,
5158 bar => 2,
5159 }
5160 { foo => 2,
5161 bar => 2,
5162 }
5163 ]
5164
5165=item csv
5166
5167The I<function> L</csv> can also be called as a method or with an existing
5168Text::CSV_PP object. This could help if the function is to be invoked a lot
5169of times and the overhead of creating the object internally over and over
5170again would be prevented by passing an existing instance.
5171
5172 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
5173
5174 my $aoa = $csv->csv (in => $fh);
5175 my $aoa = csv (in => $fh, csv => $csv);
5176
5177both act the same. Running this 20000 times on a 20 lines CSV file, showed
5178a 53% speedup.
5179
5180=back
5181
5182=head1 DIAGNOSTICS
5183
5184This section is also taken from Text::CSV_XS.
5185
5186Still under construction ...
5187
5188If an error occurs, C<< $csv->error_diag >> can be used to get information
5189on the cause of the failure. Note that for speed reasons the internal value
5190is never cleared on success, so using the value returned by L</error_diag>
5191in normal cases - when no error occurred - may cause unexpected results.
5192
5193If the constructor failed, the cause can be found using L</error_diag> as a
5194class method, like C<< Text::CSV_PP->error_diag >>.
5195
5196The C<< $csv->error_diag >> method is automatically invoked upon error when
5197the contractor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
5198C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
5199C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
5200EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
5201
5202Errors can be (individually) caught using the L</error> callback.
5203
5204The errors as described below are available. I have tried to make the error
5205itself explanatory enough, but more descriptions will be added. For most of
5206these errors, the first three capitals describe the error category:
5207
5208=over 2
5209
5210=item *
5211INI
5212
5213Initialization error or option conflict.
5214
5215=item *
5216ECR
5217
5218Carriage-Return related parse error.
5219
5220=item *
5221EOF
5222
5223End-Of-File related parse error.
5224
5225=item *
5226EIQ
5227
5228Parse error inside quotation.
5229
5230=item *
5231EIF
5232
5233Parse error inside field.
5234
5235=item *
5236ECB
5237
5238Combine error.
5239
5240=item *
5241EHR
5242
5243HashRef parse related error.
5244
5245=back
5246
5247And below should be the complete list of error codes that can be returned:
5248
5249=over 2
5250
5251=item *
52521001 "INI - sep_char is equal to quote_char or escape_char"
5253
5254The L<separation character|/sep_char> cannot be equal to L<the quotation
5255character|/quote_char> or to L<the escape character|/escape_char>, as this
5256would invalidate all parsing rules.
5257
5258=item *
52591002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
5260
5261Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
5262L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
5263C<SPACE> or C<TAB> is too ambiguous to allow.
5264
5265=item *
52661003 "INI - \r or \n in main attr not allowed"
5267
5268Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
5269L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
5270allowed.
5271
5272=item *
52731004 "INI - callbacks should be undef or a hashref"
5274
5275The L<C<callbacks>|/Callbacks> attribute only allows one to be C<undef> or
5276a hash reference.
5277
5278=item *
52791005 "INI - EOL too long"
5280
5281The value passed for EOL is exceeding its maximum length (16).
5282
5283=item *
52841006 "INI - SEP too long"
5285
5286The value passed for SEP is exceeding its maximum length (16).
5287
5288=item *
52891007 "INI - QUOTE too long"
5290
5291The value passed for QUOTE is exceeding its maximum length (16).
5292
5293=item *
52941008 "INI - SEP undefined"
5295
5296The value passed for SEP should be defined and not empty.
5297
5298=item *
52991010 "INI - the header is empty"
5300
5301The header line parsed in the L</header> is empty.
5302
5303=item *
53041011 "INI - the header contains more than one valid separator"
5305
5306The header line parsed in the L</header> contains more than one (unique)
5307separator character out of the allowed set of separators.
5308
5309=item *
53101012 "INI - the header contains an empty field"
5311
5312The header line parsed in the L</header> is contains an empty field.
5313
5314=item *
53151013 "INI - the header contains nun-unique fields"
5316
5317The header line parsed in the L</header> contains at least two identical
5318fields.
5319
5320=item *
53211014 "INI - header called on undefined stream"
5322
5323The header line cannot be parsed from an undefined sources.
5324
5325=item *
53261500 "PRM - Invalid/unsupported argument(s)"
5327
5328Function or method called with invalid argument(s) or parameter(s).
5329
5330=item *
53311501 "PRM - The key attribute is passed as an unsupported type"
5332
5333The C<key> attribute is of an unsupported type.
5334
5335=item *
53361502 "PRM - The value attribute is passed without the key attribute"
5337
5338The C<value> attribute is only allowed when a valid key is given.
5339
5340=item *
53411503 "PRM - The value attribute is passed as an unsupported type"
5342
5343The C<value> attribute is of an unsupported type.
5344
5345=item *
53462010 "ECR - QUO char inside quotes followed by CR not part of EOL"
5347
5348When L<C<eol>|/eol> has been set to anything but the default, like
5349C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
5350L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
5351not make up the L<C<eol>|/eol> sequence, this is an error.
5352
5353=item *
53542011 "ECR - Characters after end of quoted field"
5355
5356Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
5357field and after the closing double-quote, there should be either a new-line
5358sequence or a separation character.
5359
5360=item *
53612012 "EOF - End of data in parsing input stream"
5362
5363Self-explaining. End-of-file while inside parsing a stream. Can happen only
5364when reading from streams with L</getline>, as using L</parse> is done on
5365strings that are not required to have a trailing L<C<eol>|/eol>.
5366
5367=item *
53682013 "INI - Specification error for fragments RFC7111"
5369
5370Invalid specification for URI L</fragment> specification.
5371
5372=item *
53732014 "ENF - Inconsistent number of fields"
5374
5375Inconsistent number of fields under strict parsing.
5376
5377=item *
53782021 "EIQ - NL char inside quotes, binary off"
5379
5380Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
5381has been selected with the constructor.
5382
5383=item *
53842022 "EIQ - CR char inside quotes, binary off"
5385
5386Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
5387has been selected with the constructor.
5388
5389=item *
53902023 "EIQ - QUO character not allowed"
5391
5392Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
5393will cause this error.
5394
5395=item *
53962024 "EIQ - EOF cannot be escaped, not even inside quotes"
5397
5398The escape character is not allowed as last character in an input stream.
5399
5400=item *
54012025 "EIQ - Loose unescaped escape"
5402
5403An escape character should escape only characters that need escaping.
5404
5405Allowing the escape for other characters is possible with the attribute
5406L</allow_loose_escape>.
5407
5408=item *
54092026 "EIQ - Binary character inside quoted field, binary off"
5410
5411Binary characters are not allowed by default. Exceptions are fields that
5412contain valid UTF-8, that will automatically be upgraded if the content is
5413valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
5414
5415=item *
54162027 "EIQ - Quoted field not terminated"
5417
5418When parsing a field that started with a quotation character, the field is
5419expected to be closed with a quotation character. When the parsed line is
5420exhausted before the quote is found, that field is not terminated.
5421
5422=item *
54232030 "EIF - NL char inside unquoted verbatim, binary off"
5424
5425=item *
54262031 "EIF - CR char is first char of field, not part of EOL"
5427
5428=item *
54292032 "EIF - CR char inside unquoted, not part of EOL"
5430
5431=item *
54322034 "EIF - Loose unescaped quote"
5433
5434=item *
54352035 "EIF - Escaped EOF in unquoted field"
5436
5437=item *
54382036 "EIF - ESC error"
5439
5440=item *
54412037 "EIF - Binary character in unquoted field, binary off"
5442
5443=item *
54442110 "ECB - Binary character in Combine, binary off"
5445
5446=item *
54472200 "EIO - print to IO failed. See errno"
5448
5449=item *
54503001 "EHR - Unsupported syntax for column_names ()"
5451
5452=item *
54533002 "EHR - getline_hr () called before column_names ()"
5454
5455=item *
54563003 "EHR - bind_columns () and column_names () fields count mismatch"
5457
5458=item *
54593004 "EHR - bind_columns () only accepts refs to scalars"
5460
5461=item *
54623006 "EHR - bind_columns () did not pass enough refs for parsed fields"
5463
5464=item *
54653007 "EHR - bind_columns needs refs to writable scalars"
5466
5467=item *
54683008 "EHR - unexpected error in bound fields"
5469
5470=item *
54713009 "EHR - print_hr () called before column_names ()"
5472
5473=item *
54743010 "EHR - print_hr () called with invalid arguments"
5475
5476=back
5477
5478=head1 SEE ALSO
5479
5480L<Text::CSV_XS>, L<Text::CSV>
5481
5482Older versions took many regexp from L<http://www.din.or.jp/~ohzaki/perl.htm>
5483
5484=head1 AUTHOR
5485
5486Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5487Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5488
5489Text::CSV_XS was written by E<lt>joe[at]ispsoft.deE<gt>
5490and maintained by E<lt>h.m.brand[at]xs4all.nlE<gt>.
5491
5492Text::CSV was written by E<lt>alan[at]mfgrtl.comE<gt>.
5493
5494=head1 COPYRIGHT AND LICENSE
5495
5496Copyright 2017- by Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5497Copyright 2005-2015 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5498
5499Most of the code and doc is directly taken from the pure perl part of
5500Text::CSV_XS.
5501
5502Copyright (C) 2007-2016 H.Merijn Brand. All rights reserved.
5503Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
5504Copyright (C) 1997 Alan Citterman. All rights reserved.
5505
5506This library is free software; you can redistribute it and/or modify
5507it under the same terms as Perl itself.
5508
5509=cut
Note: See TracBrowser for help on using the repository browser.