root/main/trunk/greenstone2/perllib/cpan/Text/CSV_PP.pm @ 33235

Revision 33235, 168.2 KB (checked in by davidb, 14 months ago)

CPAN module for processing CSV files

Line 
1package Text::CSV_PP;
2
3################################################################################
4#
5# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
6#
7################################################################################
8require 5.006001;
9
10use strict;
11use Exporter ();
12use vars qw($VERSION @ISA @EXPORT_OK);
13use Carp;
14
15$VERSION = '1.99';
16@ISA = qw(Exporter);
17@EXPORT_OK = qw(csv);
18
19sub PV  { 0 }
20sub IV  { 1 }
21sub NV  { 2 }
22
23sub IS_QUOTED () { 0x0001; }
24sub IS_BINARY () { 0x0002; }
25sub IS_ERROR ()  { 0x0004; }
26sub IS_MISSING () { 0x0010; }
27
28sub HOOK_ERROR () { 0x0001; }
29sub HOOK_AFTER_PARSE () { 0x0002; }
30sub HOOK_BEFORE_PRINT () { 0x0004; }
31
32sub useIO_EOF () { 0x0010; }
33
34my $ERRORS = {
35        # Generic errors
36        1000 => "INI - constructor failed",
37        1001 => "INI - sep_char is equal to quote_char or escape_char",
38        1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
39        1003 => "INI - \\r or \\n in main attr not allowed",
40        1004 => "INI - callbacks should be undef or a hashref",
41        1005 => "INI - EOL too long",
42        1006 => "INI - SEP too long",
43        1007 => "INI - QUOTE too long",
44        1008 => "INI - SEP undefined",
45
46        1010 => "INI - the header is empty",
47        1011 => "INI - the header contains more than one valid separator",
48        1012 => "INI - the header contains an empty field",
49        1013 => "INI - the header contains nun-unique fields",
50        1014 => "INI - header called on undefined stream",
51
52        # Syntax errors
53        1500 => "PRM - Invalid/unsupported arguments(s)",
54        1501 => "PRM - The key attribute is passed as an unsupported type",
55        1502 => "PRM - The value attribute is passed without the key attribute",
56        1503 => "PRM - The value attribute is passed as an unsupported type",
57
58        # Parse errors
59        2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
60        2011 => "ECR - Characters after end of quoted field",
61        2012 => "EOF - End of data in parsing input stream",
62        2013 => "ESP - Specification error for fragments RFC7111",
63        2014 => "ENF - Inconsistent number of fields",
64
65        # EIQ - Error Inside Quotes
66        2021 => "EIQ - NL char inside quotes, binary off",
67        2022 => "EIQ - CR char inside quotes, binary off",
68        2023 => "EIQ - QUO character not allowed",
69        2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
70        2025 => "EIQ - Loose unescaped escape",
71        2026 => "EIQ - Binary character inside quoted field, binary off",
72        2027 => "EIQ - Quoted field not terminated",
73
74        # EIF - Error Inside Field
75        2030 => "EIF - NL char inside unquoted verbatim, binary off",
76        2031 => "EIF - CR char is first char of field, not part of EOL",
77        2032 => "EIF - CR char inside unquoted, not part of EOL",
78        2034 => "EIF - Loose unescaped quote",
79        2035 => "EIF - Escaped EOF in unquoted field",
80        2036 => "EIF - ESC error",
81        2037 => "EIF - Binary character in unquoted field, binary off",
82
83        # Combine errors
84        2110 => "ECB - Binary character in Combine, binary off",
85
86        # IO errors
87        2200 => "EIO - print to IO failed. See errno",
88
89        # Hash-Ref errors
90        3001 => "EHR - Unsupported syntax for column_names ()",
91        3002 => "EHR - getline_hr () called before column_names ()",
92        3003 => "EHR - bind_columns () and column_names () fields count mismatch",
93        3004 => "EHR - bind_columns () only accepts refs to scalars",
94        3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
95        3007 => "EHR - bind_columns needs refs to writable scalars",
96        3008 => "EHR - unexpected error in bound fields",
97        3009 => "EHR - print_hr () called before column_names ()",
98        3010 => "EHR - print_hr () called with invalid arguments",
99
100        4001 => "PRM - The key does not exist as field in the data",
101
102        0    => "",
103};
104
105BEGIN {
106    if ( $] < 5.006 ) {
107        $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
108        no strict 'refs';
109        *{"utf8::is_utf8"} = sub { 0; };
110        *{"utf8::decode"}  = sub { };
111    }
112    elsif ( $] < 5.008 ) {
113        no strict 'refs';
114        *{"utf8::is_utf8"} = sub { 0; };
115        *{"utf8::decode"}  = sub { };
116        *{"utf8::encode"}  = sub { };
117    }
118    elsif ( !defined &utf8::is_utf8 ) {
119       require Encode;
120       *utf8::is_utf8 = *Encode::is_utf8;
121    }
122
123    eval q| require Scalar::Util |;
124    if ( $@ ) {
125        eval q| require B |;
126        if ( $@ ) {
127            Carp::croak $@;
128        }
129        else {
130            my %tmap = qw(
131                B::NULL   SCALAR
132                B::HV     HASH
133                B::AV     ARRAY
134                B::CV     CODE
135                B::IO     IO
136                B::GV     GLOB
137                B::REGEXP REGEXP
138            );
139            *Scalar::Util::reftype = sub (\$) {
140                my $r = shift;
141                return undef unless length(ref($r));
142                my $t = ref(B::svref_2object($r));
143                return
144                    exists $tmap{$t} ? $tmap{$t}
145                  : length(ref($$r)) ? 'REF'
146                  :                    'SCALAR';
147            };
148            *Scalar::Util::readonly = sub (\$) {
149                my $b = B::svref_2object( $_[0] );
150                $b->FLAGS & 0x00800000; # SVf_READONLY?
151            };
152        }
153    }
154}
155
156################################################################################
157#
158# Common pure perl methods, taken almost directly from Text::CSV_XS.
159# (These should be moved into a common class eventually, so that
160# both XS and PP don't need to apply the same changes.)
161#
162################################################################################
163
164################################################################################
165# version
166################################################################################
167
168sub version {
169    return $VERSION;
170}
171
172################################################################################
173# new
174################################################################################
175
176my %def_attr = (
177    eol             => '',
178    sep_char            => ',',
179    quote_char          => '"',
180    escape_char         => '"',
181    binary          => 0,
182    decode_utf8         => 1,
183    auto_diag           => 0,
184    diag_verbose        => 0,
185    strict              => 0,
186    blank_is_undef      => 0,
187    empty_is_undef      => 0,
188    allow_whitespace        => 0,
189    allow_loose_quotes      => 0,
190    allow_loose_escapes     => 0,
191    allow_unquoted_escape   => 0,
192    always_quote        => 0,
193    quote_empty         => 0,
194    quote_space         => 1,
195    quote_binary        => 1,
196    escape_null         => 1,
197    keep_meta_info      => 0,
198    verbatim            => 0,
199    formula         => 0,
200    undef_str           => undef,
201    types           => undef,
202    callbacks           => undef,
203
204    _EOF            => 0,
205    _RECNO          => 0,
206    _STATUS         => undef,
207    _FIELDS         => undef,
208    _FFLAGS         => undef,
209    _STRING         => undef,
210    _ERROR_INPUT        => undef,
211    _COLUMN_NAMES       => undef,
212    _BOUND_COLUMNS      => undef,
213    _AHEAD          => undef,
214
215    ENCODING            => undef,
216);
217
218my %attr_alias = (
219    quote_always        => "always_quote",
220    verbose_diag        => "diag_verbose",
221    quote_null          => "escape_null",
222    escape          => "escape_char",
223    );
224
225my $last_new_error = Text::CSV_PP->SetDiag(0);
226my $last_error;
227
228# NOT a method: is also used before bless
229sub _unhealthy_whitespace {
230    my ($self, $aw) = @_;
231    $aw or return 0; # no checks needed without allow_whitespace
232
233    my $quo = $self->{quote};
234    defined $quo && length ($quo) or $quo = $self->{quote_char};
235    my $esc = $self->{escape_char};
236
237    defined $quo && $quo =~ m/^[ \t]/ and return 1002;
238    defined $esc && $esc =~ m/^[ \t]/ and return 1002;
239
240    return 0;
241    }
242
243sub _check_sanity {
244    my $self = shift;
245
246    my $eol = $self->{eol};
247    my $sep = $self->{sep};
248    defined $sep && length ($sep) or $sep = $self->{sep_char};
249    my $quo = $self->{quote};
250    defined $quo && length ($quo) or $quo = $self->{quote_char};
251    my $esc = $self->{escape_char};
252
253#    use DP;::diag ("SEP: '", DPeek ($sep),
254#                "', QUO: '", DPeek ($quo),
255#                "', ESC: '", DPeek ($esc),"'");
256
257    # sep_char should not be undefined
258    $sep ne ""         or  return 1008;
259    length ($sep) > 16     and return 1006;
260    $sep =~ m/[\r\n]/      and return 1003;
261
262    if (defined $quo) {
263        $quo eq $sep        and return 1001;
264        length ($quo) > 16  and return 1007;
265        $quo =~ m/[\r\n]/   and return 1003;
266        }
267    if (defined $esc) {
268        $esc eq $sep        and return 1001;
269        $esc =~ m/[\r\n]/   and return 1003;
270        }
271    if (defined $eol) {
272        length ($eol) > 16  and return 1005;
273        }
274
275    return _unhealthy_whitespace ($self, $self->{allow_whitespace});
276    }
277
278sub known_attributes {
279    sort grep !m/^_/ => "sep", "quote", keys %def_attr;
280    }
281
282sub new {
283    $last_new_error   = Text::CSV_PP->SetDiag(1000,
284        'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);');
285
286    my $proto = shift;
287    my $class = ref ($proto) || $proto  or  return;
288    @_ > 0 &&   ref $_[0] ne "HASH" and return;
289    my $attr  = shift || {};
290    my %attr  = map {
291        my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
292        exists $attr_alias{$k} and $k = $attr_alias{$k};
293        $k => $attr->{$_};
294        } keys %$attr;
295
296    my $sep_aliased = 0;
297    if (exists $attr{sep}) {
298        $attr{sep_char} = delete $attr{sep};
299        $sep_aliased = 1;
300        }
301    my $quote_aliased = 0;
302    if (exists $attr{quote}) {
303        $attr{quote_char} = delete $attr{quote};
304        $quote_aliased = 1;
305        }
306    exists $attr{formula_handling} and
307        $attr{formula} = delete $attr{formula_handling};
308    exists $attr{formula} and
309        $attr{formula} = _supported_formula (undef, $attr{formula});
310    for (keys %attr) {
311        if (m/^[a-z]/ && exists $def_attr{$_}) {
312            # uncoverable condition false
313            defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
314            next;
315            }
316#        croak?
317        $last_new_error = Text::CSV_PP->SetDiag(1000, "INI - Unknown attribute '$_'");
318        $attr{auto_diag} and error_diag ();
319        return;
320        }
321    if ($sep_aliased) {
322        my @b = unpack "U0C*", $attr{sep_char};
323        if (@b > 1) {
324            $attr{sep} = $attr{sep_char};
325            $attr{sep_char} = "\0";
326            }
327        else {
328            $attr{sep} = undef;
329            }
330        }
331    if ($quote_aliased and defined $attr{quote_char}) {
332        my @b = unpack "U0C*", $attr{quote_char};
333        if (@b > 1) {
334            $attr{quote} = $attr{quote_char};
335            $attr{quote_char} = "\0";
336            }
337        else {
338            $attr{quote} = undef;
339            }
340        }
341
342    my $self = { %def_attr, %attr };
343    if (my $ec = _check_sanity ($self)) {
344        $last_new_error   = Text::CSV_PP->SetDiag($ec);
345        $attr{auto_diag} and error_diag ();
346        return;
347        }
348    if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
349        Carp::carp "The 'callbacks' attribute is set but is not a hash: ignored\n";
350        $self->{callbacks} = undef;
351        }
352
353    $last_new_error = Text::CSV_PP->SetDiag(0);
354    defined $\ && !exists $attr{eol} and $self->{eol} = $\;
355    bless $self, $class;
356    defined $self->{types} and $self->types ($self->{types});
357    $self;
358}
359
360# Keep in sync with XS!
361my %_cache_id = ( # Only expose what is accessed from within PM
362    quote_char          =>  0,
363    escape_char         =>  1,
364    sep_char            =>  2,
365    sep             => 39,  # 39 .. 55
366    binary          =>  3,
367    keep_meta_info      =>  4,
368    always_quote        =>  5,
369    allow_loose_quotes      =>  6,
370    allow_loose_escapes     =>  7,
371    allow_unquoted_escape   =>  8,
372    allow_whitespace        =>  9,
373    blank_is_undef      => 10,
374    eol             => 11,
375    quote           => 15,
376    verbatim            => 22,
377    empty_is_undef      => 23,
378    auto_diag           => 24,
379    diag_verbose        => 33,
380    quote_space         => 25,
381    quote_empty         => 37,
382    quote_binary        => 32,
383    escape_null         => 31,
384    decode_utf8         => 35,
385    _has_ahead          => 30,
386    _has_hooks          => 36,
387    _is_bound           => 26,  # 26 .. 29
388    formula         => 38,
389    strict              => 42,
390    undef_str       => 46,
391    );
392
393my %_hidden_cache_id = qw(
394    sep_len     38
395    eol_len     12
396    eol_is_cr       13
397    quo_len     16
398    has_error_input     34
399);
400
401my %_reverse_cache_id = (
402    map({$_cache_id{$_} => $_} keys %_cache_id),
403    map({$_hidden_cache_id{$_} => $_} keys %_hidden_cache_id),
404);
405
406# A `character'
407sub _set_attr_C {
408    my ($self, $name, $val, $ec) = @_;
409    defined $val or $val = 0;
410    utf8::decode ($val);
411    $self->{$name} = $val;
412    $ec = _check_sanity ($self) and croak ($self->SetDiag ($ec));
413    $self->_cache_set ($_cache_id{$name}, $val);
414    }
415
416# A flag
417sub _set_attr_X {
418    my ($self, $name, $val) = @_;
419    defined $val or $val = 0;
420    $self->{$name} = $val;
421    $self->_cache_set ($_cache_id{$name}, 0 + $val);
422    }
423
424# A number
425sub _set_attr_N {
426    my ($self, $name, $val) = @_;
427    $self->{$name} = $val;
428    $self->_cache_set ($_cache_id{$name}, 0 + $val);
429    }
430
431# Accessor methods.
432#   It is unwise to change them halfway through a single file!
433sub quote_char {
434    my $self = shift;
435    if (@_) {
436        $self->_set_attr_C ("quote_char", shift);
437        $self->_cache_set ($_cache_id{quote}, "");
438        }
439    $self->{quote_char};
440    }
441
442sub quote {
443    my $self = shift;
444    if (@_) {
445        my $quote = shift;
446        defined $quote or $quote = "";
447        utf8::decode ($quote);
448        my @b = unpack "U0C*", $quote;
449        if (@b > 1) {
450            @b > 16 and croak ($self->SetDiag (1007));
451            $self->quote_char ("\0");
452            }
453        else {
454            $self->quote_char ($quote);
455            $quote = "";
456            }
457        $self->{quote} = $quote;
458
459        my $ec = _check_sanity ($self);
460        $ec and croak ($self->SetDiag ($ec));
461
462        $self->_cache_set ($_cache_id{quote}, $quote);
463        }
464    my $quote = $self->{quote};
465    defined $quote && length ($quote) ? $quote : $self->{quote_char};
466    }
467
468sub escape_char {
469    my $self = shift;
470    if (@_) {
471        my $ec = shift;
472        $self->_set_attr_C ("escape_char", $ec);
473        $ec or $self->_set_attr_X ("escape_null", 0);
474        }
475    $self->{escape_char};
476    }
477
478sub sep_char {
479    my $self = shift;
480    if (@_) {
481        $self->_set_attr_C ("sep_char", shift);
482        $self->_cache_set ($_cache_id{sep}, "");
483        }
484    $self->{sep_char};
485}
486
487sub sep {
488    my $self = shift;
489    if (@_) {
490        my $sep = shift;
491        defined $sep or $sep = "";
492        utf8::decode ($sep);
493        my @b = unpack "U0C*", $sep;
494        if (@b > 1) {
495            @b > 16 and croak ($self->SetDiag (1006));
496            $self->sep_char ("\0");
497            }
498        else {
499            $self->sep_char ($sep);
500            $sep = "";
501            }
502        $self->{sep} = $sep;
503
504        my $ec = _check_sanity ($self);
505        $ec and croak ($self->SetDiag ($ec));
506
507        $self->_cache_set ($_cache_id{sep}, $sep);
508        }
509    my $sep = $self->{sep};
510    defined $sep && length ($sep) ? $sep : $self->{sep_char};
511    }
512
513sub eol {
514    my $self = shift;
515    if (@_) {
516        my $eol = shift;
517        defined $eol or $eol = "";
518        length ($eol) > 16 and croak ($self->SetDiag (1005));
519        $self->{eol} = $eol;
520        $self->_cache_set ($_cache_id{eol}, $eol);
521        }
522    $self->{eol};
523    }
524
525sub always_quote {
526    my $self = shift;
527    @_ and $self->_set_attr_X ("always_quote", shift);
528    $self->{always_quote};
529    }
530
531sub quote_space {
532    my $self = shift;
533    @_ and $self->_set_attr_X ("quote_space", shift);
534    $self->{quote_space};
535    }
536
537sub quote_empty {
538    my $self = shift;
539    @_ and $self->_set_attr_X ("quote_empty", shift);
540    $self->{quote_empty};
541    }
542
543sub escape_null {
544    my $self = shift;
545    @_ and $self->_set_attr_X ("escape_null", shift);
546    $self->{escape_null};
547    }
548
549sub quote_null { goto &escape_null; }
550
551sub quote_binary {
552    my $self = shift;
553    @_ and $self->_set_attr_X ("quote_binary", shift);
554    $self->{quote_binary};
555    }
556
557sub binary {
558    my $self = shift;
559    @_ and $self->_set_attr_X ("binary", shift);
560    $self->{binary};
561    }
562
563sub strict {
564    my $self = shift;
565    @_ and $self->_set_attr_X ("strict", shift);
566    $self->{strict};
567    }
568
569sub _SetDiagInfo {
570     my ($self, $err, $msg) = @_;
571     $self->SetDiag ($err);
572     my $em  = $self->error_diag;
573     $em =~ s/^\d+$// and $msg =~ s/^/# /;
574     my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": ";
575     join $sep => grep m/\S\S\S/ => $em, $msg;
576     }
577
578sub _supported_formula {
579    my ($self, $f) = @_;
580    defined $f or return 5;
581    $f =~ m/^(?: 0 | none    )$/xi ? 0 :
582    $f =~ m/^(?: 1 | die     )$/xi ? 1 :
583    $f =~ m/^(?: 2 | croak   )$/xi ? 2 :
584    $f =~ m/^(?: 3 | diag    )$/xi ? 3 :
585    $f =~ m/^(?: 4 | empty | )$/xi ? 4 :
586    $f =~ m/^(?: 5 | undef   )$/xi ? 5 : do {
587        $self ||= "Text::CSV_PP";
588        croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
589        };
590    }
591
592sub formula {
593    my $self = shift;
594    @_ and $self->_set_attr_N ("formula", _supported_formula ($self, shift));
595    [qw( none die croak diag empty undef )]->[_supported_formula ($self, $self->{formula})];
596    }
597sub formula_handling {
598    my $self = shift;
599    $self->formula (@_);
600    }
601
602sub decode_utf8 {
603    my $self = shift;
604    @_ and $self->_set_attr_X ("decode_utf8", shift);
605    $self->{decode_utf8};
606}
607
608sub keep_meta_info {
609    my $self = shift;
610    if (@_) {
611        my $v = shift;
612        !defined $v || $v eq "" and $v = 0;
613        $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
614        $self->_set_attr_X ("keep_meta_info", $v);
615        }
616    $self->{keep_meta_info};
617    }
618
619sub allow_loose_quotes {
620    my $self = shift;
621    @_ and $self->_set_attr_X ("allow_loose_quotes", shift);
622    $self->{allow_loose_quotes};
623    }
624
625sub allow_loose_escapes {
626    my $self = shift;
627    @_ and $self->_set_attr_X ("allow_loose_escapes", shift);
628    $self->{allow_loose_escapes};
629    }
630
631sub allow_whitespace {
632    my $self = shift;
633    if (@_) {
634        my $aw = shift;
635        _unhealthy_whitespace ($self, $aw) and
636            croak ($self->SetDiag (1002));
637        $self->_set_attr_X ("allow_whitespace", $aw);
638        }
639    $self->{allow_whitespace};
640    }
641
642sub allow_unquoted_escape {
643    my $self = shift;
644    @_ and $self->_set_attr_X ("allow_unquoted_escape", shift);
645    $self->{allow_unquoted_escape};
646    }
647
648sub blank_is_undef {
649    my $self = shift;
650    @_ and $self->_set_attr_X ("blank_is_undef", shift);
651    $self->{blank_is_undef};
652    }
653
654sub empty_is_undef {
655    my $self = shift;
656    @_ and $self->_set_attr_X ("empty_is_undef", shift);
657    $self->{empty_is_undef};
658    }
659
660sub verbatim {
661    my $self = shift;
662    @_ and $self->_set_attr_X ("verbatim", shift);
663    $self->{verbatim};
664    }
665
666sub undef_str {
667    my $self = shift;
668    if (@_) {
669        my $v = shift;
670        $self->{undef_str} = defined $v ? "$v" : undef;
671        $self->_cache_set ($_cache_id{undef_str}, $self->{undef_str});
672        }
673    $self->{undef_str};
674    }
675
676sub auto_diag {
677    my $self = shift;
678    if (@_) {
679        my $v = shift;
680        !defined $v || $v eq "" and $v = 0;
681        $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
682        $self->_set_attr_X ("auto_diag", $v);
683        }
684    $self->{auto_diag};
685    }
686
687sub diag_verbose {
688    my $self = shift;
689    if (@_) {
690        my $v = shift;
691        !defined $v || $v eq "" and $v = 0;
692        $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
693        $self->_set_attr_X ("diag_verbose", $v);
694        }
695    $self->{diag_verbose};
696    }
697
698################################################################################
699# status
700################################################################################
701
702sub status {
703    $_[0]->{_STATUS};
704}
705
706sub eof {
707    $_[0]->{_EOF};
708}
709
710sub types {
711    my $self = shift;
712
713    if (@_) {
714        if (my $types = shift) {
715            $self->{'_types'} = join("", map{ chr($_) } @$types);
716            $self->{'types'} = $types;
717        }
718        else {
719            delete $self->{'types'};
720            delete $self->{'_types'};
721            undef;
722        }
723    }
724    else {
725        $self->{'types'};
726    }
727}
728
729sub callbacks {
730    my $self = shift;
731    if (@_) {
732        my $cb;
733        my $hf = 0x00;
734        if (defined $_[0]) {
735            grep { !defined } @_ and croak ($self->SetDiag (1004));
736            $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
737                : @_ % 2 == 0                    ? { @_ }
738                : croak ($self->SetDiag (1004));
739            foreach my $cbk (keys %$cb) {
740                # A key cannot be a ref. That would be stored as the *string
741                # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
742                $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
743                    croak ($self->SetDiag (1004));
744                }
745            exists $cb->{error}        and $hf |= 0x01;
746            exists $cb->{after_parse}  and $hf |= 0x02;
747            exists $cb->{before_print} and $hf |= 0x04;
748            }
749        elsif (@_ > 1) {
750            # (undef, whatever)
751            croak ($self->SetDiag (1004));
752            }
753        $self->_set_attr_X ("_has_hooks", $hf);
754        $self->{callbacks} = $cb;
755        }
756    $self->{callbacks};
757    }
758
759################################################################################
760# error_diag
761################################################################################
762
763sub error_diag {
764    my $self = shift;
765    my @diag = (0 + $last_new_error, $last_new_error, 0, 0, 0);
766
767    # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call an
768    # overridden isa method in any class. Well, that is exacly what I want here
769    if ($self && ref $self && # Not a class method or direct call
770        UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{_ERROR_DIAG}) {
771        $diag[0] = 0 + $self->{_ERROR_DIAG};
772        $diag[1] =     $self->{_ERROR_DIAG};
773        $diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
774        $diag[3] =     $self->{_RECNO};
775        $diag[4] =     $self->{_ERROR_FLD} if exists $self->{_ERROR_FLD};
776
777        $diag[0] && $self->{callbacks} && $self->{callbacks}{error} and
778            return $self->{callbacks}{error}->(@diag);
779        }
780
781    my $context = wantarray;
782
783    unless (defined $context) { # Void context, auto-diag
784        if ($diag[0] && $diag[0] != 2012) {
785            my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
786            $diag[4] and $msg =~ s/$/ field $diag[4]/;
787
788            unless ($self && ref $self) {        # auto_diag
789                    # called without args in void context
790                warn $msg;
791                return;
792                }
793
794            if ($self->{diag_verbose} and $self->{_ERROR_INPUT}) {
795                $msg .= "$self->{_ERROR_INPUT}'\n";
796                $msg .= " " x ($diag[2] - 1);
797                $msg .= "^\n";
798                }
799
800            my $lvl = $self->{auto_diag};
801            if ($lvl < 2) {
802                my @c = caller (2);
803                if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
804                    my $hints = $c[10];
805                    (exists $hints->{autodie} && $hints->{autodie} or
806                     exists $hints->{"guard Fatal"} &&
807                    !exists $hints->{"no Fatal"}) and
808                        $lvl++;
809                    # Future releases of autodie will probably set $^H{autodie}
810                    #  to "autodie @args", like "autodie :all" or "autodie open"
811                    #  so we can/should check for "open" or "new"
812                    }
813                }
814            $lvl > 1 ? die $msg : warn $msg;
815            }
816        return;
817        }
818
819    return $context ? @diag : $diag[1];
820}
821
822sub record_number {
823    return shift->{_RECNO};
824}
825
826################################################################################
827# string
828################################################################################
829
830*string = \&_string;
831sub _string {
832    defined $_[0]->{_STRING} ? ${ $_[0]->{_STRING} } : undef;
833}
834
835################################################################################
836# fields
837################################################################################
838
839*fields = \&_fields;
840sub _fields {
841    ref($_[0]->{_FIELDS}) ?  @{$_[0]->{_FIELDS}} : undef;
842}
843
844################################################################################
845# meta_info
846################################################################################
847
848sub meta_info {
849    $_[0]->{_FFLAGS} ? @{ $_[0]->{_FFLAGS} } : undef;
850}
851
852sub is_quoted {
853    return unless (defined $_[0]->{_FFLAGS});
854    return if( $_[1] =~ /\D/ or $_[1] < 0 or  $_[1] > $#{ $_[0]->{_FFLAGS} } );
855
856    $_[0]->{_FFLAGS}->[$_[1]] & IS_QUOTED ? 1 : 0;
857}
858
859sub is_binary {
860    return unless (defined $_[0]->{_FFLAGS});
861    return if( $_[1] =~ /\D/ or $_[1] < 0 or  $_[1] > $#{ $_[0]->{_FFLAGS} } );
862    $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0;
863}
864
865sub is_missing {
866    my ($self, $idx, $val) = @_;
867    return unless $self->{keep_meta_info}; # FIXME
868    $idx < 0 || !ref $self->{_FFLAGS} and return;
869    $idx >= @{$self->{_FFLAGS}} and return 1;
870    $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0;
871}
872
873################################################################################
874# combine
875################################################################################
876*combine = \&_combine;
877sub _combine {
878    my ($self, @fields) = @_;
879    my $str  = "";
880    $self->{_FIELDS} = \@fields;
881    $self->{_STATUS} = (@fields > 0) && $self->__combine(\$str, \@fields, 0);
882    $self->{_STRING} = \$str;
883    $self->{_STATUS};
884    }
885
886################################################################################
887# parse
888################################################################################
889*parse = \&_parse;
890sub _parse {
891    my ($self, $str) = @_;
892
893    ref $str and croak ($self->SetDiag (1500));
894
895    my $fields = [];
896    my $fflags = [];
897    $self->{_STRING} = \$str;
898    if (defined $str && $self->__parse ($fields, $fflags, $str, 0)) {
899        $self->{_FIELDS} = $fields;
900        $self->{_FFLAGS} = $fflags;
901        $self->{_STATUS} = 1;
902        }
903    else {
904        $self->{_FIELDS} = undef;
905        $self->{_FFLAGS} = undef;
906        $self->{_STATUS} = 0;
907        }
908    $self->{_STATUS};
909    }
910
911sub column_names {
912    my ( $self, @columns ) = @_;
913
914    @columns or return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
915    @columns == 1 && ! defined $columns[0] and return $self->{_COLUMN_NAMES} = undef;
916
917    if ( @columns == 1 && ref $columns[0] eq "ARRAY" ) {
918        @columns = @{ $columns[0] };
919    }
920    elsif ( join "", map { defined $_ ? ref $_ : "" } @columns ) {
921        croak $self->SetDiag( 3001 );
922    }
923
924    if ( $self->{_BOUND_COLUMNS} && @columns != @{$self->{_BOUND_COLUMNS}} ) {
925        croak $self->SetDiag( 3003 );
926    }
927
928    $self->{_COLUMN_NAMES} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns ];
929    @{ $self->{_COLUMN_NAMES} };
930}
931
932sub header {
933    my ($self, $fh, @args) = @_;
934
935    $fh or croak ($self->SetDiag (1014));
936
937    my (@seps, %args);
938    for (@args) {
939        if (ref $_ eq "ARRAY") {
940            push @seps, @$_;
941            next;
942            }
943        if (ref $_ eq "HASH") {
944            %args = %$_;
945            next;
946            }
947        croak (q{usage: $csv->header ($fh, [ seps ], { options })});
948        }
949
950    defined $args{munge} && !defined $args{munge_column_names} and
951        $args{munge_column_names} = $args{munge}; # munge as alias
952    defined $args{detect_bom}         or $args{detect_bom}         = 1;
953    defined $args{set_column_names}   or $args{set_column_names}   = 1;
954    defined $args{munge_column_names} or $args{munge_column_names} = "lc";
955
956    # Reset any previous leftovers
957    $self->{_RECNO}        = 0;
958    $self->{_AHEAD}        = undef;
959    $self->{_COLUMN_NAMES} = undef if $args{set_column_names};
960    $self->{_BOUND_COLUMNS}    = undef if $args{set_column_names};
961    $self->_cache_set($_cache_id{'_has_ahead'}, 0);
962
963    if (defined $args{sep_set}) {
964        ref $args{sep_set} eq "ARRAY" or
965            croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
966        @seps =  @{$args{sep_set}};
967    }
968
969    $^O eq "MSWin32" and binmode $fh;
970    my $hdr = <$fh>;
971    # check if $hdr can be empty here, I don't think so
972    defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));
973
974    my %sep;
975    @seps or @seps = (",", ";");
976    foreach my $sep (@seps) {
977        index ($hdr, $sep) >= 0 and $sep{$sep}++;
978        }
979
980    keys %sep >= 2 and croak ($self->SetDiag (1011));
981
982    $self->sep (keys %sep);
983    my $enc = "";
984    if ($args{detect_bom}) { # UTF-7 is not supported
985           if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be"   }
986        elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le"   }
987        elsif ($hdr =~ s/^\xfe\xff//)         { $enc = "utf-16be"   }
988        elsif ($hdr =~ s/^\xff\xfe//)         { $enc = "utf-16le"   }
989        elsif ($hdr =~ s/^\xef\xbb\xbf//)     { $enc = "utf-8"      }
990        elsif ($hdr =~ s/^\xf7\x64\x4c//)     { $enc = "utf-1"      }
991        elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
992        elsif ($hdr =~ s/^\x0e\xfe\xff//)     { $enc = "scsu"       }
993        elsif ($hdr =~ s/^\xfb\xee\x28//)     { $enc = "bocu-1"     }
994        elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030"   }
995        elsif ($hdr =~ s/^\x{feff}//)         { $enc = ""           }
996
997        $self->{ENCODING} = uc $enc;
998
999        $hdr eq "" and croak ($self->SetDiag (1010));
1000
1001        if ($enc) {
1002            if ($enc =~ m/([13]).le$/) {
1003                my $l = 0 + $1;
1004                my $x;
1005                $hdr .= "\0" x $l;
1006                read $fh, $x, $l;
1007                }
1008            if ($enc ne "utf-8") {
1009               require Encode;
1010               $hdr = Encode::decode ($enc, $hdr);
1011               }
1012            binmode $fh, ":encoding($enc)";
1013            }
1014        }
1015
1016    my ($ahead, $eol);
1017    if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
1018        $eol   = $2;
1019        $ahead = $3;
1020    }
1021
1022    $args{munge_column_names} eq "lc" and $hdr = lc $hdr;
1023    $args{munge_column_names} eq "uc" and $hdr = uc $hdr;
1024
1025    my $hr = \$hdr; # Will cause croak on perl-5.6.x
1026    open my $h, "<", $hr or croak ($self->SetDiag (1010));
1027
1028    my $row = $self->getline ($h) or croak;
1029    close $h;
1030
1031    if ($ahead) { # Must be after getline, which creates the cache
1032        $self->_cache_set ($_cache_id{_has_ahead}, 1);
1033        $self->{_AHEAD} = $ahead;
1034        $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
1035        }
1036
1037    my @hdr = @$row;
1038    ref $args{munge_column_names} eq "CODE" and
1039        @hdr = map { $args{munge_column_names}->($_)       } @hdr;
1040    ref $args{munge_column_names} eq "HASH" and
1041        @hdr = map { $args{munge_column_names}->{$_} || $_ } @hdr;
1042    my %hdr; $hdr{$_}++ for @hdr;
1043    exists $hdr{""} and croak ($self->SetDiag (1012));
1044    unless (keys %hdr == @hdr) {
1045        croak ($self->_SetDiagInfo (1013, join ", " =>
1046            map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
1047        }
1048    $args{set_column_names} and $self->column_names (@hdr);
1049    wantarray ? @hdr : $self;
1050    }
1051
1052sub bind_columns {
1053    my ( $self, @refs ) = @_;
1054
1055    @refs or return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
1056    @refs == 1 && ! defined $refs[0] and return $self->{_BOUND_COLUMNS} = undef;
1057
1058    if ( $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} ) {
1059        croak $self->SetDiag( 3003 );
1060    }
1061
1062    if ( grep { ref $_ ne "SCALAR" } @refs ) { # why don't use grep?
1063        croak $self->SetDiag( 3004 );
1064    }
1065
1066    $self->_set_attr_N("_is_bound", scalar @refs);
1067    $self->{_BOUND_COLUMNS} = [ @refs ];
1068    @refs;
1069}
1070
1071sub getline_hr {
1072    my ($self, @args, %hr) = @_;
1073    $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
1074    my $fr = $self->getline (@args) or return;
1075    if (ref $self->{_FFLAGS}) { # missing
1076        $self->{_FFLAGS}[$_] = IS_MISSING
1077            for (@$fr ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
1078        @$fr == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
1079            $self->{_FFLAGS}[0] ||= IS_MISSING;
1080        }
1081    @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
1082    \%hr;
1083}
1084
1085sub getline_hr_all {
1086    my ( $self, $io, @args ) = @_;
1087
1088    unless ( $self->{_COLUMN_NAMES} ) {
1089        croak $self->SetDiag( 3002 );
1090    }
1091
1092    my @cn = @{$self->{_COLUMN_NAMES}};
1093
1094    return [ map { my %h; @h{ @cn } = @$_; \%h } @{ $self->getline_all( $io, @args ) } ];
1095}
1096
1097sub say {
1098    my ($self, $io, @f) = @_;
1099    my $eol = $self->eol;
1100    $eol eq "" and $self->eol ($\ || $/);
1101    # say ($fh, undef) does not propage actual undef to print ()
1102    my $state = $self->print ($io, @f == 1 && !defined $f[0] ? undef : @f);
1103    $self->eol ($eol);
1104    return $state;
1105    }
1106
1107sub print_hr {
1108    my ($self, $io, $hr) = @_;
1109    $self->{_COLUMN_NAMES} or croak($self->SetDiag(3009));
1110    ref $hr eq "HASH"      or croak($self->SetDiag(3010));
1111    $self->print ($io, [ map { $hr->{$_} } $self->column_names ]);
1112}
1113
1114sub fragment {
1115    my ($self, $io, $spec) = @_;
1116
1117    my $qd = qr{\s* [0-9]+ \s* }x;                # digit
1118    my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x;        # digit or star
1119    my $qr = qr{$qd (?: - $qs )?}x;                # range
1120    my $qc = qr{$qr (?: ; $qr )*}x;                # list
1121    defined $spec && $spec =~ m{^ \s*
1122        \x23 ? \s*                                # optional leading #
1123        ( row | col | cell ) \s* =
1124        ( $qc                                        # for row and col
1125        | $qd , $qd (?: - $qs , $qs)?                # for cell (ranges)
1126          (?: ; $qd , $qd (?: - $qs , $qs)? )*        # and cell (range) lists
1127        ) \s* $}xi or croak ($self->SetDiag (2013));
1128    my ($type, $range) = (lc $1, $2);
1129
1130    my @h = $self->column_names ();
1131
1132    my @c;
1133    if ($type eq "cell") {
1134        my @spec;
1135        my $min_row;
1136        my $max_row = 0;
1137        for (split m/\s*;\s*/ => $range) {
1138            my ($tlr, $tlc, $brr, $brc) = (m{
1139                    ^ \s* ([0-9]+     ) \s* , \s* ([0-9]+     ) \s*
1140                (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
1141                    $}x) or croak ($self->SetDiag (2013));
1142            defined $brr or ($brr, $brc) = ($tlr, $tlc);
1143            $tlr == 0 || $tlc == 0 ||
1144                ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
1145                ($brc ne "*" && ($brc == 0 || $brc < $tlc))
1146                    and croak ($self->SetDiag (2013));
1147            $tlc--;
1148            $brc-- unless $brc eq "*";
1149            defined $min_row or $min_row = $tlr;
1150            $tlr < $min_row and $min_row = $tlr;
1151            $brr eq "*" || $brr > $max_row and
1152                $max_row = $brr;
1153            push @spec, [ $tlr, $tlc, $brr, $brc ];
1154            }
1155        my $r = 0;
1156        while (my $row = $self->getline ($io)) {
1157            ++$r < $min_row and next;
1158            my %row;
1159            my $lc;
1160            foreach my $s (@spec) {
1161                my ($tlr, $tlc, $brr, $brc) = @$s;
1162                $r <  $tlr || ($brr ne "*" && $r > $brr) and next;
1163                !defined $lc || $tlc < $lc and $lc = $tlc;
1164                my $rr = $brc eq "*" ? $#$row : $brc;
1165                $row{$_} = $row->[$_] for $tlc .. $rr;
1166                }
1167            push @c, [ @row{sort { $a <=> $b } keys %row } ];
1168            if (@h) {
1169                my %h; @h{@h} = @{$c[-1]};
1170                $c[-1] = \%h;
1171                }
1172            $max_row ne "*" && $r == $max_row and last;
1173            }
1174        return \@c;
1175        }
1176
1177    # row or col
1178    my @r;
1179    my $eod = 0;
1180    for (split m/\s*;\s*/ => $range) {
1181        my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
1182            or croak ($self->SetDiag (2013));
1183        $to ||= $from;
1184        $to eq "*" and ($to, $eod) = ($from, 1);
1185        # $to cannot be <= 0 due to regex and ||=
1186        $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
1187        $r[$_] = 1 for $from .. $to;
1188        }
1189
1190    my $r = 0;
1191    $type eq "col" and shift @r;
1192    $_ ||= 0 for @r;
1193    while (my $row = $self->getline ($io)) {
1194        $r++;
1195        if ($type eq "row") {
1196            if (($r > $#r && $eod) || $r[$r]) {
1197                push @c, $row;
1198                if (@h) {
1199                    my %h; @h{@h} = @{$c[-1]};
1200                    $c[-1] = \%h;
1201                    }
1202                }
1203            next;
1204            }
1205        push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#$row ];
1206        if (@h) {
1207            my %h; @h{@h} = @{$c[-1]};
1208            $c[-1] = \%h;
1209            }
1210        }
1211
1212    return \@c;
1213    }
1214
1215my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1216
1217sub _csv_attr {
1218    my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;
1219
1220    $attr{binary} = 1;
1221
1222    my $enc = delete $attr{enc} || delete $attr{encoding} || "";
1223    $enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
1224    $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
1225
1226    my $fh;
1227    my $sink = 0;
1228    my $cls  = 0;  # If I open a file, I have to close it
1229    my $in   = delete $attr{in}  || delete $attr{file} or croak $csv_usage;
1230    my $out  = exists $attr{out} && !$attr{out} ? \"skip"
1231        : delete $attr{out} || delete $attr{file};
1232
1233    ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
1234
1235    $in && $out && !ref $in && !ref $out and croak join "\n" =>
1236       qq{Cannot use a string for both in and out. Instead use:},
1237       qq{ csv (in => csv (in => "$in"), out => "$out");\n};
1238
1239    if ($out) {
1240        if ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
1241            $fh = $out;
1242            }
1243        elsif (ref $out and "SCALAR" eq ref $out and defined $$out and $$out eq "skip") {
1244            delete $attr{out};
1245            $sink = 1;
1246            }
1247        else {
1248            open $fh, ">", $out or croak "$out: $!";
1249            $cls = 1;
1250            }
1251        if ($fh) {
1252            $enc and binmode $fh, $enc;
1253            unless (defined $attr{eol}) {
1254                my @layers = eval { PerlIO::get_layers ($fh) };
1255                $attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
1256                }
1257            }
1258        }
1259
1260    if (   ref $in eq "CODE" or ref $in eq "ARRAY") {
1261        # All done
1262        }
1263    elsif (ref $in eq "SCALAR") {
1264        # Strings with code points over 0xFF may not be mapped into in-memory file handles
1265        # "<$enc" does not change that :(
1266        open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
1267        $cls = 1;
1268        }
1269    elsif (ref $in or "GLOB" eq ref \$in) {
1270        if (!ref $in && $] < 5.008005) {
1271            $fh = \*$in; # uncoverable statement ancient perl version required
1272            }
1273        else {
1274            $fh = $in;
1275            }
1276        }
1277    else {
1278        open $fh, "<$enc", $in or croak "$in: $!";
1279        $cls = 1;
1280        }
1281    $fh || $sink or croak qq{No valid source passed. "in" is required};
1282
1283    my $hdrs = delete $attr{headers};
1284    my $frag = delete $attr{fragment};
1285    my $key  = delete $attr{key};
1286    my $val  = delete $attr{value};
1287    my $kh   = delete $attr{keep_headers}      ||
1288          delete $attr{keep_column_names}      ||
1289          delete $attr{kh};
1290
1291    my $cbai = delete $attr{callbacks}{after_in}    ||
1292               delete $attr{after_in}               ||
1293               delete $attr{callbacks}{after_parse} ||
1294               delete $attr{after_parse};
1295    my $cbbo = delete $attr{callbacks}{before_out}  ||
1296               delete $attr{before_out};
1297    my $cboi = delete $attr{callbacks}{on_in}       ||
1298               delete $attr{on_in};
1299
1300    my $hd_s = delete $attr{sep_set}                ||
1301               delete $attr{seps};
1302    my $hd_b = delete $attr{detect_bom}             ||
1303               delete $attr{bom};
1304    my $hd_m = delete $attr{munge}                  ||
1305               delete $attr{munge_column_names};
1306    my $hd_c = delete $attr{set_column_names};
1307
1308    for ([ quo    => "quote"                ],
1309         [ esc    => "escape"                ],
1310         [ escape => "escape_char"        ],
1311         ) {
1312        my ($f, $t) = @$_;
1313        exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
1314        }
1315
1316    my $fltr = delete $attr{filter};
1317    my %fltr = (
1318        not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
1319        not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
1320        filled    => sub { grep { defined && m/\S/    } @{$_[1]} },
1321        );
1322    defined $fltr && !ref $fltr && exists $fltr{$fltr} and
1323        $fltr = { 0 => $fltr{$fltr} };
1324    ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
1325    ref $fltr eq "HASH" or $fltr = undef;
1326
1327    exists $attr{formula} and
1328    $attr{formula} = _supported_formula (undef, $attr{formula});
1329
1330    defined $attr{auto_diag}   or $attr{auto_diag}   = 1;
1331    defined $attr{escape_null} or $attr{escape_null} = 0;
1332    my $csv = delete $attr{csv} || Text::CSV_PP->new (\%attr)
1333        or croak $last_new_error;
1334
1335    return {
1336        csv  => $csv,
1337        attr => { %attr },
1338        fh   => $fh,
1339        cls  => $cls,
1340        in   => $in,
1341        sink => $sink,
1342        out  => $out,
1343        enc  => $enc,
1344        hdrs => $hdrs,
1345        key  => $key,
1346        val  => $val,
1347        kh   => $kh,
1348        frag => $frag,
1349        fltr => $fltr,
1350        cbai => $cbai,
1351        cbbo => $cbbo,
1352        cboi => $cboi,
1353        hd_s => $hd_s,
1354        hd_b => $hd_b,
1355        hd_m => $hd_m,
1356        hd_c => $hd_c,
1357        };
1358    }
1359
1360sub csv {
1361    @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
1362    @_ or croak $csv_usage;
1363
1364    my $c = _csv_attr (@_);
1365
1366    my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
1367    my %hdr;
1368    if (ref $hdrs eq "HASH") {
1369        %hdr  = %$hdrs;
1370        $hdrs = "auto";
1371        }
1372
1373    if ($c->{out} && !$c->{sink}) {
1374        if (ref $in eq "CODE") {
1375            my $hdr = 1;
1376            while (my $row = $in->($csv)) {
1377                if (ref $row eq "ARRAY") {
1378                    $csv->print ($fh, $row);
1379                    next;
1380                    }
1381                if (ref $row eq "HASH") {
1382                    if ($hdr) {
1383                        $hdrs ||= [ map { $hdr{$_} || $_ } keys %$row ];
1384                        $csv->print ($fh, $hdrs);
1385                        $hdr = 0;
1386                        }
1387                    $csv->print ($fh, [ @{$row}{@$hdrs} ]);
1388                    }
1389                }
1390            }
1391        elsif (ref $in->[0] eq "ARRAY") { # aoa
1392            ref $hdrs and $csv->print ($fh, $hdrs);
1393            for (@{$in}) {
1394                $c->{cboi} and $c->{cboi}->($csv, $_);
1395                $c->{cbbo} and $c->{cbbo}->($csv, $_);
1396                $csv->print ($fh, $_);
1397                }
1398            }
1399        else { # aoh
1400            my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
1401            defined $hdrs or $hdrs = "auto";
1402            ref $hdrs || $hdrs eq "auto" and
1403                $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
1404            for (@{$in}) {
1405                local %_;
1406                *_ = $_;
1407                $c->{cboi} and $c->{cboi}->($csv, $_);
1408                $c->{cbbo} and $c->{cbbo}->($csv, $_);
1409                $csv->print ($fh, [ @{$_}{@hdrs} ]);
1410                }
1411            }
1412
1413        $c->{cls} and close $fh;
1414        return 1;
1415        }
1416
1417    my @row1;
1418    if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
1419        my %harg;
1420        defined $c->{hd_s} and $harg{set_set}            = $c->{hd_s};
1421        defined $c->{hd_d} and $harg{detect_bom}         = $c->{hd_b};
1422        defined $c->{hd_m} and $harg{munge_column_names} = $hdrs ? "none" : $c->{hd_m};
1423        defined $c->{hd_c} and $harg{set_column_names}   = $hdrs ? 0      : $c->{hd_c};
1424        @row1 = $csv->header ($fh, \%harg);
1425        my @hdr = $csv->column_names;
1426        @hdr and $hdrs ||= \@hdr;
1427        }
1428
1429    if ($c->{kh}) {
1430        ref $c->{kh} eq "ARRAY" or croak ($csv->SetDiag (1501));
1431        $hdrs ||= "auto";
1432        }
1433
1434    my $key = $c->{key};
1435    if ($key) {
1436       !ref $key or ref $key eq "ARRAY" && @$key > 1 or croak ($csv->SetDiag (1501));
1437        $hdrs ||= "auto";
1438        }
1439    my $val = $c->{val};
1440    if ($val) {
1441       $key                                          or croak ($csv->SetDiag (1502));
1442       !ref $val or ref $val eq "ARRAY" && @$val > 0 or croak ($csv->SetDiag (1503));
1443       }
1444
1445    $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
1446    if (defined $hdrs) {
1447        if (!ref $hdrs) {
1448            if ($hdrs eq "skip") {
1449                $csv->getline ($fh); # discard;
1450                }
1451            elsif ($hdrs eq "auto") {
1452                my $h = $csv->getline ($fh) or return;
1453                $hdrs = [ map {      $hdr{$_} || $_ } @$h ];
1454                }
1455            elsif ($hdrs eq "lc") {
1456                my $h = $csv->getline ($fh) or return;
1457                $hdrs = [ map { lc ($hdr{$_} || $_) } @$h ];
1458                }
1459            elsif ($hdrs eq "uc") {
1460                my $h = $csv->getline ($fh) or return;
1461                $hdrs = [ map { uc ($hdr{$_} || $_) } @$h ];
1462                }
1463            }
1464        elsif (ref $hdrs eq "CODE") {
1465            my $h  = $csv->getline ($fh) or return;
1466            my $cr = $hdrs;
1467            $hdrs  = [ map {  $cr->($hdr{$_} || $_) } @$h ];
1468            }
1469        $c->{kh} and $hdrs and @{$c->{kh}} = @$hdrs;
1470        }
1471
1472    if ($c->{fltr}) {
1473        my %f = %{$c->{fltr}};
1474        # convert headers to index
1475        my @hdr;
1476        if (ref $hdrs) {
1477            @hdr = @{$hdrs};
1478            for (0 .. $#hdr) {
1479                exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
1480                }
1481            }
1482        $csv->callbacks (after_parse => sub {
1483            my ($CSV, $ROW) = @_; # lexical sub-variables in caps
1484            foreach my $FLD (sort keys %f) {
1485                local $_ = $ROW->[$FLD - 1];
1486                local %_;
1487                @hdr and @_{@hdr} = @$ROW;
1488                $f{$FLD}->($CSV, $ROW) or return \"skip";
1489                $ROW->[$FLD - 1] = $_;
1490                }
1491            });
1492        }
1493
1494    my $frag = $c->{frag};
1495    my $ref = ref $hdrs
1496        ? # aoh
1497          do {
1498            my @h = $csv->column_names ($hdrs);
1499            my %h; $h{$_}++ for @h;
1500            exists $h{""} and croak ($csv->SetDiag (1012));
1501            unless (keys %h == @h) {
1502                croak ($csv->_SetDiagInfo (1013, join ", " =>
1503                    map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
1504                }
1505            $frag ? $csv->fragment ($fh, $frag) :
1506            $key  ? do {
1507                        my ($k, $j, @f) = ref $key ? (undef, @$key) : ($key);
1508                        if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
1509                            croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
1510                            }
1511                        +{ map {
1512                            my $r = $_;
1513                            my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
1514                            ( $K => (
1515                            $val
1516                                ? ref $val
1517                                    ? { map { $_ => $r->{$_} } @$val }
1518                                    : $r->{$val}
1519                                : $r ));
1520                            } @{$csv->getline_hr_all ($fh)} }
1521                        }
1522                  : $csv->getline_hr_all ($fh);
1523            }
1524        : # aoa
1525            $frag ? $csv->fragment ($fh, $frag)
1526                  : $csv->getline_all ($fh);
1527    if ($ref) {
1528        @row1 && !$c->{hd_c} && !ref $hdrs and unshift @$ref, \@row1;
1529        }
1530    else {
1531        Text::CSV_PP->auto_diag;
1532        }
1533    $c->{cls} and close $fh;
1534    if ($ref and $c->{cbai} || $c->{cboi}) {
1535        # Default is ARRAYref, but with key =>, you'll get a hashref
1536        foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
1537            local %_;
1538            ref $r eq "HASH" and *_ = $r;
1539            $c->{cbai} and $c->{cbai}->($csv, $r);
1540            $c->{cboi} and $c->{cboi}->($csv, $r);
1541            }
1542        }
1543
1544    $c->{sink} and return;
1545
1546    defined wantarray or
1547        return csv (%{$c->{attr}}, in => $ref, headers => $hdrs, %{$c->{attr}});
1548
1549    return $ref;
1550    }
1551
1552# The end of the common pure perl part.
1553
1554################################################################################
1555#
1556# The following are methods implemented in XS in Text::CSV_XS or
1557# helper methods for Text::CSV_PP only
1558#
1559################################################################################
1560
1561sub _setup_ctx {
1562    my $self = shift;
1563
1564    $last_error = undef;
1565
1566    my $ctx;
1567    if ($self->{_CACHE}) {
1568        %$ctx = %{$self->{_CACHE}};
1569    } else {
1570        $ctx->{sep} = ',';
1571        if (defined $self->{sep_char}) {
1572            $ctx->{sep} = $self->{sep_char};
1573        }
1574        if (defined $self->{sep} and $self->{sep} ne '') {
1575            use bytes;
1576            $ctx->{sep} = $self->{sep};
1577            my $sep_len = length($ctx->{sep});
1578            $ctx->{sep_len} = $sep_len if $sep_len > 1;
1579        }
1580
1581        $ctx->{quo} = '"';
1582        if (exists $self->{quote_char}) {
1583            my $quote_char = $self->{quote_char};
1584            if (defined $quote_char and length $quote_char) {
1585                $ctx->{quo} = $quote_char;
1586            } else {
1587                $ctx->{quo} = "\0";
1588            }
1589        }
1590        if (defined $self->{quote} and $self->{quote} ne '') {
1591            use bytes;
1592            $ctx->{quo} = $self->{quote};
1593            my $quote_len = length($ctx->{quo});
1594            $ctx->{quo_len} = $quote_len if $quote_len > 1;
1595        }
1596
1597        $ctx->{escape_char} = '"';
1598        if (exists $self->{escape_char}) {
1599            my $escape_char = $self->{escape_char};
1600            if (defined $escape_char and length $escape_char) {
1601                $ctx->{escape_char} = $escape_char;
1602            } else {
1603                $ctx->{escape_char} = "\0";
1604            }
1605        }
1606
1607        if (defined $self->{eol}) {
1608            my $eol = $self->{eol};
1609            my $eol_len = length($eol);
1610            $ctx->{eol} = $eol;
1611            $ctx->{eol_len} = $eol_len;
1612            if ($eol_len == 1 and $eol eq "\015") {
1613                $ctx->{eol_is_cr} = 1;
1614            }
1615        }
1616
1617        $ctx->{undef_flg} = 0;
1618        if (defined $self->{undef_str}) {
1619            $ctx->{undef_str} = $self->{undef_str};
1620            $ctx->{undef_flg} = 3 if utf8::is_utf8($self->{undef_str});
1621        } else {
1622            $ctx->{undef_str} = undef;
1623        }
1624
1625        if (defined $self->{_types}) {
1626            $ctx->{types} = $self->{_types};
1627            $ctx->{types_len} = length($ctx->{types});
1628        }
1629
1630        if (defined $self->{_is_bound}) {
1631            $ctx->{is_bound} = $self->{_is_bound};
1632        }
1633
1634        if (defined $self->{callbacks}) {
1635            my $cb = $self->{callbacks};
1636            $ctx->{has_hooks} = 0;
1637            if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
1638                $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
1639            }
1640            if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
1641                $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
1642            }
1643        }
1644
1645        for (qw/
1646            binary decode_utf8 always_quote strict quote_empty
1647            allow_loose_quotes allow_loose_escapes
1648            allow_unquoted_escape allow_whitespace blank_is_undef
1649            empty_is_undef verbatim auto_diag diag_verbose
1650            keep_meta_info formula
1651        /) {
1652            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
1653        }
1654        for (qw/quote_space escape_null quote_binary/) {
1655            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
1656        }
1657        if ($ctx->{escape_char} eq "\0") {
1658            $ctx->{escape_null} = 0;
1659        }
1660
1661        # FIXME: readonly
1662        %{$self->{_CACHE}} = %$ctx;
1663    }
1664
1665    $ctx->{utf8} = 0;
1666    $ctx->{size} = 0;
1667    $ctx->{used} = 0;
1668
1669    if ($ctx->{is_bound}) {
1670        my $bound = $self->{_BOUND_COLUMNS};
1671        if ($bound and ref $bound eq 'ARRAY') {
1672            $ctx->{bound} = $bound;
1673        } else {
1674            $ctx->{is_bound} = 0;
1675        }
1676    }
1677
1678    $ctx->{eol_pos} = -1;
1679    $ctx->{eolx} = $ctx->{eol_len}
1680        ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
1681            ? 1
1682            : $ctx->{eol} =~ /\A[\015|\012]/ ? 0 : 1
1683        : 0;
1684
1685    if ($ctx->{sep_len} and $ctx->{sep_len} > 1 and _is_valid_utf8($ctx->{sep})) {
1686        $ctx->{utf8} = 1;
1687    }
1688    if ($ctx->{quo_len} and $ctx->{quo_len} > 1 and _is_valid_utf8($ctx->{quo})) {
1689        $ctx->{utf8} = 1;
1690    }
1691
1692    $ctx;
1693}
1694
1695sub _cache_set {
1696    my ($self, $idx, $value) = @_;
1697    return unless exists $self->{_CACHE};
1698    my $cache = $self->{_CACHE};
1699
1700    my $key = $_reverse_cache_id{$idx};
1701    if (!defined $key) {
1702        warn (sprintf "Unknown cache index %d ignored\n", $idx);
1703    } elsif ($key eq 'sep_char') {
1704        $cache->{sep} = $value;
1705        $cache->{sep_len} = 0;
1706    }
1707    elsif ($key eq 'quote_char') {
1708        $cache->{quo} = $value;
1709        $cache->{quo_len} = 0;
1710    }
1711    elsif ($key eq '_has_ahead') {
1712        $cache->{has_ahead} = $value;
1713    }
1714    elsif ($key eq '_has_hooks') {
1715        $cache->{has_hooks} = $value;
1716    }
1717    elsif ($key eq '_is_bound') {
1718        $cache->{is_bound} = $value;
1719    }
1720    elsif ($key eq 'sep') {
1721        use bytes;
1722        my $len = bytes::length($value);
1723        $cache->{sep} = $value if $len;
1724        $cache->{sep_len} = $len == 1 ? 0 : $len;
1725    }
1726    elsif ($key eq 'quote') {
1727        use bytes;
1728        my $len = bytes::length($value);
1729        $cache->{quo} = $value if $len;
1730        $cache->{quo_len} = $len == 1 ? 0 : $len;
1731    }
1732    elsif ($key eq 'eol') {
1733        $cache->{eol} = $value if defined($value);
1734        $cache->{eol_is_cr} = $value eq "\015" ? 1 : 0;
1735    }
1736    elsif ($key eq 'undef_str') {
1737        if (defined $value) {
1738            $cache->{undef_str} = $value;
1739            $cache->{undef_flg} = 3 if utf8::is_utf8($value);
1740        } else {
1741            $cache->{undef_str} = undef;
1742            $cache->{undef_flg} = 0;
1743        }
1744    }
1745    else {
1746        $cache->{$key} = $value;
1747    }
1748    return 1;
1749}
1750
1751sub _cache_diag {
1752    my $self = shift;
1753    unless (exists $self->{_CACHE}) {
1754        warn ("CACHE: invalid\n");
1755        return;
1756    }
1757
1758    my $cache = $self->{_CACHE};
1759    warn ("CACHE:\n");
1760    $self->__cache_show_char(quote_char => $cache->{quo});
1761    $self->__cache_show_char(escape_char => $cache->{escape_char});
1762    $self->__cache_show_char(sep_char => $cache->{sep});
1763    for (qw/
1764        binary decode_utf8 allow_loose_escapes allow_loose_quotes allow_unquoted_escape
1765        allow_whitespace always_quote quote_empty quote_space
1766        escape_null quote_binary auto_diag diag_verbose formula strict
1767        has_error_input blank_is_undef empty_is_undef has_ahead
1768        keep_meta_info verbatim has_hooks eol_is_cr eol_len
1769    /) {
1770        $self->__cache_show_byte($_ => $cache->{$_});
1771    }
1772    $self->__cache_show_str(eol => $cache->{eol_len}, $cache->{eol});
1773    $self->__cache_show_byte(sep_len => $cache->{sep_len});
1774    if ($cache->{sep_len} and $cache->{sep_len} > 1) {
1775        $self->__cache_show_str(sep => $cache->{sep_len}, $cache->{sep});
1776    }
1777    $self->__cache_show_byte(quo_len => $cache->{quo_len});
1778    if ($cache->{quo_len} and $cache->{quo_len} > 1) {
1779        $self->__cache_show_str(quote => $cache->{quo_len}, $cache->{quo});
1780    }
1781}
1782
1783sub __cache_show_byte {
1784    my ($self, $key, $value) = @_;
1785    warn (sprintf "  %-21s %02x:%3d\n", $key, defined $value ? ord($value) : 0, defined $value ? $value : 0);
1786}
1787
1788sub __cache_show_char {
1789    my ($self, $key, $value) = @_;
1790    my $v = $value;
1791    if (defined $value) {
1792        my @b = unpack "U0C*", $value;
1793        $v = pack "U*", $b[0];
1794    }
1795    warn (sprintf "  %-21s %02x:%s\n", $key, defined $v ? ord($v) : 0, $self->__pretty_str($v, 1));
1796}
1797
1798sub __cache_show_str {
1799    my ($self, $key, $len, $value) = @_;
1800    warn (sprintf "  %-21s %02d:%s\n", $key, $len, $self->__pretty_str($value, $len));
1801}
1802
1803sub __pretty_str { # FIXME
1804    my ($self, $str, $len) = @_;
1805    return '' unless defined $str;
1806    $str = substr($str, 0, $len);
1807    $str =~ s/"/\\"/g;
1808    $str =~ s/([^\x09\x20-\x7e])/sprintf '\\x{%x}', ord($1)/eg;
1809    qq{"$str"};
1810}
1811
1812sub _hook {
1813    my ($self, $name, $fields) = @_;
1814    return 0 unless $self->{callbacks};
1815
1816    my $cb = $self->{callbacks}{$name};
1817    return 0 unless $cb && ref $cb eq 'CODE';
1818
1819    my (@res) = $cb->($self, $fields);
1820    if (@res) {
1821        return 0 if ref $res[0] eq 'SCALAR' and ${$res[0]} eq "skip";
1822    }
1823    scalar @res;
1824}
1825
1826################################################################################
1827# methods for combine
1828################################################################################
1829
1830sub __combine {
1831    my ($self, $dst, $fields, $useIO) = @_;
1832
1833    my $ctx = $self->_setup_ctx;
1834
1835    my ($binary, $quot, $sep, $esc, $quote_space) = @{$ctx}{qw/binary quo sep escape_char quote_space/};
1836
1837    if(!defined $quot or $quot eq "\0"){ $quot = ''; }
1838
1839    my $re_esc;
1840    if ($esc ne '' and $esc ne "\0") {
1841      if ($quot ne '') {
1842        $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
1843      } else {
1844        $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
1845      }
1846    }
1847
1848    my $bound = 0;
1849    my $n = @$fields - 1;
1850    if ($n < 0 and $ctx->{is_bound}) {
1851        $n = $ctx->{is_bound} - 1;
1852        $bound = 1;
1853    }
1854
1855    my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;
1856
1857    my $must_be_quoted;
1858    my @results;
1859    for(my $i = 0; $i <= $n; $i++) {
1860        my $v_ref;
1861        if ($bound) {
1862            $v_ref = $self->__bound_field($ctx, $i, 1);
1863        } else {
1864            if (@$fields > $i) {
1865                $v_ref = \($fields->[$i]);
1866            }
1867        }
1868        next unless $v_ref;
1869
1870        my $value = $$v_ref;
1871
1872        if (!defined $value) {
1873            if ($ctx->{undef_str}) {
1874                if ($ctx->{undef_flg}) {
1875                    $ctx->{utf8} = 1;
1876                    $ctx->{binary} = 1;
1877                }
1878                push @results, $ctx->{undef_str};
1879            } else {
1880                push @results, '';
1881            }
1882            next;
1883        }
1884
1885        if ( substr($value, 0, 1) eq '=' && $ctx->{formula} ) {
1886            $value = $self->_formula($ctx, $value, $i);
1887            if (!defined $value) {
1888                push @results, '';
1889                next;
1890            }
1891        }
1892
1893        $must_be_quoted = $ctx->{always_quote} ? 1 : 0;
1894        if ($value eq '') {
1895            $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
1896        }
1897        else {
1898
1899            if (utf8::is_utf8 $value) {
1900                $ctx->{utf8} = 1;
1901                $ctx->{binary} = 1;
1902            }
1903
1904            $must_be_quoted++ if $check_meta && $self->is_quoted($i);
1905
1906            if (!$must_be_quoted and $quot ne '') {
1907                use bytes;
1908                $must_be_quoted++ if
1909                    ($value =~ /\Q$quot\E/) ||
1910                    ($sep ne '' and $sep ne "\0" and $value =~ /\Q$sep\E/) ||
1911                    ($esc ne '' and $esc ne "\0" and $value =~ /\Q$esc\E/) ||
1912                    ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
1913                    ($ctx->{quote_space} && $value =~ /[\x09\x20]/);
1914            }
1915
1916            if (!$ctx->{binary} and $value =~ /[^\x09\x20-\x7E]/) {
1917                # an argument contained an invalid character...
1918                $self->{_ERROR_INPUT} = $value;
1919                $self->SetDiag(2110);
1920                return 0;
1921            }
1922
1923            if ($re_esc) {
1924                $value =~ s/($re_esc)/$esc$1/g;
1925            }
1926            if ($ctx->{escape_null}) {
1927                $value =~ s/\0/${esc}0/g;
1928            }
1929        }
1930
1931        if ($must_be_quoted) {
1932            $value = $quot . $value . $quot;
1933        }
1934        push @results, $value;
1935    }
1936
1937    $$dst = join($sep, @results) . ( defined $ctx->{eol} ? $ctx->{eol} : '' );
1938
1939    return 1;
1940}
1941
1942sub _formula {
1943    my ($self, $ctx, $value, $i) = @_;
1944
1945    my $fa = $ctx->{formula} or return;
1946    if ($fa == 1) { die "Formulas are forbidden\n" }
1947    if ($fa == 2) { die "Formulas are forbidden\n" } # XS croak behaves like PP's "die"
1948
1949    if ($fa == 3) {
1950        my $rec = '';
1951        if ($ctx->{recno}) {
1952            $rec = sprintf " in record %lu", $ctx->{recno} + 1;
1953        }
1954        my $field = '';
1955        my $column_names = $self->{_COLUMN_NAMES};
1956        if (ref $column_names eq 'ARRAY' and @$column_names >= $i - 1) {
1957            my $column_name = $column_names->[$i - 1];
1958            $field = sprintf " (column: '%.100s')", $column_name if defined $column_name;
1959        }
1960        warn sprintf("Field %d%s%s contains formula '%s'\n", $i, $field, $rec, $value);
1961        return $value;
1962    }
1963
1964    if ($fa == 4) {
1965        return '';
1966    }
1967    if ($fa == 5) {
1968        return undef;
1969    }
1970    return;
1971}
1972
1973sub print {
1974    my ($self, $io, $fields) = @_;
1975
1976    require IO::Handle;
1977
1978    if (!defined $fields) {
1979        $fields = [];
1980    } elsif(ref($fields) ne 'ARRAY'){
1981        Carp::croak("Expected fields to be an array ref");
1982    }
1983
1984    $self->_hook(before_print => $fields);
1985
1986    my $str = "";
1987    $self->__combine(\$str, $fields, 1) or return '';
1988
1989    local $\ = '';
1990
1991    $io->print( $str ) or $self->_set_error_diag(2200);
1992}
1993
1994################################################################################
1995# methods for parse
1996################################################################################
1997
1998
1999sub __parse { # cx_xsParse
2000    my ($self, $fields, $fflags, $src, $useIO) = @_;
2001
2002    my $ctx = $self->_setup_ctx;
2003
2004    my $state = $self->___parse($ctx, $fields, $fflags, $src, $useIO);
2005    if ($state and ($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
2006        $self->_hook(after_parse => $fields);
2007    }
2008    return $state || !$last_error;
2009}
2010
2011sub ___parse { # cx_c_xsParse
2012    my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;
2013
2014    local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};
2015
2016    if ($ctx->{useIO} = $useIO) {
2017        require IO::Handle;
2018
2019        $ctx->{tmp} = undef;
2020        if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
2021            $ctx->{tmp} = $self->{_AHEAD};
2022            $ctx->{size} = length $ctx->{tmp};
2023            $ctx->{used} = 0;
2024        }
2025    } else {
2026        $ctx->{tmp} = $src;
2027        $ctx->{size} = length $src;
2028        $ctx->{used} = 0;
2029        $ctx->{utf8} = utf8::is_utf8($src);
2030    }
2031    if ($ctx->{has_error_input}) {
2032        $self->{_ERROR_INPUT} = undef;
2033        $ctx->{has_error_input} = 0;
2034    }
2035
2036    my $result = $self->____parse($ctx, $src, $fields, $fflags);
2037    $self->{_RECNO} = ++($ctx->{recno});
2038    $self->{_EOF} = '';
2039
2040    if ($ctx->{strict}) {
2041        $ctx->{strict_n} ||= $ctx->{fld_idx};
2042        if ($ctx->{strict_n} != $ctx->{fld_idx}) {
2043            $self->__parse_error($ctx, 2014, $ctx->{used});
2044            return;
2045        }
2046    }
2047
2048    if ($ctx->{useIO}) {
2049        if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
2050            $self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
2051        } else {
2052            $ctx->{has_ahead} = 0;
2053            if ($ctx->{useIO} & useIO_EOF) {
2054                $self->{_EOF} = 1;
2055            }
2056        }
2057        %{$self->{_CACHE}} = %$ctx;
2058
2059        if ($fflags) {
2060            if ($ctx->{keep_meta_info}) {
2061                $self->{_FFLAGS} = $fflags;
2062            } else {
2063                undef $fflags;
2064            }
2065        }
2066    } else {
2067        %{$self->{_CACHE}} = %$ctx;
2068    }
2069
2070    if ($result and $ctx->{types}) {
2071        my $len = @$fields;
2072        for(my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
2073            my $value = $fields->[$i];
2074            next unless defined $value;
2075            my $type = ord(substr($ctx->{types}, $i, 1));
2076            if ($type == IV) {
2077                $fields->[$i] = int($value);
2078            } elsif ($type == NV) {
2079                $fields->[$i] = $value + 0.0;
2080            }
2081        }
2082    }
2083
2084    $result;
2085}
2086
2087sub ____parse { # cx_Parse
2088    my ($self, $ctx, $src, $fields, $fflags) = @_;
2089
2090    my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
2091
2092    utf8::encode($sep)  if !$ctx->{utf8} and $ctx->{sep_len};
2093    utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
2094    utf8::encode($eol)  if !$ctx->{utf8} and $ctx->{eol_len};
2095
2096    my $seenSomething =  0;
2097    my $waitingForField = 1;
2098    my ($value, $v_ref);
2099    $ctx->{fld_idx} = my $fnum = 0;
2100    $ctx->{flag} = 0;
2101
2102    my $re_str = join '|', map({$_ eq "\0" ? '[\\0]' : quotemeta($_)} sort {length $b <=> length $a} grep {defined $_ and $_ ne ''} $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
2103    $ctx->{_re} = qr/$re_str/;
2104    my $re = qr/$re_str|[^\x09\x20-\x7E]|$/;
2105
2106LOOP:
2107    while($self->__get_from_src($ctx, $src)) {
2108        while($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
2109            my ($hit, $c) = ($1, $2);
2110            $ctx->{used} = pos($ctx->{tmp});
2111            if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
2112                $self->{_AHEAD} = $hit;
2113                $ctx->{has_ahead} = 1;
2114                $ctx->{has_leftover} = 1;
2115                last;
2116            }
2117            last if $seenSomething and $hit eq '' and $c eq ''; # EOF
2118
2119            # new field
2120            if (!$v_ref) {
2121                if ($ctx->{is_bound}) {
2122                    $v_ref = $self->__bound_field($ctx, $fnum, 0);
2123                } else {
2124                    $value = '';
2125                    $v_ref = \$value;
2126                }
2127                $fnum++;
2128                return unless $v_ref;
2129                $ctx->{flag} = 0;   
2130                $ctx->{fld_idx}++;
2131            }
2132
2133            $seenSomething = 1;
2134
2135            if (defined $hit and $hit ne '') {
2136                if ($waitingForField) {
2137                    $waitingForField = 0;
2138                }
2139                if ($hit =~ /[^\x09\x20-\x7E]/) {
2140                    $ctx->{flag} |= IS_BINARY;
2141                }
2142                $$v_ref .= $hit;
2143            }
2144
2145RESTART:
2146            if (defined $c and defined $sep and $c eq $sep) {
2147                if ($waitingForField) {
2148                    # ,1,"foo, 3",,bar,
2149                    # ^           ^
2150                    if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2151                        $$v_ref = undef;
2152                    } else {
2153                        $$v_ref = "";
2154                    }
2155                    unless ($ctx->{is_bound}) {
2156                        push @$fields, $$v_ref;
2157                    }
2158                    $v_ref = undef;
2159                    if ($ctx->{keep_meta_info} and $fflags) {
2160                        push @$fflags, $ctx->{flag};
2161                    }
2162                } elsif ($ctx->{flag} & IS_QUOTED) {
2163                    # ,1,"foo, 3",,bar,
2164                    #        ^
2165                    $$v_ref .= $c;
2166                } else {
2167                    # ,1,"foo, 3",,bar,
2168                    #   ^        ^    ^
2169                    $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2170                    $v_ref = undef;
2171                    $waitingForField = 1;
2172                }
2173            }
2174            elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
2175                if ($waitingForField) {
2176                    # ,1,"foo, 3",,bar,\r\n
2177                    #    ^
2178                    $ctx->{flag} |= IS_QUOTED;
2179                    $waitingForField = 0;
2180                    next;
2181                }
2182                if ($ctx->{flag} & IS_QUOTED) {
2183                    # ,1,"foo, 3",,bar,\r\n
2184                    #           ^
2185                    my $quoesc = 0;
2186                    my $c2 = $self->__get($ctx);
2187
2188                    if ($ctx->{allow_whitespace}) {
2189                        # , 1 , "foo, 3" , , bar , \r\n
2190                        #               ^
2191                        while($self->__is_whitespace($ctx, $c2)) {
2192                            if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
2193                                $$v_ref .= $c;
2194                                $c = $c2;
2195                            }
2196                            $c2 = $self->__get($ctx);
2197                        }
2198                    }
2199
2200                    if (!defined $c2) { # EOF
2201                        # ,1,"foo, 3"
2202                        #            ^
2203                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2204                        return 1;
2205                    }
2206
2207                    if (defined $c2 and defined $sep and $c2 eq $sep) {
2208                        # ,1,"foo, 3",,bar,\r\n
2209                        #            ^
2210                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2211                        $v_ref = undef;
2212                        $waitingForField = 1;
2213                        next;
2214                    }
2215                    if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
2216                        # ,1,"foo, 3",,"bar"\n
2217                        #                   ^
2218                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2219                        return 1;
2220                    }
2221
2222                    if (defined $esc and $c eq $esc) {
2223                        $quoesc = 1;
2224                        if (defined $c2 and $c2 eq '0') {
2225                            # ,1,"foo, 3"056",,bar,\r\n
2226                            #            ^
2227                            $$v_ref .= "\0";
2228                            next;
2229                        }
2230                        if (defined $c2 and defined $quot and $c2 eq $quot) {
2231                            # ,1,"foo, 3""56",,bar,\r\n
2232                            #            ^
2233                            if ($ctx->{utf8}) {
2234                                $ctx->{flag} |= IS_BINARY;
2235                            }
2236                            $$v_ref .= $c2;
2237                            next;
2238                        }
2239                        if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
2240                            # ,1,"foo, 3"56",,bar,\r\n
2241                            #            ^
2242                            $$v_ref .= $c;
2243                            $c = $c2;
2244                            goto RESTART;
2245                        }
2246                    }
2247                    if (defined $c2 and $c2 eq "\015") {
2248                        if ($ctx->{eol_is_cr}) {
2249                            # ,1,"foo, 3"\r
2250                            #            ^
2251                            $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2252                            return 1;
2253                        }
2254
2255                        my $c3 = $self->__get($ctx);
2256                        if (defined $c3 and $c3 eq "\012") {
2257                            # ,1,"foo, 3"\r\n
2258                            #              ^
2259                            $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2260                            return 1;
2261                        }
2262
2263                        if ($ctx->{useIO} and !$ctx->{eol_len} and $c3 !~ /[^\x09\x20-\x7E]/) {
2264                            # ,1,"foo\n 3",,"bar"\r
2265                            # baz,4
2266                            # ^
2267                            $self->__set_eol_is_cr($ctx);
2268                            $ctx->{used}--;
2269                            $ctx->{has_ahead} = 1;
2270                            $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2271                            return 1;
2272                        }
2273
2274                        $self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
2275                        return;
2276                    }
2277
2278                    if ($ctx->{allow_loose_quotes} and !$quoesc) {
2279                        # ,1,"foo, 3"456",,bar,\r\n
2280                        #            ^
2281                        $$v_ref .= $c;
2282                        $c = $c2;
2283                        goto RESTART;
2284                    }
2285                    # 1,"foo" ",3
2286                    #        ^
2287                    if ($quoesc) {
2288                        $ctx->{used}--;
2289                        $self->__error_inside_quotes($ctx, 2023);
2290                        return;
2291                    }
2292                    $self->__error_inside_quotes($ctx, 2011);
2293                    return;
2294                }
2295                # !waitingForField, !InsideQuotes
2296                if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
2297                    $ctx->{flag} |= IS_ERROR;
2298                    $$v_ref .= $c;
2299                } else {
2300                    $self->__error_inside_field($ctx, 2034);
2301                    return;
2302                }
2303            }
2304            elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
2305                # This means quote_char != escape_char
2306                if ($waitingForField) {
2307                    $waitingForField = 0;
2308                    if ($ctx->{allow_unquoted_escape}) {
2309                        # The escape character is the first character of an
2310                        # unquoted field
2311                        # ... get and store next character
2312                        my $c2 = $self->__get($ctx);
2313                        $$v_ref = "";
2314
2315                        if (!defined $c2) { # EOF
2316                            $ctx->{used}--;
2317                            $self->__error_inside_field($ctx, 2035);
2318                            return;
2319                        }
2320                        if ($c2 eq '0') {
2321                            $$v_ref .= "\0";
2322                        }
2323                        elsif (
2324                            (defined $quot and $c2 eq $quot) or
2325                            (defined $sep and $c2 eq $sep) or
2326                            (defined $esc and $c2 eq $esc) or
2327                            $ctx->{allow_loose_escapes}
2328                        ) {
2329                            if ($ctx->{utf8}) {
2330                                $ctx->{flag} |= IS_BINARY;
2331                            }
2332                            $$v_ref .= $c2;
2333                        } else {
2334                            $self->__parse_inside_quotes($ctx, 2025);
2335                            return;
2336                        }
2337                    }
2338                }
2339                elsif ($ctx->{flag} & IS_QUOTED) {
2340                    my $c2 = $self->__get($ctx);
2341                    if (!defined $c2) { # EOF
2342                        $ctx->{used}--;
2343                        $self->__error_inside_quotes($ctx, 2024);
2344                        return;
2345                    }
2346                    if ($c2 eq '0') {
2347                        $$v_ref .= "\0";
2348                    }
2349                    elsif (
2350                        (defined $quot and $c2 eq $quot) or
2351                        (defined $sep and $c2 eq $sep) or
2352                        (defined $esc and $c2 eq $esc) or
2353                        $ctx->{allow_loose_escapes}
2354                    ) {
2355                        if ($ctx->{utf8}) {
2356                            $ctx->{flag} |= IS_BINARY;
2357                        }
2358                        $$v_ref .= $c2;
2359                    } else {
2360                        $ctx->{used}--;
2361                        $self->__error_inside_quotes($ctx, 2025);
2362                        return;
2363                    }
2364                }
2365                elsif ($v_ref) {
2366                    my $c2 = $self->__get($ctx);
2367                    if (!defined $c2) { # EOF
2368                        $ctx->{used}--;
2369                        $self->__error_inside_field($ctx, 2035);
2370                        return;
2371                    }
2372                    $$v_ref .= $c2;
2373                }
2374                else {
2375                    $self->__error_inside_field($ctx, 2036);
2376                    return;
2377                }
2378            }
2379            elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
2380    EOLX:
2381                if ($waitingForField) {
2382                    # ,1,"foo, 3",,bar,
2383                    #                  ^
2384                    if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2385                        $$v_ref = undef;
2386                    } else {
2387                        $$v_ref = "";
2388                    }
2389                    unless ($ctx->{is_bound}) {
2390                        push @$fields, $$v_ref;
2391                    }
2392                    if ($ctx->{keep_meta_info} and $fflags) {
2393                        push @$fflags, $ctx->{flag};
2394                    }
2395                    return 1;
2396                }
2397                if ($ctx->{flag} & IS_QUOTED) {
2398                    # ,1,"foo\n 3",,bar,
2399                    #        ^
2400                    $ctx->{flag} |= IS_BINARY;
2401                    unless ($ctx->{binary}) {
2402                        $self->__error_inside_quotes($ctx, 2021);
2403                        return;
2404                    }
2405                    $$v_ref .= $c;
2406                }
2407                elsif ($ctx->{verbatim}) {
2408                    # ,1,foo\n 3,,bar,
2409                    # This feature should be deprecated
2410                    $ctx->{flag} |= IS_BINARY;
2411                    unless ($ctx->{binary}) {
2412                        $self->__error_inside_field($ctx, 2030);
2413                        return;
2414                    }
2415                    $$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
2416                }
2417                else {
2418                    # sep=,
2419                    #      ^
2420                    if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
2421                        $ctx->{sep} = $1;
2422                        use bytes;
2423                        my $len = length $ctx->{sep};
2424                        if ($len <= 16) {
2425                            $ctx->{sep_len} = $len == 1 ? 0 : $len;
2426                            return $self->____parse($ctx, $src, $fields, $fflags);
2427                        }
2428                    }
2429
2430                    # ,1,"foo\n 3",,bar
2431                    #                  ^
2432                    $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2433                    return 1;
2434                }
2435            }
2436            elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
2437                if ($waitingForField) {
2438                    $waitingForField = 0;
2439                    if ($ctx->{eol_is_cr}) {
2440                        # ,1,"foo\n 3",,bar,\r
2441                        #                   ^
2442                        $c = "\012";
2443                        goto RESTART;
2444                    }
2445
2446                    my $c2 = $self->__get($ctx);
2447                    if (!defined $c2) { # EOF
2448                        # ,1,"foo\n 3",,bar,\r
2449                        #                     ^
2450                        $c = undef;
2451                        goto RESTART;
2452                    }
2453                    if ($c2 eq "\012") { # \r is not optional before EOLX!
2454                        # ,1,"foo\n 3",,bar,\r\n
2455                        #                     ^
2456                        $c = $c2;
2457                        goto RESTART;
2458                    }
2459
2460                    if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2461                        # ,1,"foo\n 3",,bar,\r
2462                        # baz,4
2463                        # ^
2464                        $self->__set_eol_is_cr($ctx);
2465                        $ctx->{used}--;
2466                        $ctx->{has_ahead} = 1;
2467                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2468                        return 1;
2469                    }
2470
2471                    # ,1,"foo\n 3",,bar,\r\t
2472                    #                     ^
2473                    $ctx->{used}--;
2474                    $self->__error_inside_field($ctx, 2031);
2475                    return;
2476                }
2477                if ($ctx->{flag} & IS_QUOTED) {
2478                    # ,1,"foo\r 3",,bar,\r\t
2479                    #        ^
2480                    $ctx->{flag} |= IS_BINARY;
2481                    unless ($ctx->{binary}) {
2482                        $self->__error_inside_quotes($ctx, 2022);
2483                        return;
2484                    }
2485                    $$v_ref .= $c;
2486                }
2487                else {
2488                    if ($ctx->{eol_is_cr}) {
2489                        # ,1,"foo\n 3",,bar\r
2490                        #                  ^
2491                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2492                        return 1;
2493                    }
2494
2495                    my $c2 = $self->__get($ctx);
2496                    if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
2497                        # ,1,"foo\n 3",,bar\r\n
2498                        #                    ^
2499                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2500                        return 1;
2501                    }
2502
2503                    if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2504                        # ,1,"foo\n 3",,bar\r
2505                        # baz,4
2506                        # ^
2507                        $self->__set_eol_is_cr($ctx);
2508                        $ctx->{used}--;
2509                        $ctx->{has_ahead} = 1;
2510                        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2511                        return 1;
2512                    }
2513
2514                    # ,1,"foo\n 3",,bar\r\t
2515                    #                    ^
2516                    $self->__error_inside_field($ctx, 2032);
2517                    return;
2518                }
2519            }
2520            else {
2521                if ($ctx->{eolx} and $c eq $eol) {
2522                    $c = '';
2523                    goto EOLX;
2524                }
2525
2526                if ($waitingForField) {
2527                    if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
2528                        do {
2529                            $c = $self->__get($ctx);
2530                            last if !defined $c;
2531                        } while $self->__is_whitespace($ctx, $c);
2532                        goto RESTART;
2533                    }
2534                    $waitingForField = 0;
2535                    goto RESTART;
2536                }
2537                if ($ctx->{flag} & IS_QUOTED) {
2538                    if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2539                        $ctx->{flag} |= IS_BINARY;
2540                        unless ($ctx->{binary} or $ctx->{utf8}) {
2541                            $self->__error_inside_quotes($ctx, 2026);
2542                            return;
2543                        }
2544                    }
2545                    $$v_ref .= $c;
2546                } else {
2547                    if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2548                        $ctx->{flag} |= IS_BINARY;
2549                        unless ($ctx->{binary} or $ctx->{utf8}) {
2550                            $self->__error_inside_field($ctx, 2037);
2551                            return;
2552                        }
2553                    }
2554                    $$v_ref .= $c;
2555                }
2556            }
2557            last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
2558        }
2559    }
2560
2561    if ($waitingForField) {
2562        if ($seenSomething or !$ctx->{useIO}) {
2563            # new field
2564            if (!$v_ref) {
2565                if ($ctx->{is_bound}) {
2566                    $v_ref = $self->__bound_field($ctx, $fnum, 0);
2567                } else {
2568                    $value = '';
2569                    $v_ref = \$value;
2570                }
2571                $fnum++;
2572                return unless $v_ref;
2573                $ctx->{flag} = 0;
2574                $ctx->{fld_idx}++;
2575            }
2576            if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2577                $$v_ref = undef;
2578            } else {
2579                $$v_ref = "";
2580            }
2581            unless ($ctx->{is_bound}) {
2582                push @$fields, $$v_ref;
2583            }
2584            if ($ctx->{keep_meta_info} and $fflags) {
2585                push @$fflags, $ctx->{flag};
2586            }
2587            return 1;
2588        }
2589        $self->SetDiag(2012);
2590        return;
2591    }
2592
2593    if ($ctx->{flag} & IS_QUOTED) {
2594        $self->__error_inside_quotes($ctx, 2027);
2595        return;
2596    }
2597
2598    if ($v_ref) {
2599        $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2600    }
2601    return 1;
2602}
2603
2604sub __get_from_src {
2605    my ($self, $ctx, $src) = @_;
2606    return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
2607    return 1 if $ctx->{used} < $ctx->{size};
2608    return unless $ctx->{useIO};
2609    my $res = $src->getline;
2610    if (defined $res) {
2611        if ($ctx->{has_ahead}) {
2612            $ctx->{tmp} = $self->{_AHEAD};
2613            $ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
2614            $ctx->{tmp} .= $res;
2615            $ctx->{has_ahead} = 0;
2616        } else {
2617            $ctx->{tmp} = $res;
2618        }
2619        if ($ctx->{size} = length $ctx->{tmp}) {
2620            $ctx->{used} = -1;
2621            $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2622            pos($ctx->{tmp}) = 0;
2623            return 1;
2624        }
2625    } elsif (delete $ctx->{has_leftover}) {
2626        $ctx->{tmp} = $self->{_AHEAD};
2627        $ctx->{has_ahead} = 0;
2628        $ctx->{useIO} |= useIO_EOF;
2629        if ($ctx->{size} = length $ctx->{tmp}) {
2630            $ctx->{used} = -1;
2631            $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
2632            pos($ctx->{tmp}) = 0;
2633            return 1;
2634        }
2635    }
2636    $ctx->{tmp} = '' unless defined $ctx->{tmp};
2637    $ctx->{useIO} |= useIO_EOF;
2638    return;
2639}
2640
2641sub __set_eol_is_cr {
2642    my ($self, $ctx) = @_;
2643    $ctx->{eol} = "\015";
2644    $ctx->{eol_is_cr} = 1;
2645    $ctx->{eol_len} = 1;
2646    %{$self->{_CACHE}} = %$ctx;
2647
2648    $self->{eol} = $ctx->{eol};
2649}
2650
2651sub __bound_field {
2652    my ($self, $ctx, $i, $keep) = @_;
2653    if ($i >= $ctx->{is_bound}) {
2654        $self->SetDiag(3006);
2655        return;
2656    }
2657    if (ref $ctx->{bound} eq 'ARRAY') {
2658        my $ref = $ctx->{bound}[$i];
2659        if (ref $ref) {
2660            if ($keep) {
2661                return $ref;
2662            }
2663            unless (Scalar::Util::readonly($$ref)) {
2664                $$ref = "";
2665                return $ref;
2666            }
2667        }
2668    }
2669    $self->SetDiag(3008);
2670    return;
2671}
2672
2673sub __get {
2674    my ($self, $ctx) = @_;
2675    return unless defined $ctx->{used};
2676    return if $ctx->{used} >= $ctx->{size};
2677    my $pos = pos($ctx->{tmp});
2678    if ($ctx->{tmp} =~ /\G($ctx->{_re}|.)/gs) {
2679        my $c = $1;
2680        if ($c =~ /[^\x09\x20-\x7e]/) {
2681            $ctx->{flag} |= IS_BINARY;
2682        }
2683        $ctx->{used} = pos($ctx->{tmp});
2684        return $c;
2685    } else {
2686        pos($ctx->{tmp}) = $pos;
2687        return;
2688    }
2689}
2690
2691sub __error_inside_quotes {
2692    my ($self, $ctx, $error) = @_;
2693    $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2694}
2695
2696sub __error_inside_field {
2697    my ($self, $ctx, $error) = @_;
2698    $self->__parse_error($ctx, $error, $ctx->{used} - 1);
2699}
2700
2701sub __parse_error {
2702    my ($self, $ctx, $error, $pos) = @_;
2703    $self->{_ERROR_POS} = $pos;
2704    $self->{_ERROR_FLD} = $ctx->{fld_idx};
2705    $self->{_ERROR_INPUT} = $ctx->{tmp} if $ctx->{tmp};
2706    $self->SetDiag($error);
2707    return;
2708}
2709
2710sub __is_whitespace {
2711    my ($self, $ctx, $c) = @_;
2712    return unless defined $c;
2713    return (
2714        (!defined $ctx->{sep} or $c ne $ctx->{sep}) &&
2715        (!defined $ctx->{quo} or $c ne $ctx->{quo}) &&
2716        (!defined $ctx->{escape_char} or $c ne $ctx->{escape_char}) &&
2717        ($c eq " " or $c eq "\t")
2718    );
2719}
2720
2721sub __push_value { # AV_PUSH (part of)
2722    my ($self, $ctx, $v_ref, $fields, $fflags, $flag, $fnum) = @_;
2723    utf8::encode($$v_ref) if $ctx->{utf8};
2724    if ($ctx->{formula} && $$v_ref && substr($$v_ref, 0, 1) eq '=') {
2725        my $value = $self->_formula($ctx, $$v_ref, $fnum);
2726        push @$fields, defined $value ? $value : undef;
2727        return;
2728    }
2729    if (
2730        (!defined $$v_ref or $$v_ref eq '') and
2731        ($ctx->{empty_is_undef} or (!($flag & IS_QUOTED) and $ctx->{blank_is_undef}))
2732    ) {
2733        $$v_ref = undef;
2734    } else {
2735        if ($ctx->{allow_whitespace} && !($flag & IS_QUOTED)) {
2736            $$v_ref =~ s/[ \t]+$//;
2737        }
2738        if ($flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} || _is_valid_utf8($$v_ref))) {
2739            utf8::decode($$v_ref);
2740        }
2741    }
2742    unless ($ctx->{is_bound}) {
2743        push @$fields, $$v_ref;
2744    }
2745    if ($ctx->{keep_meta_info} and $fflags) {
2746        push @$fflags, $flag;
2747    }
2748}
2749
2750sub getline {
2751    my ($self, $io) = @_;
2752
2753    my (@fields, @fflags);
2754    my $res = $self->__parse(\@fields, \@fflags, $io, 1);
2755    $res ? \@fields : undef;
2756}
2757
2758sub getline_all {
2759    my ( $self, $io, $offset, $len ) = @_;
2760
2761    my $ctx = $self->_setup_ctx;
2762
2763    my $tail = 0;
2764    my $n = 0;
2765    $offset ||= 0;
2766
2767    if ( $offset < 0 ) {
2768        $tail = -$offset;
2769        $offset = -1;
2770    }
2771
2772    my (@row, @list);
2773    while ($self->___parse($ctx, \@row, undef, $io, 1)) {
2774        $ctx = $self->_setup_ctx;
2775
2776        if ($offset > 0) {
2777            $offset--;
2778            @row = ();
2779            next;
2780        }
2781        if ($n++ >= $tail and $tail) {
2782            shift @list;
2783            $n--;
2784        }
2785        if (($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
2786            unless ($self->_hook(after_parse => \@row)) {
2787                @row = ();
2788                next;
2789            }
2790        }
2791        push @list, [@row];
2792        @row = ();
2793
2794        last if defined $len && $n >= $len and $offset >= 0;   # exceeds limit size
2795    }
2796
2797    if ( defined $len && $n > $len ) {
2798        @list = splice( @list, 0, $len);
2799    }
2800
2801    return \@list;
2802}
2803
2804sub _is_valid_utf8 {
2805    return ( $_[0] =~ /^(?:
2806         [\x00-\x7F]
2807        |[\xC2-\xDF][\x80-\xBF]
2808        |[\xE0][\xA0-\xBF][\x80-\xBF]
2809        |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
2810        |[\xED][\x80-\x9F][\x80-\xBF]
2811        |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
2812        |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
2813        |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
2814        |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
2815    )+$/x )  ? 1 : 0;
2816}
2817
2818################################################################################
2819# methods for errors
2820################################################################################
2821
2822sub _set_error_diag {
2823    my ( $self, $error, $pos ) = @_;
2824
2825    $self->SetDiag($error);
2826
2827    if (defined $pos) {
2828        $_[0]->{_ERROR_POS} = $pos;
2829    }
2830
2831    return;
2832}
2833
2834sub error_input {
2835    my $self = shift;
2836    if ($self and ((Scalar::Util::reftype($self) || '') eq 'HASH' or (ref $self) =~ /^Text::CSV/)) {
2837        return $self->{_ERROR_INPUT};
2838    }
2839    return;
2840}
2841
2842sub _sv_diag {
2843    my ($self, $error) = @_;
2844    bless [$error, $ERRORS->{$error}], 'Text::CSV::ErrorDiag';
2845}
2846
2847sub _set_diag {
2848    my ($self, $ctx, $error) = @_;
2849
2850    $last_error = $self->_sv_diag($error);
2851    $self->{_ERROR_DIAG} = $last_error;
2852    if ($error == 0) {
2853        $self->{_ERROR_POS} = 0;
2854        $self->{_ERROR_FLD} = 0;
2855        $self->{_ERROR_INPUT} = undef;
2856        $ctx->{has_error_input} = 0;
2857    }
2858    if ($error == 2012) { # EOF
2859        $self->{_EOF} = 1;
2860    }
2861    if ($ctx->{auto_diag}) {
2862        $self->error_diag;
2863    }
2864    return $last_error;
2865}
2866
2867sub SetDiag {
2868    my ($self, $error, $errstr) = @_;
2869    my $res;
2870    if (ref $self) {
2871        my $ctx = $self->_setup_ctx;
2872        $res = $self->_set_diag($ctx, $error);
2873
2874    } else {
2875        $res = $self->_sv_diag($error);
2876    }
2877    if (defined $errstr) {
2878        $res->[1] = $errstr;
2879    }
2880    $res;
2881}
2882
2883################################################################################
2884package Text::CSV::ErrorDiag;
2885
2886use strict;
2887use overload (
2888    '""' => \&stringify,
2889    '+'  => \&numeric,
2890    '-'  => \&numeric,
2891    '*'  => \&numeric,
2892    '/'  => \&numeric,
2893    fallback => 1,
2894);
2895
2896
2897sub numeric {
2898    my ($left, $right) = @_;
2899    return ref $left ? $left->[0] : $right->[0];
2900}
2901
2902
2903sub stringify {
2904    $_[0]->[1];
2905}
2906################################################################################
29071;
2908__END__
2909
2910=head1 NAME
2911
2912Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
2913
2914
2915=head1 SYNOPSIS
2916
2917This section is taken from Text::CSV_XS.
2918
2919 # Functional interface
2920 use Text::CSV_PP qw( csv );
2921
2922 # Read whole file in memory
2923 my $aoa = csv (in => "data.csv");    # as array of array
2924 my $aoh = csv (in => "data.csv",
2925                headers => "auto");   # as array of hash
2926
2927 # Write array of arrays as csv file
2928 csv (in => $aoa, out => "file.csv", sep_char=> ";");
2929
2930 # Only show lines where "code" is odd
2931 csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
2932
2933 # Object interface
2934 use Text::CSV_PP;
2935
2936 my @rows;
2937 # Read/parse CSV
2938 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
2939 open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
2940 while (my $row = $csv->getline ($fh)) {
2941     $row->[2] =~ m/pattern/ or next; # 3rd field should match
2942     push @rows, $row;
2943     }
2944 close $fh;
2945
2946 # and write as CSV
2947 open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
2948 $csv->say ($fh, $_) for @rows;
2949 close $fh or die "new.csv: $!";
2950
2951=head1 DESCRIPTION
2952
2953Text::CSV_PP is a pure-perl module that provides facilities for the
2954composition and decomposition of comma-separated values. This is
2955(almost) compatible with much faster L<Text::CSV_XS>, and mainly
2956used as its fallback module when you use L<Text::CSV> module without
2957having installed Text::CSV_XS. If you don't have any reason to use
2958this module directly, use Text::CSV for speed boost and portability
2959(or maybe Text::CSV_XS when you write an one-off script and don't need
2960to care about portability).
2961
2962The following caveats are taken from the doc of Text::CSV_XS.
2963
2964=head2 Embedded newlines
2965
2966B<Important Note>:  The default behavior is to accept only ASCII characters
2967in the range from C<0x20> (space) to C<0x7E> (tilde).   This means that the
2968fields can not contain newlines. If your data contains newlines embedded in
2969fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
2970set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
2971parsing options, you will always want to set binary.
2972
2973But you still have the problem  that you have to pass a correct line to the
2974L</parse> method, which is more complicated from the usual point of usage:
2975
2976 my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
2977 while (<>) {       #  WRONG!
2978     $csv->parse ($_);
2979     my @fields = $csv->fields ();
2980     }
2981
2982this will break, as the C<while> might read broken lines:  it does not care
2983about the quoting. If you need to support embedded newlines,  the way to go
2984is to  B<not>  pass L<C<eol>|/eol> in the parser  (it accepts C<\n>, C<\r>,
2985B<and> C<\r\n> by default) and then
2986
2987 my $csv = Text::CSV_PP->new ({ binary => 1 });
2988 open my $fh, "<", $file or die "$file: $!";
2989 while (my $row = $csv->getline ($fh)) {
2990     my @fields = @$row;
2991     }
2992
2993The old(er) way of using global file handles is still supported
2994
2995 while (my $row = $csv->getline (*ARGV)) { ... }
2996
2997=head2 Unicode
2998
2999Unicode is only tested to work with perl-5.8.2 and up.
3000
3001See also L</BOM>.
3002
3003The simplest way to ensure the correct encoding is used for  in- and output
3004is by either setting layers on the filehandles, or setting the L</encoding>
3005argument for L</csv>.
3006
3007 open my $fh, "<:encoding(UTF-8)", "in.csv"  or die "in.csv: $!";
3008or
3009 my $aoa = csv (in => "in.csv",     encoding => "UTF-8");
3010
3011 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
3012or
3013 csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
3014
3015On parsing (both for  L</getline> and  L</parse>),  if the source is marked
3016being UTF8, then all fields that are marked binary will also be marked UTF8.
3017
3018On combining (L</print>  and  L</combine>):  if any of the combining fields
3019was marked UTF8, the resulting string will be marked as UTF8.  Note however
3020that all fields  I<before>  the first field marked UTF8 and contained 8-bit
3021characters that were not upgraded to UTF8,  these will be  C<bytes>  in the
3022resulting string too, possibly causing unexpected errors.  If you pass data
3023of different encoding,  or you don't know if there is  different  encoding,
3024force it to be upgraded before you pass them on:
3025
3026 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
3027
3028For complete control over encoding, please use L<Text::CSV::Encoded>:
3029
3030 use Text::CSV::Encoded;
3031 my $csv = Text::CSV::Encoded->new ({
3032     encoding_in  => "iso-8859-1", # the encoding comes into   Perl
3033     encoding_out => "cp1252",     # the encoding comes out of Perl
3034     });
3035
3036 $csv = Text::CSV::Encoded->new ({ encoding  => "utf8" });
3037 # combine () and print () accept *literally* utf8 encoded data
3038 # parse () and getline () return *literally* utf8 encoded data
3039
3040 $csv = Text::CSV::Encoded->new ({ encoding  => undef }); # default
3041 # combine () and print () accept UTF8 marked data
3042 # parse () and getline () return UTF8 marked data
3043
3044=head2 BOM
3045
3046BOM  (or Byte Order Mark)  handling is available only inside the L</header>
3047method.   This method supports the following encodings: C<utf-8>, C<utf-1>,
3048C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
3049C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>.
3050
3051If a file has a BOM, the easiest way to deal with that is
3052
3053 my $aoh = csv (in => $file, detect_bom => 1);
3054
3055All records will be encoded based on the detected BOM.
3056
3057This implies a call to the  L</header>  method,  which defaults to also set
3058the L</column_names>. So this is B<not> the same as
3059
3060 my $aoh = csv (in => $file, headers => "auto");
3061
3062which only reads the first record to set  L</column_names>  but ignores any
3063meaning of possible present BOM.
3064
3065=head1 METHODS
3066
3067This section is also taken from Text::CSV_XS.
3068
3069=head2 version
3070
3071(Class method) Returns the current module version.
3072
3073=head2 new
3074
3075(Class method) Returns a new instance of class Text::CSV_PP. The attributes
3076are described by the (optional) hash ref C<\%attr>.
3077
3078 my $csv = Text::CSV_PP->new ({ attributes ... });
3079
3080The following attributes are available:
3081
3082=head3 eol
3083
3084 my $csv = Text::CSV_PP->new ({ eol => $/ });
3085           $csv->eol (undef);
3086 my $eol = $csv->eol;
3087
3088The end-of-line string to add to rows for L</print> or the record separator
3089for L</getline>.
3090
3091When not passed in a B<parser> instance,  the default behavior is to accept
3092C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
3093all. Passing C<undef> or the empty string behave the same.
3094
3095When not passed in a B<generating> instance,  records are not terminated at
3096all, so it is probably wise to pass something you expect. A safe choice for
3097C<eol> on output is either C<$/> or C<\r\n>.
3098
3099Common values for C<eol> are C<"\012"> (C<\n> or Line Feed),  C<"\015\012">
3100(C<\r\n> or Carriage Return, Line Feed),  and C<"\015">  (C<\r> or Carriage
3101Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
3102
3103If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
3104only a Carriage Return without Line Feed, will be L</parse>d correct.
3105
3106=head3 sep_char
3107
3108 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
3109         $csv->sep_char (";");
3110 my $c = $csv->sep_char;
3111
3112The char used to separate fields, by default a comma. (C<,>).  Limited to a
3113single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
3114(tilde). When longer sequences are required, use L<C<sep>|/sep>.
3115
3116The separation character can not be equal to the quote character  or to the
3117escape character.
3118
3119=head3 sep
3120
3121 my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
3122           $csv->sep (";");
3123 my $sep = $csv->sep;
3124
3125The chars used to separate fields, by default undefined. Limited to 8 bytes.
3126
3127When set, overrules L<C<sep_char>|/sep_char>.  If its length is one byte it
3128acts as an alias to L<C<sep_char>|/sep_char>.
3129
3130=head3 quote_char
3131
3132 my $csv = Text::CSV_PP->new ({ quote_char => "'" });
3133         $csv->quote_char (undef);
3134 my $c = $csv->quote_char;
3135
3136The character to quote fields containing blanks or binary data,  by default
3137the double quote character (C<">).  A value of undef suppresses quote chars
3138(for simple cases only). Limited to a single-byte character, usually in the
3139range from  C<0x20> (space) to  C<0x7E> (tilde).  When longer sequences are
3140required, use L<C<quote>|/quote>.
3141
3142C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
3143
3144=head3 quote
3145
3146 my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
3147             $csv->quote ("'");
3148 my $quote = $csv->quote;
3149
3150The chars used to quote fields, by default undefined. Limited to 8 bytes.
3151
3152When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
3153it acts as an alias to L<C<quote_char>|/quote_char>.
3154
3155=head3 escape_char
3156
3157 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3158         $csv->escape_char (":");
3159 my $c = $csv->escape_char;
3160
3161The character to  escape  certain characters inside quoted fields.  This is
3162limited to a  single-byte  character,  usually  in the  range from  C<0x20>
3163(space) to C<0x7E> (tilde).
3164
3165The C<escape_char> defaults to being the double-quote mark (C<">). In other
3166words the same as the default L<C<quote_char>|/quote_char>. This means that
3167doubling the quote mark in a field escapes it:
3168
3169 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
3170
3171If  you  change  the   L<C<quote_char>|/quote_char>  without  changing  the
3172C<escape_char>,  the  C<escape_char> will still be the double-quote (C<">).
3173If instead you want to escape the  L<C<quote_char>|/quote_char> by doubling
3174it you will need to also change the  C<escape_char>  to be the same as what
3175you have changed the L<C<quote_char>|/quote_char> to.
3176
3177Setting C<escape_char> to <undef> or C<""> will disable escaping completely
3178and is greatly discouraged. This will also disable C<escape_null>.
3179
3180The escape character can not be equal to the separation character.
3181
3182=head3 binary
3183
3184 my $csv = Text::CSV_PP->new ({ binary => 1 });
3185         $csv->binary (0);
3186 my $f = $csv->binary;
3187
3188If this attribute is C<1>,  you may use binary characters in quoted fields,
3189including line feeds, carriage returns and C<NULL> bytes. (The latter could
3190be escaped as C<"0>.) By default this feature is off.
3191
3192If a string is marked UTF8,  C<binary> will be turned on automatically when
3193binary characters other than C<CR> and C<NL> are encountered.   Note that a
3194simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
3195so setting C<< { binary => 1 } >> is still a wise option.
3196
3197=head3 strict
3198
3199 my $csv = Text::CSV_PP->new ({ strict => 1 });
3200         $csv->strict (0);
3201 my $f = $csv->strict;
3202
3203If this attribute is set to C<1>, any row that parses to a different number
3204of fields than the previous row will cause the parser to throw error 2014.
3205
3206=head3 formula_handling
3207
3208=head3 formula
3209
3210 my $csv = Text::CSV_PP->new ({ formula => "none" });
3211         $csv->formula ("none");
3212 my $f = $csv->formula;
3213
3214This defines the behavior of fields containing I<formulas>. As formulas are
3215considered dangerous in spreadsheets, this attribute can define an optional
3216action to be taken if a field starts with an equal sign (C<=>).
3217
3218For purpose of code-readability, this can also be written as
3219
3220 my $csv = Text::CSV_PP->new ({ formula_handling => "none" });
3221         $csv->formula_handling ("none");
3222 my $f = $csv->formula_handling;
3223
3224Possible values for this attribute are
3225
3226=over 2
3227
3228=item none
3229
3230Take no specific action. This is the default.
3231
3232 $csv->formula ("none");
3233
3234=item die
3235
3236Cause the process to C<die> whenever a leading C<=> is encountered.
3237
3238 $csv->formula ("die");
3239
3240=item croak
3241
3242Cause the process to C<croak> whenever a leading C<=> is encountered.  (See
3243L<Carp>)
3244
3245 $csv->formula ("croak");
3246
3247=item diag
3248
3249Report position and content of the field whenever a leading  C<=> is found.
3250The value of the field is unchanged.
3251
3252 $csv->formula ("diag");
3253
3254=item empty
3255
3256Replace the content of fields that start with a C<=> with the empty string.
3257
3258 $csv->formula ("empty");
3259 $csv->formula ("");
3260
3261=item undef
3262
3263Replace the content of fields that start with a C<=> with C<undef>.
3264
3265 $csv->formula ("undef");
3266 $csv->formula (undef);
3267
3268=back
3269
3270All other values will give a warning and then fallback to C<diag>.
3271
3272=head3 decode_utf8
3273
3274 my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
3275         $csv->decode_utf8 (0);
3276 my $f = $csv->decode_utf8;
3277
3278This attributes defaults to TRUE.
3279
3280While I<parsing>,  fields that are valid UTF-8, are automatically set to be
3281UTF-8, so that
3282
3283  $csv->parse ("\xC4\xA8\n");
3284
3285results in
3286
3287  PV("\304\250"\0) [UTF8 "\x{128}"]
3288
3289Sometimes it might not be a desired action.  To prevent those upgrades, set
3290this attribute to false, and the result will be
3291
3292  PV("\304\250"\0)
3293
3294=head3 auto_diag
3295
3296 my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
3297         $csv->auto_diag (2);
3298 my $l = $csv->auto_diag;
3299
3300Set this attribute to a number between C<1> and C<9> causes  L</error_diag>
3301to be automatically called in void context upon errors.
3302
3303In case of error C<2012 - EOF>, this call will be void.
3304
3305If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
3306on errors instead of C<warn>.  If set to anything unrecognized,  it will be
3307silently ignored.
3308
3309Future extensions to this feature will include more reliable auto-detection
3310of  C<autodie>  being active in the scope of which the error occurred which
3311will increment the value of C<auto_diag> with  C<1> the moment the error is
3312detected.
3313
3314=head3 diag_verbose
3315
3316 my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
3317         $csv->diag_verbose (2);
3318 my $l = $csv->diag_verbose;
3319
3320Set the verbosity of the output triggered by C<auto_diag>.   Currently only
3321adds the current  input-record-number  (if known)  to the diagnostic output
3322with an indication of the position of the error.
3323
3324=head3 blank_is_undef
3325
3326 my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
3327         $csv->blank_is_undef (0);
3328 my $f = $csv->blank_is_undef;
3329
3330Under normal circumstances, C<CSV> data makes no distinction between quoted-
3331and unquoted empty fields.  These both end up in an empty string field once
3332read, thus
3333
3334 1,"",," ",2
3335
3336is read as
3337
3338 ("1", "", "", " ", "2")
3339
3340When I<writing>  C<CSV> files with either  L<C<always_quote>|/always_quote>
3341or  L<C<quote_empty>|/quote_empty> set, the unquoted  I<empty> field is the
3342result of an undefined value.   To enable this distinction when  I<reading>
3343C<CSV>  data,  the  C<blank_is_undef>  attribute will cause  unquoted empty
3344fields to be set to C<undef>, causing the above to be parsed as
3345
3346 ("1", "", undef, " ", "2")
3347
3348note that this is specifically important when loading  C<CSV> fields into a
3349database that allows C<NULL> values,  as the perl equivalent for C<NULL> is
3350C<undef> in L<DBI> land.
3351
3352=head3 empty_is_undef
3353
3354 my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
3355         $csv->empty_is_undef (0);
3356 my $f = $csv->empty_is_undef;
3357
3358Going one  step  further  than  L<C<blank_is_undef>|/blank_is_undef>,  this
3359attribute converts all empty fields to C<undef>, so
3360
3361 1,"",," ",2
3362
3363is read as
3364
3365 (1, undef, undef, " ", 2)
3366
3367Note that this effects only fields that are  originally  empty,  not fields
3368that are empty after stripping allowed whitespace. YMMV.
3369
3370=head3 allow_whitespace
3371
3372 my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
3373         $csv->allow_whitespace (0);
3374 my $f = $csv->allow_whitespace;
3375
3376When this option is set to true,  the whitespace  (C<TAB>'s and C<SPACE>'s)
3377surrounding  the  separation character  is removed when parsing.  If either
3378C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
3379L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
3380be considered whitespace.
3381
3382Now lines like:
3383
3384 1 , "foo" , bar , 3 , zapp
3385
3386are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
3387
3388Note that  B<all>  whitespace is stripped from both  start and  end of each
3389field.  That would make it  I<more> than a I<feature> to enable parsing bad
3390C<CSV> lines, as
3391
3392 1,   2.0,  3,   ape  , monkey
3393
3394will now be parsed as
3395
3396 ("1", "2.0", "3", "ape", "monkey")
3397
3398even if the original line was perfectly acceptable C<CSV>.
3399
3400=head3 allow_loose_quotes
3401
3402 my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
3403         $csv->allow_loose_quotes (0);
3404 my $f = $csv->allow_loose_quotes;
3405
3406By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
3407characters like
3408
3409 1,foo "bar" baz,42
3410
3411would result in parse error 2034.  Though it is still bad practice to allow
3412this format,  we  cannot  help  the  fact  that  some  vendors  make  their
3413applications spit out lines styled this way.
3414
3415If there is B<really> bad C<CSV> data, like
3416
3417 1,"foo "bar" baz",42
3418
3419or
3420
3421 1,""foo bar baz"",42
3422
3423there is a way to get this data-line parsed and leave the quotes inside the
3424quoted field as-is.  This can be achieved by setting  C<allow_loose_quotes>
3425B<AND> making sure that the L<C<escape_char>|/escape_char> is  I<not> equal
3426to L<C<quote_char>|/quote_char>.
3427
3428=head3 allow_loose_escapes
3429
3430 my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
3431         $csv->allow_loose_escapes (0);
3432 my $f = $csv->allow_loose_escapes;
3433
3434Parsing fields  that  have  L<C<escape_char>|/escape_char>  characters that
3435escape characters that do not need to be escaped, like:
3436
3437 my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3438 $csv->parse (qq{1,"my bar\'s",baz,42});
3439
3440would result in parse error 2025.   Though it is bad practice to allow this
3441format,  this attribute enables you to treat all escape character sequences
3442equal.
3443
3444=head3 allow_unquoted_escape
3445
3446 my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
3447         $csv->allow_unquoted_escape (0);
3448 my $f = $csv->allow_unquoted_escape;
3449
3450A backward compatibility issue where L<C<escape_char>|/escape_char> differs
3451from L<C<quote_char>|/quote_char>  prevents  L<C<escape_char>|/escape_char>
3452to be in the first position of a field.  If L<C<quote_char>|/quote_char> is
3453equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
3454this would be illegal:
3455
3456 1,\0,2
3457
3458Setting this attribute to C<1>  might help to overcome issues with backward
3459compatibility and allow this style.
3460
3461=head3 always_quote
3462
3463 my $csv = Text::CSV_PP->new ({ always_quote => 1 });
3464         $csv->always_quote (0);
3465 my $f = $csv->always_quote;
3466
3467By default the generated fields are quoted only if they I<need> to be.  For
3468example, if they contain the separator character. If you set this attribute
3469to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
3470quoted, see L</blank_is_undef>). This makes it quite often easier to handle
3471exported data in external applications.
3472
3473=head3 quote_space
3474
3475 my $csv = Text::CSV_PP->new ({ quote_space => 1 });
3476         $csv->quote_space (0);
3477 my $f = $csv->quote_space;
3478
3479By default,  a space in a field would trigger quotation.  As no rule exists
3480this to be forced in C<CSV>,  nor any for the opposite, the default is true
3481for safety.   You can exclude the space  from this trigger  by setting this
3482attribute to 0.
3483
3484=head3 quote_empty
3485
3486 my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
3487         $csv->quote_empty (0);
3488 my $f = $csv->quote_empty;
3489
3490By default the generated fields are quoted only if they I<need> to be.   An
3491empty (defined) field does not need quotation. If you set this attribute to
3492C<1> then I<empty> defined fields will be quoted.  (C<undef> fields are not
3493quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
3494
3495=head3 quote_binary
3496
3497 my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
3498         $csv->quote_binary (0);
3499 my $f = $csv->quote_binary;
3500
3501By default,  all "unsafe" bytes inside a string cause the combined field to
3502be quoted.  By setting this attribute to C<0>, you can disable that trigger
3503for bytes >= C<0x7F>.
3504
3505=head3 escape_null
3506
3507 my $csv = Text::CSV_PP->new ({ escape_null => 1 });
3508         $csv->escape_null (0);
3509 my $f = $csv->escape_null;
3510
3511By default, a C<NULL> byte in a field would be escaped. This option enables
3512you to treat the  C<NULL>  byte as a simple binary character in binary mode
3513(the C<< { binary => 1 } >> is set).  The default is true.  You can prevent
3514C<NULL> escapes by setting this attribute to C<0>.
3515
3516When the C<escape_char> attribute is set to undefined,  this attribute will
3517be set to false.
3518
3519The default setting will encode "=\x00=" as
3520
3521 "="0="
3522
3523With C<escape_null> set, this will result in
3524
3525 "=\x00="
3526
3527The default when using the C<csv> function is C<false>.
3528
3529For backward compatibility reasons,  the deprecated old name  C<quote_null>
3530is still recognized.
3531
3532=head3 keep_meta_info
3533
3534 my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
3535         $csv->keep_meta_info (0);
3536 my $f = $csv->keep_meta_info;
3537
3538By default, the parsing of input records is as simple and fast as possible.
3539However,  some parsing information - like quotation of the original field -
3540is lost in that process.  Setting this flag to true enables retrieving that
3541information after parsing with  the methods  L</meta_info>,  L</is_quoted>,
3542and L</is_binary> described below.  Default is false for performance.
3543
3544If you set this attribute to a value greater than 9,   than you can control
3545output quotation style like it was used in the input of the the last parsed
3546record (unless quotation was added because of other reasons).
3547
3548 my $csv = Text::CSV_PP->new ({
3549    binary         => 1,
3550    keep_meta_info => 1,
3551    quote_space    => 0,
3552    });
3553
3554 my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
3555
3556 $csv->print (*STDOUT, \@row);
3557 # 1,,, , ,f,g,"h""h",help,help
3558 $csv->keep_meta_info (11);
3559 $csv->print (*STDOUT, \@row);
3560 # 1,,"", ," ",f,"g","h""h",help,"help"
3561
3562=head3 undef_str
3563
3564 my $csv = Text::CSV_PP->new ({ undef_str => "\\N" });
3565         $csv->undef_str (undef);
3566 my $s = $csv->undef_str;
3567
3568This attribute optionally defines the output of undefined fields. The value
3569passed is not changed at all, so if it needs quotation, the quotation needs
3570to be included in the value of the attribute.  Use with caution, as passing
3571a value like  C<",",,,,""">  will for sure mess up your output. The default
3572for this attribute is C<undef>, meaning no special treatment.
3573
3574This attribute is useful when exporting  CSV data  to be imported in custom
3575loaders, like for MySQL, that recognize special sequences for C<NULL> data.
3576
3577This attribute has no meaning when parsing CSV data.
3578
3579=head3 verbatim
3580
3581 my $csv = Text::CSV_PP->new ({ verbatim => 1 });
3582         $csv->verbatim (0);
3583 my $f = $csv->verbatim;
3584
3585This is a quite controversial attribute to set,  but makes some hard things
3586possible.
3587
3588The rationale behind this attribute is to tell the parser that the normally
3589special characters newline (C<NL>) and Carriage Return (C<CR>)  will not be
3590special when this flag is set,  and be dealt with  as being ordinary binary
3591characters. This will ease working with data with embedded newlines.
3592
3593When  C<verbatim>  is used with  L</getline>,  L</getline>  auto-C<chomp>'s
3594every line.
3595
3596Imagine a file format like
3597
3598 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
3599
3600where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
3601C<^> (caret).   None of the fields is quoted,   but embedded binary data is
3602likely to be present. With the specific line ending, this should not be too
3603hard to detect.
3604
3605By default,  Text::CSV_PP'  parse function is instructed to only know about
3606C<"\n"> and C<"\r">  to be legal line endings,  and so has to deal with the
3607embedded newline as a real C<end-of-line>,  so it can scan the next line if
3608binary is true, and the newline is inside a quoted field. With this option,
3609we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
3610a binary character.
3611
3612For L</parse> this means that the parser has no more idea about line ending
3613and L</getline> C<chomp>s line endings on reading.
3614
3615=head3 types
3616
3617A set of column types; the attribute is immediately passed to the L</types>
3618method.
3619
3620=head3 callbacks
3621
3622See the L</Callbacks> section below.
3623
3624=head3 accessors
3625
3626To sum it up,
3627
3628 $csv = Text::CSV_PP->new ();
3629
3630is equivalent to
3631
3632 $csv = Text::CSV_PP->new ({
3633     eol                   => undef, # \r, \n, or \r\n
3634     sep_char              => ',',
3635     sep                   => undef,
3636     quote_char            => '"',
3637     quote                 => undef,
3638     escape_char           => '"',
3639     binary                => 0,
3640     decode_utf8           => 1,
3641     auto_diag             => 0,
3642     diag_verbose          => 0,
3643     blank_is_undef        => 0,
3644     empty_is_undef        => 0,
3645     allow_whitespace      => 0,
3646     allow_loose_quotes    => 0,
3647     allow_loose_escapes   => 0,
3648     allow_unquoted_escape => 0,
3649     always_quote          => 0,
3650     quote_empty           => 0,
3651     quote_space           => 1,
3652     escape_null           => 1,
3653     quote_binary          => 1,
3654     keep_meta_info        => 0,
3655     strict                => 0,
3656     formula               => 0,
3657     verbatim              => 0,
3658     undef_str             => undef,
3659     types                 => undef,
3660     callbacks             => undef,
3661     });
3662
3663For all of the above mentioned flags, an accessor method is available where
3664you can inquire the current value, or change the value
3665
3666 my $quote = $csv->quote_char;
3667 $csv->binary (1);
3668
3669It is not wise to change these settings halfway through writing C<CSV> data
3670to a stream. If however you want to create a new stream using the available
3671C<CSV> object, there is no harm in changing them.
3672
3673If the L</new> constructor call fails,  it returns C<undef>,  and makes the
3674fail reason available through the L</error_diag> method.
3675
3676 $csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
3677     die "".Text::CSV_PP->error_diag ();
3678
3679L</error_diag> will return a string like
3680
3681 "INI - Unknown attribute 'ecs_char'"
3682
3683=head2 known_attributes
3684
3685 @attr = Text::CSV_PP->known_attributes;
3686 @attr = Text::CSV_PP::known_attributes;
3687 @attr = $csv->known_attributes;
3688
3689This method will return an ordered list of all the supported  attributes as
3690described above.   This can be useful for knowing what attributes are valid
3691in classes that use or extend Text::CSV_PP.
3692
3693=head2 print
3694
3695 $status = $csv->print ($fh, $colref);
3696
3697Similar to  L</combine> + L</string> + L</print>,  but much more efficient.
3698It expects an array ref as input  (not an array!)  and the resulting string
3699is not really  created,  but  immediately  written  to the  C<$fh>  object,
3700typically an IO handle or any other object that offers a L</print> method.
3701
3702For performance reasons  C<print>  does not create a result string,  so all
3703L</string>, L</status>, L</fields>, and L</error_input> methods will return
3704undefined information after executing this method.
3705
3706If C<$colref> is C<undef>  (explicit,  not through a variable argument) and
3707L</bind_columns>  was used to specify fields to be printed,  it is possible
3708to make performance improvements, as otherwise data would have to be copied
3709as arguments to the method call:
3710
3711 $csv->bind_columns (\($foo, $bar));
3712 $status = $csv->print ($fh, undef);
3713
3714A short benchmark
3715
3716 my @data = ("aa" .. "zz");
3717 $csv->bind_columns (\(@data));
3718
3719 $csv->print ($fh, [ @data ]);   # 11800 recs/sec
3720 $csv->print ($fh,  \@data  );   # 57600 recs/sec
3721 $csv->print ($fh,   undef  );   # 48500 recs/sec
3722
3723=head2 say
3724
3725 $status = $csv->say ($fh, $colref);
3726
3727Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
3728
3729=head2 print_hr
3730
3731 $csv->print_hr ($fh, $ref);
3732
3733Provides an easy way  to print a  C<$ref>  (as fetched with L</getline_hr>)
3734provided the column names are set with L</column_names>.
3735
3736It is just a wrapper method with basic parameter checks over
3737
3738 $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
3739
3740=head2 combine
3741
3742 $status = $csv->combine (@fields);
3743
3744This method constructs a C<CSV> record from  C<@fields>,  returning success
3745or failure.   Failure can result from lack of arguments or an argument that
3746contains an invalid character.   Upon success,  L</string> can be called to
3747retrieve the resultant C<CSV> string.  Upon failure,  the value returned by
3748L</string> is undefined and L</error_input> could be called to retrieve the
3749invalid argument.
3750
3751=head2 string
3752
3753 $line = $csv->string ();
3754
3755This method returns the input to  L</parse>  or the resultant C<CSV> string
3756of L</combine>, whichever was called more recently.
3757
3758=head2 getline
3759
3760 $colref = $csv->getline ($fh);
3761
3762This is the counterpart to  L</print>,  as L</parse>  is the counterpart to
3763L</combine>:  it parses a row from the C<$fh>  handle using the L</getline>
3764method associated with C<$fh>  and parses this row into an array ref.  This
3765array ref is returned by the function or C<undef> for failure.  When C<$fh>
3766does not support C<getline>, you are likely to hit errors.
3767
3768When fields are bound with L</bind_columns> the return value is a reference
3769to an empty list.
3770
3771The L</string>, L</fields>, and L</status> methods are meaningless again.
3772
3773=head2 getline_all
3774
3775 $arrayref = $csv->getline_all ($fh);
3776 $arrayref = $csv->getline_all ($fh, $offset);
3777 $arrayref = $csv->getline_all ($fh, $offset, $length);
3778
3779This will return a reference to a list of L<getline ($fh)|/getline> results.
3780In this call, C<keep_meta_info> is disabled.  If C<$offset> is negative, as
3781with C<splice>, only the last  C<abs ($offset)> records of C<$fh> are taken
3782into consideration.
3783
3784Given a CSV file with 10 lines:
3785
3786 lines call
3787 ----- ---------------------------------------------------------
3788 0..9  $csv->getline_all ($fh)         # all
3789 0..9  $csv->getline_all ($fh,  0)     # all
3790 8..9  $csv->getline_all ($fh,  8)     # start at 8
3791 -     $csv->getline_all ($fh,  0,  0) # start at 0 first 0 rows
3792 0..4  $csv->getline_all ($fh,  0,  5) # start at 0 first 5 rows
3793 4..5  $csv->getline_all ($fh,  4,  2) # start at 4 first 2 rows
3794 8..9  $csv->getline_all ($fh, -2)     # last 2 rows
3795 6..7  $csv->getline_all ($fh, -4,  2) # first 2 of last  4 rows
3796
3797=head2 getline_hr
3798
3799The L</getline_hr> and L</column_names> methods work together  to allow you
3800to have rows returned as hashrefs.  You must call L</column_names> first to
3801declare your column names.
3802
3803 $csv->column_names (qw( code name price description ));
3804 $hr = $csv->getline_hr ($fh);
3805 print "Price for $hr->{name} is $hr->{price} EUR\n";
3806
3807L</getline_hr> will croak if called before L</column_names>.
3808
3809Note that  L</getline_hr>  creates a hashref for every row and will be much
3810slower than the combined use of L</bind_columns>  and L</getline> but still
3811offering the same ease of use hashref inside the loop:
3812
3813 my @cols = @{$csv->getline ($fh)};
3814 $csv->column_names (@cols);
3815 while (my $row = $csv->getline_hr ($fh)) {
3816     print $row->{price};
3817     }
3818
3819Could easily be rewritten to the much faster:
3820
3821 my @cols = @{$csv->getline ($fh)};
3822 my $row = {};
3823 $csv->bind_columns (\@{$row}{@cols});
3824 while ($csv->getline ($fh)) {
3825     print $row->{price};
3826     }
3827
3828Your mileage may vary for the size of the data and the number of rows. With
3829perl-5.14.2 the comparison for a 100_000 line file with 14 rows:
3830
3831            Rate hashrefs getlines
3832 hashrefs 1.00/s       --     -76%
3833 getlines 4.15/s     313%       --
3834
3835=head2 getline_hr_all
3836
3837 $arrayref = $csv->getline_hr_all ($fh);
3838 $arrayref = $csv->getline_hr_all ($fh, $offset);
3839 $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
3840
3841This will return a reference to a list of   L<getline_hr ($fh)|/getline_hr>
3842results.  In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
3843
3844=head2 parse
3845
3846 $status = $csv->parse ($line);
3847
3848This method decomposes a  C<CSV>  string into fields,  returning success or
3849failure.   Failure can result from a lack of argument  or the given  C<CSV>
3850string is improperly formatted.   Upon success, L</fields> can be called to
3851retrieve the decomposed fields. Upon failure calling L</fields> will return
3852undefined data and  L</error_input>  can be called to retrieve  the invalid
3853argument.
3854
3855You may use the L</types>  method for setting column types.  See L</types>'
3856description below.
3857
3858The C<$line> argument is supposed to be a simple scalar. Everything else is
3859supposed to croak and set error 1500.
3860
3861=head2 fragment
3862
3863This function tries to implement RFC7111  (URI Fragment Identifiers for the
3864text/csv Media Type) - http://tools.ietf.org/html/rfc7111
3865
3866 my $AoA = $csv->fragment ($fh, $spec);
3867
3868In specifications,  C<*> is used to specify the I<last> item, a dash (C<->)
3869to indicate a range.   All indices are C<1>-based:  the first row or column
3870has index C<1>. Selections can be combined with the semi-colon (C<;>).
3871
3872When using this method in combination with  L</column_names>,  the returned
3873reference  will point to a  list of hashes  instead of a  list of lists.  A
3874disjointed  cell-based combined selection  might return rows with different
3875number of columns making the use of hashes unpredictable.
3876
3877 $csv->column_names ("Name", "Age");
3878 my $AoH = $csv->fragment ($fh, "col=3;8");
3879
3880If the L</after_parse> callback is active,  it is also called on every line
3881parsed and skipped before the fragment.
3882
3883=over 2
3884
3885=item row
3886
3887 row=4
3888 row=5-7
3889 row=6-*
3890 row=1-2;4;6-*
3891
3892=item col
3893
3894 col=2
3895 col=1-3
3896 col=4-*
3897 col=1-2;4;7-*
3898
3899=item cell
3900
3901In cell-based selection, the comma (C<,>) is used to pair row and column
3902
3903 cell=4,1
3904
3905The range operator (C<->) using C<cell>s can be used to define top-left and
3906bottom-right C<cell> location
3907
3908 cell=3,1-4,6
3909
3910The C<*> is only allowed in the second part of a pair
3911
3912 cell=3,2-*,2    # row 3 till end, only column 2
3913 cell=3,2-3,*    # column 2 till end, only row 3
3914 cell=3,2-*,*    # strip row 1 and 2, and column 1
3915
3916Cells and cell ranges may be combined with C<;>, possibly resulting in rows
3917with different number of columns
3918
3919 cell=1,1-2,2;3,3-4,4;1,4;4,1
3920
3921Disjointed selections will only return selected cells.   The cells that are
3922not  specified  will  not  be  included  in the  returned set,  not even as
3923C<undef>.  As an example given a C<CSV> like
3924
3925 11,12,13,...19
3926 21,22,...28,29
3927 :            :
3928 91,...97,98,99
3929
3930with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
3931
3932 11,12,14
3933 21,22
3934 33,34
3935 41,43,44
3936
3937Overlapping cell-specs will return those cells only once, So
3938C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
3939
3940 11,12,13
3941 21,22,23,24
3942 31,32,33,34
3943 42,43,44
3944
3945=back
3946
3947L<RFC7111|http://tools.ietf.org/html/rfc7111> does  B<not>  allow different
3948types of specs to be combined   (either C<row> I<or> C<col> I<or> C<cell>).
3949Passing an invalid fragment specification will croak and set error 2013.
3950
3951=head2 column_names
3952
3953Set the "keys" that will be used in the  L</getline_hr>  calls.  If no keys
3954(column names) are passed, it will return the current setting as a list.
3955
3956L</column_names> accepts a list of scalars  (the column names)  or a single
3957array_ref, so you can pass the return value from L</getline> too:
3958
3959 $csv->column_names ($csv->getline ($fh));
3960
3961L</column_names> does B<no> checking on duplicates at all, which might lead
3962to unexpected results.   Undefined entries will be replaced with the string
3963C<"\cAUNDEF\cA">, so
3964
3965 $csv->column_names (undef, "", "name", "name");
3966 $hr = $csv->getline_hr ($fh);
3967
3968Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field,  C<< $hr->{""} >> to
3969the 2nd field, and C<< $hr->{name} >> to the 4th field,  discarding the 3rd
3970field.
3971
3972L</column_names> croaks on invalid arguments.
3973
3974=head2 header
3975
3976This method does NOT work in perl-5.6.x
3977
3978Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
3979
3980 my @hdr = $csv->header ($fh);
3981 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3982 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3983
3984The first argument should be a file handle.
3985
3986This method resets some object properties,  as it is supposed to be invoked
3987only once per file or stream.  It will leave attributes C<column_names> and
3988C<bound_columns> alone of setting column names is disabled. Reading headers
3989on previously process objects might fail on perl-5.8.0 and older.
3990
3991Assuming that the file opened for parsing has a header, and the header does
3992not contain problematic characters like embedded newlines,   read the first
3993line from the open handle then auto-detect whether the header separates the
3994column names with a character from the allowed separator list.
3995
3996If any of the allowed separators matches,  and none of the I<other> allowed
3997separators match,  set  L<C<sep>|/sep>  to that  separator  for the current
3998CSV_PP instance and use it to parse the first line, map those to lowercase,
3999and use that to set the instance L</column_names>:
4000
4001 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4002 open my $fh, "<", "file.csv";
4003 binmode $fh; # for Windows
4004 $csv->header ($fh);
4005 while (my $row = $csv->getline_hr ($fh)) {
4006     ...
4007     }
4008
4009If the header is empty,  contains more than one unique separator out of the
4010allowed set,  contains empty fields,   or contains identical fields  (after
4011folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
4012
4013If the header contains embedded newlines or is not valid  CSV  in any other
4014way, this method will croak and leave the parse error untouched.
4015
4016A successful call to C<header>  will always set the  L<C<sep>|/sep>  of the
4017C<$csv> object. This behavior can not be disabled.
4018
4019=head3 return value
4020
4021On error this method will croak.
4022
4023In list context,  the headers will be returned whether they are used to set
4024L</column_names> or not.
4025
4026In scalar context, the instance itself is returned.  B<Note>: the values as
4027found in the header will effectively be  B<lost> if  C<set_column_names> is
4028false.
4029
4030=head3 Options
4031
4032=over 2
4033
4034=item sep_set
4035
4036 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
4037
4038The list of legal separators defaults to C<[ ";", "," ]> and can be changed
4039by this option.  As this is probably the most often used option,  it can be
4040passed on its own as an unnamed argument:
4041
4042 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
4043
4044Multi-byte  sequences are allowed,  both multi-character and  Unicode.  See
4045L<C<sep>|/sep>.
4046
4047=item detect_bom
4048
4049 $csv->header ($fh, { detect_bom => 1 });
4050
4051The default behavior is to detect if the header line starts with a BOM.  If
4052the header has a BOM, use that to set the encoding of C<$fh>.  This default
4053behavior can be disabled by passing a false value to C<detect_bom>.
4054
4055Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE,  and
4056UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1,  and GB-18030
4057but L<Encode> does not (yet). UTF-7 is not supported.
4058
4059If a supported BOM was detected as start of the stream, it is stored in the
4060abject attribute C<ENCODING>.
4061
4062 my $enc = $csv->{ENCODING};
4063
4064The encoding is used with C<binmode> on C<$fh>.
4065
4066If the handle was opened in a (correct) encoding,  this method will  B<not>
4067alter the encoding, as it checks the leading B<bytes> of the first line. In
4068case the stream starts with a decode BOM (C<U+FEFF>), C<{ENCODING}> will be
4069C<""> (empty) instead of the default C<undef>.
4070
4071=item munge_column_names
4072
4073This option offers the means to modify the column names into something that
4074is most useful to the application.   The default is to map all column names
4075to lower case.
4076
4077 $csv->header ($fh, { munge_column_names => "lc" });
4078
4079The following values are available:
4080
4081  lc     - lower case
4082  uc     - upper case
4083  none   - do not change
4084  \%hash - supply a mapping
4085  \&cb   - supply a callback
4086
4087Literal:
4088
4089 $csv->header ($fh, { munge_column_names => "none" });
4090
4091Hash:
4092
4093 $csv->header ($fh, { munge_column_names => { foo => "sombrero" });
4094
4095if a value does not exist, the original value is used unchanged
4096
4097Callback:
4098
4099 $csv->header ($fh, { munge_column_names => sub { fc } });
4100 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
4101 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
4102
4103As this callback is called in a C<map>, you can use C<$_> directly.
4104
4105=item set_column_names
4106
4107 $csv->header ($fh, { set_column_names => 1 });
4108
4109The default is to set the instances column names using  L</column_names> if
4110the method is successful,  so subsequent calls to L</getline_hr> can return
4111a hash. Disable setting the header can be forced by using a false value for
4112this option.
4113
4114As described in L</return value> above, content is lost in scalar context.
4115
4116=back
4117
4118=head3 Validation
4119
4120When receiving CSV files from external sources,  this method can be used to
4121protect against changes in the layout by restricting to known headers  (and
4122typos in the header fields).
4123
4124 my %known = (
4125     "record key" => "c_rec",
4126     "rec id"     => "c_rec",
4127     "id_rec"     => "c_rec",
4128     "kode"       => "code",
4129     "code"       => "code",
4130     "vaule"      => "value",
4131     "value"      => "value",
4132     );
4133 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4134 open my $fh, "<", $source or die "$source: $!";
4135 $csv->header ($fh, { munge_column_names => sub {
4136     s/\s+$//;
4137     s/^\s+//;
4138     $known{lc $_} or die "Unknown column '$_' in $source";
4139     }});
4140 while (my $row = $csv->getline_hr ($fh)) {
4141     say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
4142     }
4143
4144=head2 bind_columns
4145
4146Takes a list of scalar references to be used for output with  L</print>  or
4147to store in the fields fetched by L</getline>.  When you do not pass enough
4148references to store the fetched fields in, L</getline> will fail with error
4149C<3006>.  If you pass more than there are fields to return,  the content of
4150the remaining references is left untouched.
4151
4152 $csv->bind_columns (\$code, \$name, \$price, \$description);
4153 while ($csv->getline ($fh)) {
4154     print "The price of a $name is \x{20ac} $price\n";
4155     }
4156
4157To reset or clear all column binding, call L</bind_columns> with the single
4158argument C<undef>. This will also clear column names.
4159
4160 $csv->bind_columns (undef);
4161
4162If no arguments are passed at all, L</bind_columns> will return the list of
4163current bindings or C<undef> if no binds are active.
4164
4165Note that in parsing with  C<bind_columns>,  the fields are set on the fly.
4166That implies that if the third field of a row causes an error  (or this row
4167has just two fields where the previous row had more),  the first two fields
4168already have been assigned the values of the current row, while the rest of
4169the fields will still hold the values of the previous row.  If you want the
4170parser to fail in these cases, use the L<C<strict>|/strict> attribute.
4171
4172=head2 eof
4173
4174 $eof = $csv->eof ();
4175
4176If L</parse> or  L</getline>  was used with an IO stream,  this method will
4177return true (1) if the last call hit end of file,  otherwise it will return
4178false ('').  This is useful to see the difference between a failure and end
4179of file.
4180
4181Note that if the parsing of the last line caused an error,  C<eof> is still
4182true.  That means that if you are I<not> using L</auto_diag>, an idiom like
4183
4184 while (my $row = $csv->getline ($fh)) {
4185     # ...
4186     }
4187 $csv->eof or $csv->error_diag;
4188
4189will I<not> report the error. You would have to change that to
4190
4191 while (my $row = $csv->getline ($fh)) {
4192     # ...
4193     }
4194 +$csv->error_diag and $csv->error_diag;
4195
4196=head2 types
4197
4198 $csv->types (\@tref);
4199
4200This method is used to force that  (all)  columns are of a given type.  For
4201example, if you have an integer column,  two  columns  with  doubles  and a
4202string column, then you might do a
4203
4204 $csv->types ([Text::CSV_PP::IV (),
4205               Text::CSV_PP::NV (),
4206               Text::CSV_PP::NV (),
4207               Text::CSV_PP::PV ()]);
4208
4209Column types are used only for I<decoding> columns while parsing,  in other
4210words by the L</parse> and L</getline> methods.
4211
4212You can unset column types by doing a
4213
4214 $csv->types (undef);
4215
4216or fetch the current type settings with
4217
4218 $types = $csv->types ();
4219
4220=over 4
4221
4222=item IV
4223
4224Set field type to integer.
4225
4226=item NV
4227
4228Set field type to numeric/float.
4229
4230=item PV
4231
4232Set field type to string.
4233
4234=back
4235
4236=head2 fields
4237
4238 @columns = $csv->fields ();
4239
4240This method returns the input to   L</combine>  or the resultant decomposed
4241fields of a successful L</parse>, whichever was called more recently.
4242
4243Note that the return value is undefined after using L</getline>, which does
4244not fill the data structures returned by L</parse>.
4245
4246=head2 meta_info
4247
4248 @flags = $csv->meta_info ();
4249
4250This method returns the "flags" of the input to L</combine> or the flags of
4251the resultant  decomposed fields of  L</parse>,   whichever was called more
4252recently.
4253
4254For each field,  a meta_info field will hold  flags that  inform  something
4255about  the  field  returned  by  the  L</fields>  method or  passed to  the
4256L</combine> method. The flags are bit-wise-C<or>'d like:
4257
4258=over 2
4259
4260=item C< >0x0001
4261
4262The field was quoted.
4263
4264=item C< >0x0002
4265
4266The field was binary.
4267
4268=back
4269
4270See the C<is_***> methods below.
4271
4272=head2 is_quoted
4273
4274 my $quoted = $csv->is_quoted ($column_idx);
4275
4276Where  C<$column_idx> is the  (zero-based)  index of the column in the last
4277result of L</parse>.
4278
4279This returns a true value  if the data in the indicated column was enclosed
4280in L<C<quote_char>|/quote_char> quotes.  This might be important for fields
4281where content C<,20070108,> is to be treated as a numeric value,  and where
4282C<,"20070108",> is explicitly marked as character string data.
4283
4284This method is only valid when L</keep_meta_info> is set to a true value.
4285
4286=head2 is_binary
4287
4288 my $binary = $csv->is_binary ($column_idx);
4289
4290Where  C<$column_idx> is the  (zero-based)  index of the column in the last
4291result of L</parse>.
4292
4293This returns a true value if the data in the indicated column contained any
4294byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
4295
4296This method is only valid when L</keep_meta_info> is set to a true value.
4297
4298=head2 is_missing
4299
4300 my $missing = $csv->is_missing ($column_idx);
4301
4302Where  C<$column_idx> is the  (zero-based)  index of the column in the last
4303result of L</getline_hr>.
4304
4305 $csv->keep_meta_info (1);
4306 while (my $hr = $csv->getline_hr ($fh)) {
4307     $csv->is_missing (0) and next; # This was an empty line
4308     }
4309
4310When using  L</getline_hr>,  it is impossible to tell if the  parsed fields
4311are C<undef> because they where not filled in the C<CSV> stream  or because
4312they were not read at all, as B<all> the fields defined by L</column_names>
4313are set in the hash-ref.    If you still need to know if all fields in each
4314row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
4315you can check the flags.
4316
4317If  L<C<keep_meta_info>|/keep_meta_info>  is C<false>,  C<is_missing>  will
4318always return C<undef>, regardless of C<$column_idx> being valid or not. If
4319this attribute is C<true> it will return either C<0> (the field is present)
4320or C<1> (the field is missing).
4321
4322A special case is the empty line.  If the line is completely empty -  after
4323dealing with the flags - this is still a valid CSV line:  it is a record of
4324just one single empty field. However, if C<keep_meta_info> is set, invoking
4325C<is_missing> with index C<0> will now return true.
4326
4327=head2 status
4328
4329 $status = $csv->status ();
4330
4331This method returns the status of the last invoked L</combine> or L</parse>
4332call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
4333
4334=head2 error_input
4335
4336 $bad_argument = $csv->error_input ();
4337
4338This method returns the erroneous argument (if it exists) of L</combine> or
4339L</parse>,  whichever was called more recently.  If the last invocation was
4340successful, C<error_input> will return C<undef>.
4341
4342=head2 error_diag
4343
4344 Text::CSV_PP->error_diag ();
4345 $csv->error_diag ();
4346 $error_code               = 0  + $csv->error_diag ();
4347 $error_str                = "" . $csv->error_diag ();
4348 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
4349
4350If (and only if) an error occurred,  this function returns  the diagnostics
4351of that error.
4352
4353If called in void context,  this will print the internal error code and the
4354associated error message to STDERR.
4355
4356If called in list context,  this will return  the error code  and the error
4357message in that order.  If the last error was from parsing, the rest of the
4358values returned are a best guess at the location  within the line  that was
4359being parsed. Their values are 1-based.  The position currently is index of
4360the byte at which the parsing failed in the current record. It might change
4361to be the index of the current character in a later release. The records is
4362the index of the record parsed by the csv instance. The field number is the
4363index of the field the parser thinks it is currently  trying to  parse. See
4364F<examples/csv-check> for how this can be used.
4365
4366If called in  scalar context,  it will return  the diagnostics  in a single
4367scalar, a-la C<$!>.  It will contain the error code in numeric context, and
4368the diagnostics message in string context.
4369
4370When called as a class method or a  direct function call,  the  diagnostics
4371are that of the last L</new> call.
4372
4373=head2 record_number
4374
4375 $recno = $csv->record_number ();
4376
4377Returns the records parsed by this csv instance.  This value should be more
4378accurate than C<$.> when embedded newlines come in play. Records written by
4379this instance are not counted.
4380
4381=head2 SetDiag
4382
4383 $csv->SetDiag (0);
4384
4385Use to reset the diagnostics if you are dealing with errors.
4386
4387=head1 FUNCTIONS
4388
4389This section is also taken from Text::CSV_XS.
4390
4391=head2 csv
4392
4393This function is not exported by default and should be explicitly requested:
4394
4395 use Text::CSV_PP qw( csv );
4396
4397This is an high-level function that aims at simple (user) interfaces.  This
4398can be used to read/parse a C<CSV> file or stream (the default behavior) or
4399to produce a file or write to a stream (define the  C<out>  attribute).  It
4400returns an array- or hash-reference on parsing (or C<undef> on fail) or the
4401numeric value of  L</error_diag>  on writing.  When this function fails you
4402can get to the error using the class call to L</error_diag>
4403
4404 my $aoa = csv (in => "test.csv") or
4405     die Text::CSV_PP->error_diag;
4406
4407This function takes the arguments as key-value pairs. This can be passed as
4408a list or as an anonymous hash:
4409
4410 my $aoa = csv (  in => "test.csv", sep_char => ";");
4411 my $aoh = csv ({ in => $fh, headers => "auto" });
4412
4413The arguments passed consist of two parts:  the arguments to L</csv> itself
4414and the optional attributes to the  C<CSV>  object used inside the function
4415as enumerated and explained in L</new>.
4416
4417If not overridden, the default option used for CSV is
4418
4419 auto_diag   => 1
4420 escape_null => 0
4421
4422The option that is always set and cannot be altered is
4423
4424 binary      => 1
4425
4426As this function will likely be used in one-liners,  it allows  C<quote> to
4427be abbreviated as C<quo>,  and  C<escape_char> to be abbreviated as  C<esc>
4428or C<escape>.
4429
4430Alternative invocations:
4431
4432 my $aoa = Text::CSV_PP::csv (in => "file.csv");
4433
4434 my $csv = Text::CSV_PP->new ();
4435 my $aoa = $csv->csv (in => "file.csv");
4436
4437In the latter case, the object attributes are used from the existing object
4438and the attribute arguments in the function call are ignored:
4439
4440 my $csv = Text::CSV_PP->new ({ sep_char => ";" });
4441 my $aoh = $csv->csv (in => "file.csv", bom => 1);
4442
4443will parse using C<;> as C<sep_char>, not C<,>.
4444
4445=head3 in
4446
4447Used to specify the source.  C<in> can be a file name (e.g. C<"file.csv">),
4448which will be  opened for reading  and closed when finished,  a file handle
4449(e.g.  C<$fh> or C<FH>),  a reference to a glob (e.g. C<\*ARGV>),  the glob
4450itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
4451
4452When used with L</out>, C<in> should be a reference to a CSV structure (AoA
4453or AoH)  or a CODE-ref that returns an array-reference or a hash-reference.
4454The code-ref will be invoked with no arguments.
4455
4456 my $aoa = csv (in => "file.csv");
4457
4458 open my $fh, "<", "file.csv";
4459 my $aoa = csv (in => $fh);
4460
4461 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
4462 my $err = csv (in => $csv, out => "file.csv");
4463
4464If called in void context without the L</out> attribute, the resulting ref
4465will be used as input to a subsequent call to csv:
4466
4467 csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
4468
4469will be a shortcut to
4470
4471 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
4472
4473where, in the absence of the C<out> attribute, this is a shortcut to
4474
4475 csv (in  => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
4476      out => *STDOUT)
4477
4478=head3 out
4479
4480 csv (in => $aoa, out => "file.csv");
4481 csv (in => $aoa, out => $fh);
4482 csv (in => $aoa, out =>   STDOUT);
4483 csv (in => $aoa, out =>  *STDOUT);
4484 csv (in => $aoa, out => \*STDOUT);
4485 csv (in => $aoa, out => \my $data);
4486 csv (in => $aoa, out =>  undef);
4487 csv (in => $aoa, out => \"skip");
4488
4489In output mode, the default CSV options when producing CSV are
4490
4491 eol       => "\r\n"
4492
4493The L</fragment> attribute is ignored in output mode.
4494
4495C<out> can be a file name  (e.g.  C<"file.csv">),  which will be opened for
4496writing and closed when finished,  a file handle (e.g. C<$fh> or C<FH>),  a
4497reference to a glob (e.g. C<\*STDOUT>),  the glob itself (e.g. C<*STDOUT>),
4498or a reference to a scalar (e.g. C<\my $data>).
4499
4500 csv (in => sub { $sth->fetch },            out => "dump.csv");
4501 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
4502      headers => $sth->{NAME_lc});
4503
4504When a code-ref is used for C<in>, the output is generated  per invocation,
4505so no buffering is involved. This implies that there is no size restriction
4506on the number of records. The C<csv> function ends when the coderef returns
4507a false value.
4508
4509If C<out> is set to a reference of the literal string C<"skip">, the output
4510will be suppressed completely,  which might be useful in combination with a
4511filter for side effects only.
4512
4513 my %cache;
4514 csv (in    => "dump.csv",
4515      out   => \"skip",
4516      on_in => sub { $cache{$_[1][1]}++ });
4517
4518Currently,  setting C<out> to any false value  (C<undef>, C<"">, 0) will be
4519equivalent to C<\"skip">.
4520
4521=head3 encoding
4522
4523If passed,  it should be an encoding accepted by the  C<:encoding()> option
4524to C<open>. There is no default value. This attribute does not work in perl
45255.6.x.  C<encoding> can be abbreviated to C<enc> for ease of use in command
4526line invocations.
4527
4528If C<encoding> is set to the literal value C<"auto">, the method L</header>
4529will be invoked on the opened stream to check if there is a BOM and set the
4530encoding accordingly.   This is equal to passing a true value in the option
4531L<C<detect_bom>|/detect_bom>.
4532
4533=head3 detect_bom
4534
4535If  C<detect_bom>  is given, the method  L</header>  will be invoked on the
4536opened stream to check if there is a BOM and set the encoding accordingly.
4537
4538C<detect_bom> can be abbreviated to C<bom>.
4539
4540This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
4541
4542Note that as the method  L</header> is invoked,  its default is to also set
4543the headers.
4544
4545=head3 headers
4546
4547If this attribute is not given, the default behavior is to produce an array
4548of arrays.
4549
4550If C<headers> is supplied,  it should be an anonymous list of column names,
4551an anonymous hashref, a coderef, or a literal flag:  C<auto>, C<lc>, C<uc>,
4552or C<skip>.
4553
4554=over 2
4555
4556=item skip
4557
4558When C<skip> is used, the header will not be included in the output.
4559
4560 my $aoa = csv (in => $fh, headers => "skip");
4561
4562=item auto
4563
4564If C<auto> is used, the first line of the C<CSV> source will be read as the
4565list of field headers and used to produce an array of hashes.
4566
4567 my $aoh = csv (in => $fh, headers => "auto");
4568
4569=item lc
4570
4571If C<lc> is used,  the first line of the  C<CSV> source will be read as the
4572list of field headers mapped to  lower case and used to produce an array of
4573hashes. This is a variation of C<auto>.
4574
4575 my $aoh = csv (in => $fh, headers => "lc");
4576
4577=item uc
4578
4579If C<uc> is used,  the first line of the  C<CSV> source will be read as the
4580list of field headers mapped to  upper case and used to produce an array of
4581hashes. This is a variation of C<auto>.
4582
4583 my $aoh = csv (in => $fh, headers => "uc");
4584
4585=item CODE
4586
4587If a coderef is used,  the first line of the  C<CSV> source will be read as
4588the list of mangled field headers in which each field is passed as the only
4589argument to the coderef. This list is used to produce an array of hashes.
4590
4591 my $aoh = csv (in      => $fh,
4592                headers => sub { lc ($_[0]) =~ s/kode/code/gr });
4593
4594this example is a variation of using C<lc> where all occurrences of C<kode>
4595are replaced with C<code>.
4596
4597=item ARRAY
4598
4599If  C<headers>  is an anonymous list,  the entries in the list will be used
4600as field names. The first line is considered data instead of headers.
4601
4602 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
4603 csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
4604
4605=item HASH
4606
4607If C<headers> is an hash reference, this implies C<auto>, but header fields
4608for that exist as key in the hashref will be replaced by the value for that
4609key. Given a CSV file like
4610
4611 post-kode,city,name,id number,fubble
4612 1234AA,Duckstad,Donald,13,"X313DF"
4613
4614using
4615
4616 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
4617
4618will return an entry like
4619
4620 { pc     => "1234AA",
4621   city   => "Duckstad",
4622   name   => "Donald",
4623   ID     => "13",
4624   fubble => "X313DF",
4625   }
4626
4627=back
4628
4629See also L<C<munge_column_names>|/munge_column_names> and
4630L<C<set_column_names>|/set_column_names>.
4631
4632=head3 munge_column_names
4633
4634If C<munge_column_names> is set,  the method  L</header>  is invoked on the
4635opened stream with all matching arguments to detect and set the headers.
4636
4637C<munge_column_names> can be abbreviated to C<munge>.
4638
4639=head3 key
4640
4641If passed,  will default  L<C<headers>|/headers>  to C<"auto"> and return a
4642hashref instead of an array of hashes. Allowed values are simple scalars or
4643array-references where the first element is the joiner and the rest are the
4644fields to join to combine the key.
4645
4646 my $ref = csv (in => "test.csv", key => "code");
4647 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
4648
4649with test.csv like
4650
4651 code,product,price,color
4652 1,pc,850,gray
4653 2,keyboard,12,white
4654 3,mouse,5,black
4655
4656the first example will return
4657
4658  { 1   => {
4659        code    => 1,
4660        color   => 'gray',
4661        price   => 850,
4662        product => 'pc'
4663        },
4664    2   => {
4665        code    => 2,
4666        color   => 'white',
4667        price   => 12,
4668        product => 'keyboard'
4669        },
4670    3   => {
4671        code    => 3,
4672        color   => 'black',
4673        price   => 5,
4674        product => 'mouse'
4675        }
4676    }
4677
4678the second example will return
4679
4680  { "1:gray"    => {
4681        code    => 1,
4682        color   => 'gray',
4683        price   => 850,
4684        product => 'pc'
4685        },
4686    "2:white"   => {
4687        code    => 2,
4688        color   => 'white',
4689        price   => 12,
4690        product => 'keyboard'
4691        },
4692    "3:black"   => {
4693        code    => 3,
4694        color   => 'black',
4695        price   => 5,
4696        product => 'mouse'
4697        }
4698    }
4699
4700The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
4701date that has no header line, like
4702
4703 my $ref = csv (
4704     in      => "foo.csv",
4705     headers => [qw( c_foo foo bar description stock )],
4706     key     =>     "c_foo",
4707     );
4708
4709=head3 value
4710
4711Used to create key-value hashes.
4712
4713Only allowed when C<key> is valid. A C<value> can be either a single column
4714label or an anonymous list of column labels.  In the first case,  the value
4715will be a simple scalar value, in the latter case, it will be a hashref.
4716
4717 my $ref = csv (in => "test.csv", key   => "code",
4718                                  value => "price");
4719 my $ref = csv (in => "test.csv", key   => "code",
4720                                  value => [ "product", "price" ]);
4721 my $ref = csv (in => "test.csv", key   => [ ":" => "code", "color" ],
4722                                  value => "price");
4723 my $ref = csv (in => "test.csv", key   => [ ":" => "code", "color" ],
4724                                  value => [ "product", "price" ]);
4725
4726with test.csv like
4727
4728 code,product,price,color
4729 1,pc,850,gray
4730 2,keyboard,12,white
4731 3,mouse,5,black
4732
4733the first example will return
4734
4735  { 1 => 850,
4736    2 =>  12,
4737    3 =>   5,
4738    }
4739
4740the second example will return
4741
4742  { 1   => {
4743        price   => 850,
4744        product => 'pc'
4745        },
4746    2   => {
4747        price   => 12,
4748        product => 'keyboard'
4749        },
4750    3   => {
4751        price   => 5,
4752        product => 'mouse'
4753        }
4754    }
4755
4756the third example will return
4757
4758  { "1:gray"    => 850,
4759    "2:white"   =>  12,
4760    "3:black"   =>   5,
4761    }
4762
4763the fourth example will return
4764
4765  { "1:gray"    => {
4766        price   => 850,
4767        product => 'pc'
4768        },
4769    "2:white"   => {
4770        price   => 12,
4771        product => 'keyboard'
4772        },
4773    "3:black"   => {
4774        price   => 5,
4775        product => 'mouse'
4776        }
4777    }
4778
4779=head3 keep_headers
4780
4781When using hashes,  keep the column names into the arrayref passed,  so all
4782headers are available after the call in the original order.
4783
4784 my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
4785
4786This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
4787
4788This attribute implies a default of C<auto> for the C<headers> attribute.
4789
4790=head3 fragment
4791
4792Only output the fragment as defined in the L</fragment> method. This option
4793is ignored when I<generating> C<CSV>. See L</out>.
4794
4795Combining all of them could give something like
4796
4797 use Text::CSV_PP qw( csv );
4798 my $aoh = csv (
4799     in       => "test.txt",
4800     encoding => "utf-8",
4801     headers  => "auto",
4802     sep_char => "|",
4803     fragment => "row=3;6-9;15-*",
4804     );
4805 say $aoh->[15]{Foo};
4806
4807=head3 sep_set
4808
4809If C<sep_set> is set, the method L</header> is invoked on the opened stream
4810to detect and set L<C<sep_char>|/sep_char> with the given set.
4811
4812C<sep_set> can be abbreviated to C<seps>.
4813
4814Note that as the  L</header> method is invoked,  its default is to also set
4815the headers.
4816
4817=head3 set_column_names
4818
4819If  C<set_column_names> is passed,  the method L</header> is invoked on the
4820opened stream with all arguments meant for L</header>.
4821
4822If C<set_column_names> is passed as a false value, the content of the first
4823row is only preserved if the output is AoA:
4824
4825With an input-file like
4826
4827 bAr,foo
4828 1,2
4829 3,4,5
4830
4831This call
4832
4833 my $aoa = csv (in => $file, set_column_names => 0);
4834
4835will result in
4836
4837 [[ "bar", "foo"     ],
4838  [ "1",   "2"       ],
4839  [ "3",   "4",  "5" ]]
4840
4841and
4842
4843 my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
4844
4845will result in
4846
4847 [[ "bAr", "foo"     ],
4848  [ "1",   "2"       ],
4849  [ "3",   "4",  "5" ]]
4850
4851=head2 Callbacks
4852
4853Callbacks enable actions triggered from the I<inside> of Text::CSV_PP.
4854
4855While most of what this enables  can easily be done in an  unrolled loop as
4856described in the L</SYNOPSIS> callbacks can be used to meet special demands
4857or enhance the L</csv> function.
4858
4859=over 2
4860
4861=item error
4862
4863 $csv->callbacks (error => sub { $csv->SetDiag (0) });
4864
4865the C<error>  callback is invoked when an error occurs,  but  I<only>  when
4866L</auto_diag> is set to a true value. A callback is invoked with the values
4867returned by L</error_diag>:
4868
4869 my ($c, $s);
4870
4871 sub ignore3006
4872 {
4873     my ($err, $msg, $pos, $recno, $fldno) = @_;
4874     if ($err == 3006) {
4875         # ignore this error
4876         ($c, $s) = (undef, undef);
4877         Text::CSV_PP->SetDiag (0);
4878         }
4879     # Any other error
4880     return;
4881     } # ignore3006
4882
4883 $csv->callbacks (error => \&ignore3006);
4884 $csv->bind_columns (\$c, \$s);
4885 while ($csv->getline ($fh)) {
4886     # Error 3006 will not stop the loop
4887     }
4888
4889=item after_parse
4890
4891 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4892 while (my $row = $csv->getline ($fh)) {
4893     $row->[-1] eq "NEW";
4894     }
4895
4896This callback is invoked after parsing with  L</getline>  only if no  error
4897occurred.  The callback is invoked with two arguments:   the current C<CSV>
4898parser object and an array reference to the fields parsed.
4899
4900The return code of the callback is ignored  unless it is a reference to the
4901string "skip", in which case the record will be skipped in L</getline_all>.
4902
4903 sub add_from_db
4904 {
4905     my ($csv, $row) = @_;
4906     $sth->execute ($row->[4]);
4907     push @$row, $sth->fetchrow_array;
4908     } # add_from_db
4909
4910 my $aoa = csv (in => "file.csv", callbacks => {
4911     after_parse => \&add_from_db });
4912
4913This hook can be used for validation:
4914
4915=over 2
4916
4917=item FAIL
4918
4919Die if any of the records does not validate a rule:
4920
4921 after_parse => sub {
4922     $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4923         die "5th field does not have a valid Dutch zipcode";
4924     }
4925
4926=item DEFAULT
4927
4928Replace invalid fields with a default value:
4929
4930 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4931
4932=item SKIP
4933
4934Skip records that have invalid fields (only applies to L</getline_all>):
4935
4936 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4937
4938=back
4939
4940=item before_print
4941
4942 my $idx = 1;
4943 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4944 $csv->print (*STDOUT, [ 0, $_ ]) for @members;
4945
4946This callback is invoked  before printing with  L</print>  only if no error
4947occurred.  The callback is invoked with two arguments:  the current  C<CSV>
4948parser object and an array reference to the fields passed.
4949
4950The return code of the callback is ignored.
4951
4952 sub max_4_fields
4953 {
4954     my ($csv, $row) = @_;
4955     @$row > 4 and splice @$row, 4;
4956     } # max_4_fields
4957
4958 csv (in => csv (in => "file.csv"), out => *STDOUT,
4959     callbacks => { before print => \&max_4_fields });
4960
4961This callback is not active for L</combine>.
4962
4963=back
4964
4965=head3 Callbacks for csv ()
4966
4967The L</csv> allows for some callbacks that do not integrate in XS internals
4968but only feature the L</csv> function.
4969
4970  csv (in        => "file.csv",
4971       callbacks => {
4972           filter       => { 6 => sub { $_ > 15 } },    # first
4973           after_parse  => sub { say "AFTER PARSE";  }, # first
4974           after_in     => sub { say "AFTER IN";     }, # second
4975           on_in        => sub { say "ON IN";        }, # third
4976           },
4977       );
4978
4979  csv (in        => $aoh,
4980       out       => "file.csv",
4981       callbacks => {
4982           on_in        => sub { say "ON IN";        }, # first
4983           before_out   => sub { say "BEFORE OUT";   }, # second
4984           before_print => sub { say "BEFORE PRINT"; }, # third
4985           },
4986       );
4987
4988=over 2
4989
4990=item filter
4991
4992This callback can be used to filter records.  It is called just after a new
4993record has been scanned.  The callback accepts a:
4994
4995=over 2
4996
4997=item hashref
4998
4999The keys are the index to the row (the field name or field number, 1-based)
5000and the values are subs to return a true or false value.
5001
5002 csv (in => "file.csv", filter => {
5003            3 => sub { m/a/ },       # third field should contain an "a"
5004            5 => sub { length > 4 }, # length of the 5th field minimal 5
5005            });
5006
5007 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
5008
5009If the keys to the filter hash contain any character that is not a digit it
5010will also implicitly set L</headers> to C<"auto">  unless  L</headers>  was
5011already passed as argument.  When headers are active, returning an array of
5012hashes, the filter is not applicable to the header itself.
5013
5014All sub results should match, as in AND.
5015
5016The context of the callback sets  C<$_> localized to the field indicated by
5017the filter. The two arguments are as with all other callbacks, so the other
5018fields in the current row can be seen:
5019
5020 filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
5021
5022If the context is set to return a list of hashes  (L</headers> is defined),
5023the current record will also be available in the localized C<%_>:
5024
5025 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000  }}
5026
5027If the filter is used to I<alter> the content by changing C<$_>,  make sure
5028that the sub returns true in order not to have that record skipped:
5029
5030 filter => { 2 => sub { $_ = uc }}
5031
5032will upper-case the second field, and then skip it if the resulting content
5033evaluates to false. To always accept, end with truth:
5034
5035 filter => { 2 => sub { $_ = uc; 1 }}
5036
5037=item coderef
5038
5039 csv (in => "file.csv", filter => sub { $n++; 0; });
5040
5041If the argument to C<filter> is a coderef,  it is an alias or shortcut to a
5042filter on column 0:
5043
5044 csv (filter => sub { $n++; 0 });
5045
5046is equal to
5047
5048 csv (filter => { 0 => sub { $n++; 0 });
5049
5050=item filter-name
5051
5052 csv (in => "file.csv", filter => "not_blank");
5053 csv (in => "file.csv", filter => "not_empty");
5054 csv (in => "file.csv", filter => "filled");
5055
5056These are predefined filters
5057
5058Given a file like (line numbers prefixed for doc purpose only):
5059
5060 1:1,2,3
5061 2:
5062 3:,
5063 4:""
5064 5:,,
5065 6:, ,
5066 7:"",
5067 8:" "
5068 9:4,5,6
5069
5070=over 2
5071
5072=item not_blank
5073
5074Filter out the blank lines
5075
5076This filter is a shortcut for
5077
5078 filter => { 0 => sub { @{$_[1]} > 1 or
5079             defined $_[1][0] && $_[1][0] ne "" } }
5080
5081Due to the implementation,  it is currently impossible to also filter lines
5082that consists only of a quoted empty field. These lines are also considered
5083blank lines.
5084
5085With the given example, lines 2 and 4 will be skipped.
5086
5087=item not_empty
5088
5089Filter out lines where all the fields are empty.
5090
5091This filter is a shortcut for
5092
5093 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
5094
5095A space is not regarded being empty, so given the example data, lines 2, 3,
50964, 5, and 7 are skipped.
5097
5098=item filled
5099
5100Filter out lines that have no visible data
5101
5102This filter is a shortcut for
5103
5104 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
5105
5106This filter rejects all lines that I<not> have at least one field that does
5107not evaluate to the empty string.
5108
5109With the given example data, this filter would skip lines 2 through 8.
5110
5111=back
5112
5113=back
5114
5115=item after_in
5116
5117This callback is invoked for each record after all records have been parsed
5118but before returning the reference to the caller.  The hook is invoked with
5119two arguments:  the current  C<CSV>  parser object  and a  reference to the
5120record.   The reference can be a reference to a  HASH  or a reference to an
5121ARRAY as determined by the arguments.
5122
5123This callback can also be passed as  an attribute without the  C<callbacks>
5124wrapper.
5125
5126=item before_out
5127
5128This callback is invoked for each record before the record is printed.  The
5129hook is invoked with two arguments:  the current C<CSV> parser object and a
5130reference to the record.   The reference can be a reference to a  HASH or a
5131reference to an ARRAY as determined by the arguments.
5132
5133This callback can also be passed as an attribute  without the  C<callbacks>
5134wrapper.
5135
5136This callback makes the row available in C<%_> if the row is a hashref.  In
5137this case C<%_> is writable and will change the original row.
5138
5139=item on_in
5140
5141This callback acts exactly as the L</after_in> or the L</before_out> hooks.
5142
5143This callback can also be passed as an attribute  without the  C<callbacks>
5144wrapper.
5145
5146This callback makes the row available in C<%_> if the row is a hashref.  In
5147this case C<%_> is writable and will change the original row. So e.g. with
5148
5149  my $aoh = csv (
5150      in      => \"foo\n1\n2\n",
5151      headers => "auto",
5152      on_in   => sub { $_{bar} = 2; },
5153      );
5154
5155C<$aoh> will be:
5156
5157  [ { foo => 1,
5158      bar => 2,
5159      }
5160    { foo => 2,
5161      bar => 2,
5162      }
5163    ]
5164
5165=item csv
5166
5167The I<function>  L</csv> can also be called as a method or with an existing
5168Text::CSV_PP object. This could help if the function is to be invoked a lot
5169of times and the overhead of creating the object internally over  and  over
5170again would be prevented by passing an existing instance.
5171
5172 my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
5173
5174 my $aoa = $csv->csv (in => $fh);
5175 my $aoa = csv (in => $fh, csv => $csv);
5176
5177both act the same. Running this 20000 times on a 20 lines CSV file,  showed
5178a 53% speedup.
5179
5180=back
5181
5182=head1 DIAGNOSTICS
5183
5184This section is also taken from Text::CSV_XS.
5185
5186Still under construction ...
5187
5188If an error occurs,  C<< $csv->error_diag >> can be used to get information
5189on the cause of the failure. Note that for speed reasons the internal value
5190is never cleared on success,  so using the value returned by L</error_diag>
5191in normal cases - when no error occurred - may cause unexpected results.
5192
5193If the constructor failed, the cause can be found using L</error_diag> as a
5194class method, like C<< Text::CSV_PP->error_diag >>.
5195
5196The C<< $csv->error_diag >> method is automatically invoked upon error when
5197the contractor was called with  L<C<auto_diag>|/auto_diag>  set to  C<1> or
5198C<2>, or when L<autodie> is in effect.  When set to C<1>, this will cause a
5199C<warn> with the error message,  when set to C<2>, it will C<die>. C<2012 -
5200EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
5201
5202Errors can be (individually) caught using the L</error> callback.
5203
5204The errors as described below are available. I have tried to make the error
5205itself explanatory enough, but more descriptions will be added. For most of
5206these errors, the first three capitals describe the error category:
5207
5208=over 2
5209
5210=item *
5211INI
5212
5213Initialization error or option conflict.
5214
5215=item *
5216ECR
5217
5218Carriage-Return related parse error.
5219
5220=item *
5221EOF
5222
5223End-Of-File related parse error.
5224
5225=item *
5226EIQ
5227
5228Parse error inside quotation.
5229
5230=item *
5231EIF
5232
5233Parse error inside field.
5234
5235=item *
5236ECB
5237
5238Combine error.
5239
5240=item *
5241EHR
5242
5243HashRef parse related error.
5244
5245=back
5246
5247And below should be the complete list of error codes that can be returned:
5248
5249=over 2
5250
5251=item *
52521001 "INI - sep_char is equal to quote_char or escape_char"
5253
5254The  L<separation character|/sep_char>  cannot be equal to  L<the quotation
5255character|/quote_char> or to L<the escape character|/escape_char>,  as this
5256would invalidate all parsing rules.
5257
5258=item *
52591002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
5260
5261Using the  L<C<allow_whitespace>|/allow_whitespace>  attribute  when either
5262L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char>  is equal to
5263C<SPACE> or C<TAB> is too ambiguous to allow.
5264
5265=item *
52661003 "INI - \r or \n in main attr not allowed"
5267
5268Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
5269L<C<quote_char>|/quote_char>,   or  L<C<escape_char>|/escape_char>  is  not
5270allowed.
5271
5272=item *
52731004 "INI - callbacks should be undef or a hashref"
5274
5275The L<C<callbacks>|/Callbacks>  attribute only allows one to be C<undef> or
5276a hash reference.
5277
5278=item *
52791005 "INI - EOL too long"
5280
5281The value passed for EOL is exceeding its maximum length (16).
5282
5283=item *
52841006 "INI - SEP too long"
5285
5286The value passed for SEP is exceeding its maximum length (16).
5287
5288=item *
52891007 "INI - QUOTE too long"
5290
5291The value passed for QUOTE is exceeding its maximum length (16).
5292
5293=item *
52941008 "INI - SEP undefined"
5295
5296The value passed for SEP should be defined and not empty.
5297
5298=item *
52991010 "INI - the header is empty"
5300
5301The header line parsed in the L</header> is empty.
5302
5303=item *
53041011 "INI - the header contains more than one valid separator"
5305
5306The header line parsed in the  L</header>  contains more than one  (unique)
5307separator character out of the allowed set of separators.
5308
5309=item *
53101012 "INI - the header contains an empty field"
5311
5312The header line parsed in the L</header> is contains an empty field.
5313
5314=item *
53151013 "INI - the header contains nun-unique fields"
5316
5317The header line parsed in the  L</header>  contains at least  two identical
5318fields.
5319
5320=item *
53211014 "INI - header called on undefined stream"
5322
5323The header line cannot be parsed from an undefined sources.
5324
5325=item *
53261500 "PRM - Invalid/unsupported argument(s)"
5327
5328Function or method called with invalid argument(s) or parameter(s).
5329
5330=item *
53311501 "PRM - The key attribute is passed as an unsupported type"
5332
5333The C<key> attribute is of an unsupported type.
5334
5335=item *
53361502 "PRM - The value attribute is passed without the key attribute"
5337
5338The C<value> attribute is only allowed when a valid key is given.
5339
5340=item *
53411503 "PRM - The value attribute is passed as an unsupported type"
5342
5343The C<value> attribute is of an unsupported type.
5344
5345=item *
53462010 "ECR - QUO char inside quotes followed by CR not part of EOL"
5347
5348When  L<C<eol>|/eol>  has  been  set  to  anything  but the  default,  like
5349C<"\r\t\n">,  and  the  C<"\r">  is  following  the   B<second>   (closing)
5350L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
5351not make up the L<C<eol>|/eol> sequence, this is an error.
5352
5353=item *
53542011 "ECR - Characters after end of quoted field"
5355
5356Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
5357field and after the closing double-quote, there should be either a new-line
5358sequence or a separation character.
5359
5360=item *
53612012 "EOF - End of data in parsing input stream"
5362
5363Self-explaining. End-of-file while inside parsing a stream. Can happen only
5364when reading from streams with L</getline>,  as using  L</parse> is done on
5365strings that are not required to have a trailing L<C<eol>|/eol>.
5366
5367=item *
53682013 "INI - Specification error for fragments RFC7111"
5369
5370Invalid specification for URI L</fragment> specification.
5371
5372=item *
53732014 "ENF - Inconsistent number of fields"
5374
5375Inconsistent number of fields under strict parsing.
5376
5377=item *
53782021 "EIQ - NL char inside quotes, binary off"
5379
5380Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
5381has been selected with the constructor.
5382
5383=item *
53842022 "EIQ - CR char inside quotes, binary off"
5385
5386Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
5387has been selected with the constructor.
5388
5389=item *
53902023 "EIQ - QUO character not allowed"
5391
5392Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
5393will cause this error.
5394
5395=item *
53962024 "EIQ - EOF cannot be escaped, not even inside quotes"
5397
5398The escape character is not allowed as last character in an input stream.
5399
5400=item *
54012025 "EIQ - Loose unescaped escape"
5402
5403An escape character should escape only characters that need escaping.
5404
5405Allowing  the escape  for other characters  is possible  with the attribute
5406L</allow_loose_escape>.
5407
5408=item *
54092026 "EIQ - Binary character inside quoted field, binary off"
5410
5411Binary characters are not allowed by default.    Exceptions are fields that
5412contain valid UTF-8,  that will automatically be upgraded if the content is
5413valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
5414
5415=item *
54162027 "EIQ - Quoted field not terminated"
5417
5418When parsing a field that started with a quotation character,  the field is
5419expected to be closed with a quotation character.   When the parsed line is
5420exhausted before the quote is found, that field is not terminated.
5421
5422=item *
54232030 "EIF - NL char inside unquoted verbatim, binary off"
5424
5425=item *
54262031 "EIF - CR char is first char of field, not part of EOL"
5427
5428=item *
54292032 "EIF - CR char inside unquoted, not part of EOL"
5430
5431=item *
54322034 "EIF - Loose unescaped quote"
5433
5434=item *
54352035 "EIF - Escaped EOF in unquoted field"
5436
5437=item *
54382036 "EIF - ESC error"
5439
5440=item *
54412037 "EIF - Binary character in unquoted field, binary off"
5442
5443=item *
54442110 "ECB - Binary character in Combine, binary off"
5445
5446=item *
54472200 "EIO - print to IO failed. See errno"
5448
5449=item *
54503001 "EHR - Unsupported syntax for column_names ()"
5451
5452=item *
54533002 "EHR - getline_hr () called before column_names ()"
5454
5455=item *
54563003 "EHR - bind_columns () and column_names () fields count mismatch"
5457
5458=item *
54593004 "EHR - bind_columns () only accepts refs to scalars"
5460
5461=item *
54623006 "EHR - bind_columns () did not pass enough refs for parsed fields"
5463
5464=item *
54653007 "EHR - bind_columns needs refs to writable scalars"
5466
5467=item *
54683008 "EHR - unexpected error in bound fields"
5469
5470=item *
54713009 "EHR - print_hr () called before column_names ()"
5472
5473=item *
54743010 "EHR - print_hr () called with invalid arguments"
5475
5476=back
5477
5478=head1 SEE ALSO
5479
5480L<Text::CSV_XS>, L<Text::CSV>
5481
5482Older versions took many regexp from L<http://www.din.or.jp/~ohzaki/perl.htm>
5483
5484=head1 AUTHOR
5485
5486Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5487Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5488
5489Text::CSV_XS was written by E<lt>joe[at]ispsoft.deE<gt>
5490and maintained by E<lt>h.m.brand[at]xs4all.nlE<gt>.
5491
5492Text::CSV was written by E<lt>alan[at]mfgrtl.comE<gt>.
5493
5494=head1 COPYRIGHT AND LICENSE
5495
5496Copyright 2017- by Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5497Copyright 2005-2015 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5498
5499Most of the code and doc is directly taken from the pure perl part of
5500Text::CSV_XS.
5501
5502Copyright (C) 2007-2016 H.Merijn Brand.  All rights reserved.
5503Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
5504Copyright (C) 1997      Alan Citterman.  All rights reserved.
5505
5506This library is free software; you can redistribute it and/or modify
5507it under the same terms as Perl itself.
5508
5509=cut
Note: See TracBrowser for help on using the browser.