Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

CSV_PP.pm@ 33235

Last change on this file since 33235 was 33235, checked in by davidb, 5 years ago
CPAN module for processing CSV files
File size: 168.2 KB

Line
1	package Text::CSV_PP;
2
3	################################################################################
4	#
5	# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
6	#
7	################################################################################
8	require 5.006001;
9
10	use strict;
11	use Exporter ();
12	use vars qw($VERSION @ISA @EXPORT_OK);
13	use Carp;
14
15	$VERSION = '1.99';
16	@ISA = qw(Exporter);
17	@EXPORT_OK = qw(csv);
18
# Column type codes for use with types (). The names presumably follow
# perl's internal scalar kinds (string / integer / numeric) -- TODO confirm.
sub PV { return 0 }
sub IV { return 1 }
sub NV { return 2 }
22
# Per-field meta flags; tested as bit masks against the elements of
# $self->{_FFLAGS} (see is_quoted, is_binary and is_missing).
sub IS_QUOTED  () { 0x0001 }
sub IS_BINARY  () { 0x0002 }
sub IS_ERROR   () { 0x0004 }
sub IS_MISSING () { 0x0010 }

# Bit masks for the kinds of callback hooks that can be registered.
sub HOOK_ERROR        () { 0x0001 }
sub HOOK_AFTER_PARSE  () { 0x0002 }
sub HOOK_BEFORE_PRINT () { 0x0004 }

# NOTE(review): not referenced in the visible part of the file; presumably
# an I/O state flag used by the parser -- confirm against its callers.
sub useIO_EOF () { 0x0010 }
33
34	my $ERRORS = { # numeric error code => message text; the codes are the ones passed to SetDiag ()
35	# Generic errors (INI: raised while constructing or configuring an object)
36	1000 => "INI - constructor failed",
37	1001 => "INI - sep_char is equal to quote_char or escape_char",
38	1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
39	1003 => "INI - \\r or \\n in main attr not allowed",
40	1004 => "INI - callbacks should be undef or a hashref",
41	1005 => "INI - EOL too long",
42	1006 => "INI - SEP too long",
43	1007 => "INI - QUOTE too long",
44	1008 => "INI - SEP undefined",
45
46	1010 => "INI - the header is empty",
47	1011 => "INI - the header contains more than one valid separator",
48	1012 => "INI - the header contains an empty field",
49	1013 => "INI - the header contains nun-unique fields",
50	1014 => "INI - header called on undefined stream",
51
52	# Syntax errors (PRM: invalid parameters passed to a method)
53	1500 => "PRM - Invalid/unsupported arguments(s)",
54	1501 => "PRM - The key attribute is passed as an unsupported type",
55	1502 => "PRM - The value attribute is passed without the key attribute",
56	1503 => "PRM - The value attribute is passed as an unsupported type",
57
58	# Parse errors
59	2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
60	2011 => "ECR - Characters after end of quoted field",
61	2012 => "EOF - End of data in parsing input stream",
62	2013 => "ESP - Specification error for fragments RFC7111",
63	2014 => "ENF - Inconsistent number of fields",
64
65	# EIQ - Error Inside Quotes
66	2021 => "EIQ - NL char inside quotes, binary off",
67	2022 => "EIQ - CR char inside quotes, binary off",
68	2023 => "EIQ - QUO character not allowed",
69	2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
70	2025 => "EIQ - Loose unescaped escape",
71	2026 => "EIQ - Binary character inside quoted field, binary off",
72	2027 => "EIQ - Quoted field not terminated",
73
74	# EIF - Error Inside Field
75	2030 => "EIF - NL char inside unquoted verbatim, binary off",
76	2031 => "EIF - CR char is first char of field, not part of EOL",
77	2032 => "EIF - CR char inside unquoted, not part of EOL",
78	2034 => "EIF - Loose unescaped quote",
79	2035 => "EIF - Escaped EOF in unquoted field",
80	2036 => "EIF - ESC error",
81	2037 => "EIF - Binary character in unquoted field, binary off",
82
83	# Combine errors
84	2110 => "ECB - Binary character in Combine, binary off",
85
86	# IO errors
87	2200 => "EIO - print to IO failed. See errno",
88
89	# Hash-Ref errors (column_names / bind_columns / *_hr methods)
90	3001 => "EHR - Unsupported syntax for column_names ()",
91	3002 => "EHR - getline_hr () called before column_names ()",
92	3003 => "EHR - bind_columns () and column_names () fields count mismatch",
93	3004 => "EHR - bind_columns () only accepts refs to scalars",
94	3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
95	3007 => "EHR - bind_columns needs refs to writable scalars",
96	3008 => "EHR - unexpected error in bound fields",
97	3009 => "EHR - print_hr () called before column_names ()",
98	3010 => "EHR - print_hr () called with invalid arguments",
99
100	4001 => "PRM - The key does not exist as field in the data",
101
102	0 => "", # code 0 maps to the empty message (SetDiag (0) is used to clear the error state)
103	};
104
# Compile-time compatibility shims.
#
# 1. Makes sure utf8::is_utf8 / utf8::decode (/ utf8::encode) can be called
#    on every perl: no-op stubs on perls older than 5.8, and an alias to
#    Encode::is_utf8 when the utf8:: function is not defined.
# 2. Makes sure Scalar::Util::reftype and Scalar::Util::readonly exist: if
#    Scalar::Util cannot be loaded they are emulated on top of the B module.
#    Croaks when neither Scalar::Util nor B can be loaded.
BEGIN {
    if ( $] < 5.006 ) {
        $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
        no strict 'refs';
        *{"utf8::is_utf8"} = sub { 0; };
        *{"utf8::decode"}  = sub { };
    }
    elsif ( $] < 5.008 ) {
        no strict 'refs';
        *{"utf8::is_utf8"} = sub { 0; };
        *{"utf8::decode"}  = sub { };
        *{"utf8::encode"}  = sub { };
    }
    elsif ( !defined &utf8::is_utf8 ) {
        require Encode;
        # Glob assignment: alias utf8::is_utf8 to the Encode implementation.
        *utf8::is_utf8 = *Encode::is_utf8;
    }

    eval q| require Scalar::Util |;
    if ( $@ ) {
        eval q| require B |;
        if ( $@ ) {
            Carp::croak ($@);
        }
        else {
            # B class of the referenced value => reftype () result
            my %tmap = qw(
                B::NULL   SCALAR
                B::HV     HASH
                B::AV     ARRAY
                B::CV     CODE
                B::IO     IO
                B::GV     GLOB
                B::REGEXP REGEXP
            );
            *Scalar::Util::reftype = sub (\$) {
                my $r = shift;
                return undef unless length(ref($r));
                my $t = ref(B::svref_2object($r));
                return
                    exists $tmap{$t}   ? $tmap{$t}
                  : length(ref($$r))   ? 'REF'
                  :                      'SCALAR';
            };
            *Scalar::Util::readonly = sub (\$) {
                my $b = B::svref_2object( $_[0] );
                $b->FLAGS & 0x00800000; # SVf_READONLY?
            };
        }
    }
}
155
156	################################################################################
157	#
158	# Common pure perl methods, taken almost directly from Text::CSV_XS.
159	# (These should be moved into a common class eventually, so that
160	# both XS and PP don't need to apply the same changes.)
161	#
162	################################################################################
163
164	################################################################################
165	# version
166	################################################################################
167
# Returns the version of this module (the package variable $VERSION).
sub version {
    $VERSION;
}
171
172	################################################################################
173	# new
174	################################################################################
175
176	my %def_attr = ( # default value of every attribute; new () overlays the caller's attributes on top of these
177	eol => '',
178	sep_char => ',',
179	quote_char => '"',
180	escape_char => '"',
181	binary => 0,
182	decode_utf8 => 1,
183	auto_diag => 0,
184	diag_verbose => 0,
185	strict => 0,
186	blank_is_undef => 0,
187	empty_is_undef => 0,
188	allow_whitespace => 0,
189	allow_loose_quotes => 0,
190	allow_loose_escapes => 0,
191	allow_unquoted_escape => 0,
192	always_quote => 0,
193	quote_empty => 0,
194	quote_space => 1,
195	quote_binary => 1,
196	escape_null => 1,
197	keep_meta_info => 0,
198	verbatim => 0,
199	formula => 0,
200	undef_str => undef,
201	types => undef,
202	callbacks => undef,
203	# Keys with a leading underscore hold per-object state; known_attributes () filters them out
204	_EOF => 0,
205	_RECNO => 0,
206	_STATUS => undef,
207	_FIELDS => undef,
208	_FFLAGS => undef,
209	_STRING => undef,
210	_ERROR_INPUT => undef,
211	_COLUMN_NAMES => undef,
212	_BOUND_COLUMNS => undef,
213	_AHEAD => undef,
214
215	ENCODING => undef, # set by header () to the upper-cased encoding detected from a BOM
216	);
217
218	my %attr_alias = ( # alternative attribute name => canonical name; applied by new () after lower-casing the key
219	quote_always => "always_quote",
220	verbose_diag => "diag_verbose",
221	quote_null => "escape_null",
222	escape => "escape_char",
223	);
224
225	my $last_new_error = Text::CSV_PP->SetDiag(0); # most recent constructor error; reported by error_diag () when no object error is available
226	my $last_error; # NOTE(review): not referenced in the visible part of the file
227
# NOT a method: is also used before bless
#
# Checks whether allow_whitespace can be combined with the current quote
# and escape settings.
#   $self - attribute hash (blessed or not)
#   $aw   - the (proposed) allow_whitespace value
# Returns 1002 when $aw is true and the effective quote (multi-char
# "quote" if set, else "quote_char") or the escape_char starts with a
# space or TAB; returns 0 otherwise.
sub _unhealthy_whitespace {
    my ($self, $aw) = @_;

    # no checks needed without allow_whitespace
    return 0 unless $aw;

    my $quote = $self->{quote};
    unless (defined $quote && length ($quote)) {
        $quote = $self->{quote_char};
    }
    my $escape = $self->{escape_char};

    for my $chr ($quote, $escape) {
        return 1002 if defined $chr && $chr =~ m/^[ \t]/;
    }
    return 0;
}
242
# Validates the combination of separator, quote, escape and eol attributes.
#   $self - attribute hash (blessed or not)
# The effective separator/quote is the multi-character "sep"/"quote" when
# that is set and non-empty, otherwise "sep_char"/"quote_char".
# Returns 0 when everything is fine, or the first applicable error code:
#   1008 separator undefined or empty     1006 separator longer than 16
#   1007 quote longer than 16             1005 eol longer than 16
#   1003 CR or NL in separator, quote or escape
#   1001 quote or escape equal to the separator
#   1002 allow_whitespace clash (delegated to _unhealthy_whitespace)
sub _check_sanity {
    my $self = shift;

    my $eol = $self->{eol};
    my $sep = $self->{sep};
    defined $sep && length ($sep) or $sep = $self->{sep_char};
    my $quo = $self->{quote};
    defined $quo && length ($quo) or $quo = $self->{quote_char};
    my $esc = $self->{escape_char};

    # use DP;::diag ("SEP: '", DPeek ($sep),
    #              "', QUO: '", DPeek ($quo),
    #              "', ESC: '", DPeek ($esc),"'");

    # sep_char should not be undefined; test definedness first so an
    # undefined separator is reported as 1008 without "uninitialized"
    # warnings from the string comparison.
    defined $sep && $sep ne "" or return 1008;
    length ($sep) > 16 and return 1006;
    $sep =~ m/[\r\n]/ and return 1003;

    if (defined $quo) {
        $quo eq $sep and return 1001;
        length ($quo) > 16 and return 1007;
        $quo =~ m/[\r\n]/ and return 1003;
    }
    if (defined $esc) {
        $esc eq $sep and return 1001;
        $esc =~ m/[\r\n]/ and return 1003;
    }
    if (defined $eol) {
        length ($eol) > 16 and return 1005;
    }

    return _unhealthy_whitespace ($self, $self->{allow_whitespace});
}
277
# Returns the sorted names of all public attributes: every key of
# %def_attr that does not start with an underscore, plus "sep" and "quote".
sub known_attributes {
    my @public = grep { $_ !~ m/^_/ } ("sep", "quote", keys %def_attr);
    sort @public;
}
281
# Constructor: Text::CSV_PP->new ([{ option => value, ... }])
#
# Accepts at most one argument, which must be a hash reference of
# attributes. Attribute names that look like plain identifiers are
# lower-cased and the aliases in %attr_alias are resolved; "sep", "quote"
# and "formula_handling" are mapped onto "sep_char", "quote_char" and
# "formula".
#
# Returns the new object, or nothing on failure. On failure the reason is
# stored in $last_new_error (retrievable through error_diag ()), and when
# the caller passed a true auto_diag the diagnostic is also reported
# immediately. Failure causes: a non-hash argument, an unknown attribute
# (error 1000), or a failed _check_sanity ().
# Croaks (through _supported_formula) on an unsupported formula value.
sub new {
    # Pre-set the "usage" diagnostic; it is cleared again on success.
    $last_new_error = Text::CSV_PP->SetDiag (1000,
        'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);');

    my $proto = shift;
    my $class = ref ($proto) || $proto or return;
    @_ > 0 && ref $_[0] ne "HASH" and return;
    my $attr  = shift || {};
    my %attr  = map {
        my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
        exists $attr_alias{$k} and $k = $attr_alias{$k};
        $k => $attr->{$_};
    } keys %$attr;

    my $sep_aliased = 0;
    if (exists $attr{sep}) {
        $attr{sep_char} = delete $attr{sep};
        $sep_aliased = 1;
    }
    my $quote_aliased = 0;
    if (exists $attr{quote}) {
        $attr{quote_char} = delete $attr{quote};
        $quote_aliased = 1;
    }
    exists $attr{formula_handling} and
        $attr{formula} = delete $attr{formula_handling};
    exists $attr{formula} and
        $attr{formula} = _supported_formula (undef, $attr{formula});
    for (keys %attr) {
        if (m/^[a-z]/ && exists $def_attr{$_}) {
            # uncoverable condition false
            defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
            next;
        }
        # croak?
        $last_new_error = Text::CSV_PP->SetDiag (1000, "INI - Unknown attribute '$_'");
        $attr{auto_diag} and error_diag ();
        return;
    }
    # A separator/quote given through the alias that is longer than one
    # byte is kept in "sep"/"quote"; the *_char slot then holds "\0".
    if ($sep_aliased) {
        my @b = unpack "U0C*", $attr{sep_char};
        if (@b > 1) {
            $attr{sep} = $attr{sep_char};
            $attr{sep_char} = "\0";
        }
        else {
            $attr{sep} = undef;
        }
    }
    if ($quote_aliased and defined $attr{quote_char}) {
        my @b = unpack "U0C*", $attr{quote_char};
        if (@b > 1) {
            $attr{quote} = $attr{quote_char};
            $attr{quote_char} = "\0";
        }
        else {
            $attr{quote} = undef;
        }
    }

    my $self = { %def_attr, %attr };
    if (my $ec = _check_sanity ($self)) {
        $last_new_error = Text::CSV_PP->SetDiag ($ec);
        $attr{auto_diag} and error_diag ();
        return;
    }
    if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
        Carp::carp "The 'callbacks' attribute is set but is not a hash: ignored\n";
        $self->{callbacks} = undef;
    }

    $last_new_error = Text::CSV_PP->SetDiag (0);
    # Without an explicit eol attribute, a defined $\ becomes the eol.
    defined $\ && !exists $attr{eol} and $self->{eol} = $\;
    bless $self, $class;
    defined $self->{types} and $self->types ($self->{types});
    $self;
}
359
360	# Keep in sync with XS!
361	my %_cache_id = ( # Only expose what is accessed from within PM; attribute name => id passed to _cache_set ()
362	quote_char => 0,
363	escape_char => 1,
364	sep_char => 2,
365	sep => 39, # 39 .. 55
366	binary => 3,
367	keep_meta_info => 4,
368	always_quote => 5,
369	allow_loose_quotes => 6,
370	allow_loose_escapes => 7,
371	allow_unquoted_escape => 8,
372	allow_whitespace => 9,
373	blank_is_undef => 10,
374	eol => 11,
375	quote => 15,
376	verbatim => 22,
377	empty_is_undef => 23,
378	auto_diag => 24,
379	diag_verbose => 33,
380	quote_space => 25,
381	quote_empty => 37,
382	quote_binary => 32,
383	escape_null => 31,
384	decode_utf8 => 35,
385	_has_ahead => 30,
386	_has_hooks => 36,
387	_is_bound => 26, # 26 .. 29
388	formula => 38, # NOTE(review): 38 is also listed for sep_len in %_hidden_cache_id -- confirm this is intended
389	strict => 42,
390	undef_str => 46,
391	);
392
393	my %_hidden_cache_id = qw(
394	sep_len 38
395	eol_len 12
396	eol_is_cr 13
397	quo_len 16
398	has_error_input 34
399	);
400	# NOTE(review): sep_len shares id 38 with "formula" in %_cache_id, so only one of the two names survives in the reverse map below -- confirm
401	my %_reverse_cache_id = ( # id => name, built from both tables above
402	map({$_cache_id{$_} => $_} keys %_cache_id),
403	map({$_hidden_cache_id{$_} => $_} keys %_hidden_cache_id),
404	);
405
# A `character'
#
# Stores a character attribute and pushes it to the cache.
#   $name - attribute name (key of %_cache_id)
#   $val  - new value; undef is stored as 0; the value is utf8-decoded
# Croaks with the diagnostic of _check_sanity () when the new value makes
# the attribute combination invalid. The attribute has already been
# stored in $self at that point.
sub _set_attr_C {
    my ($self, $name, $val, $ec) = @_;
    $val = 0 unless defined $val;
    utf8::decode ($val);
    $self->{$name} = $val;
    if ($ec = _check_sanity ($self)) {
        croak ($self->SetDiag ($ec));
    }
    $self->_cache_set ($_cache_id{$name}, $val);
}
415
# A flag
#
# Stores a flag attribute ($val, or 0 when undefined) in $self and pushes
# its numeric value to the cache slot that belongs to $name.
sub _set_attr_X {
    my ($self, $name, $val) = @_;
    $val = 0 unless defined $val;
    $self->{$name} = $val;
    my $slot = $_cache_id{$name};
    $self->_cache_set ($slot, 0 + $val);
}
423
# A number
#
# Stores a numeric attribute as-is in $self and pushes its numeric value
# to the cache slot that belongs to $name.
sub _set_attr_N {
    my ($self, $name, $val) = @_;
    $self->{$name} = $val;
    my $slot = $_cache_id{$name};
    $self->_cache_set ($slot, 0 + $val);
}
430
# Accessor methods.
# It is unwise to change them halfway through a single file!

# Get or set the single quote character. Setting it validates the new
# combination (via _set_attr_C) and clears the cached multi-character
# quote. Returns the current quote_char.
sub quote_char {
    my ($self, @args) = @_;
    if (@args) {
        $self->_set_attr_C ("quote_char", $args[0]);
        $self->_cache_set ($_cache_id{quote}, "");
    }
    return $self->{quote_char};
}
441
# Get or set the quotation string, which may be more than one byte long.
#
# Setter: undef counts as "". A value longer than one byte is kept in
# $self->{quote} and quote_char is set to "\0"; a shorter value goes to
# quote_char and $self->{quote} becomes "". Croaks with error 1007 when the
# value is longer than 16 bytes, or with the _check_sanity () code when
# the resulting combination is invalid.
# Returns the multi-byte quote when set, otherwise quote_char.
sub quote {
    my ($self, @args) = @_;
    if (@args) {
        my $new = defined $args[0] ? $args[0] : "";
        utf8::decode ($new);
        my @bytes = unpack "U0C*", $new;
        if (@bytes > 1) {
            croak ($self->SetDiag (1007)) if @bytes > 16;
            $self->quote_char ("\0");
        }
        else {
            $self->quote_char ($new);
            $new = "";
        }
        $self->{quote} = $new;

        if (my $ec = _check_sanity ($self)) {
            croak ($self->SetDiag ($ec));
        }

        $self->_cache_set ($_cache_id{quote}, $new);
    }
    my $current = $self->{quote};
    return $self->{quote_char} unless defined $current && length ($current);
    return $current;
}
467
# Get or set the escape character. Setting a false value (undef, "" or
# "0") additionally switches the escape_null flag off.
# Returns the current escape_char.
sub escape_char {
    my ($self, @args) = @_;
    if (@args) {
        my $esc = $args[0];
        $self->_set_attr_C ("escape_char", $esc);
        unless ($esc) {
            $self->_set_attr_X ("escape_null", 0);
        }
    }
    return $self->{escape_char};
}
477
# Get or set the single separator character. Setting it validates the new
# combination (via _set_attr_C) and clears the cached multi-character
# separator. Returns the current sep_char.
sub sep_char {
    my ($self, @args) = @_;
    if (@args) {
        $self->_set_attr_C ("sep_char", $args[0]);
        $self->_cache_set ($_cache_id{sep}, "");
    }
    return $self->{sep_char};
}
486
# Get or set the separator string, which may be more than one byte long.
#
# Setter: undef counts as "". A value longer than one byte is kept in
# $self->{sep} and sep_char is set to "\0"; a shorter value goes to
# sep_char and $self->{sep} becomes "". Croaks with error 1006 when the
# value is longer than 16 bytes, or with the _check_sanity () code when
# the resulting combination is invalid.
# Returns the multi-byte separator when set, otherwise sep_char.
sub sep {
    my ($self, @args) = @_;
    if (@args) {
        my $new = defined $args[0] ? $args[0] : "";
        utf8::decode ($new);
        my @bytes = unpack "U0C*", $new;
        if (@bytes > 1) {
            croak ($self->SetDiag (1006)) if @bytes > 16;
            $self->sep_char ("\0");
        }
        else {
            $self->sep_char ($new);
            $new = "";
        }
        $self->{sep} = $new;

        if (my $ec = _check_sanity ($self)) {
            croak ($self->SetDiag ($ec));
        }

        $self->_cache_set ($_cache_id{sep}, $new);
    }
    my $current = $self->{sep};
    return $self->{sep_char} unless defined $current && length ($current);
    return $current;
}
512
# Get or set the end-of-line string. Setting undef stores "". Croaks with
# error 1005 when the new value is longer than 16. Returns the current eol.
sub eol {
    my ($self, @args) = @_;
    if (@args) {
        my $new = defined $args[0] ? $args[0] : "";
        croak ($self->SetDiag (1005)) if length ($new) > 16;
        $self->{eol} = $new;
        $self->_cache_set ($_cache_id{eol}, $new);
    }
    return $self->{eol};
}
524
# Get or set the always_quote flag; returns the current value.
sub always_quote {
    my ($self, @args) = @_;
    $self->_set_attr_X ("always_quote", $args[0]) if @args;
    return $self->{always_quote};
}
530
# Get or set the quote_space flag; returns the current value.
sub quote_space {
    my ($self, @args) = @_;
    $self->_set_attr_X ("quote_space", $args[0]) if @args;
    return $self->{quote_space};
}
536
# Get or set the quote_empty flag; returns the current value.
sub quote_empty {
    my ($self, @args) = @_;
    $self->_set_attr_X ("quote_empty", $args[0]) if @args;
    return $self->{quote_empty};
}
542
# Get or set the escape_null flag; returns the current value.
sub escape_null {
    my ($self, @args) = @_;
    $self->_set_attr_X ("escape_null", $args[0]) if @args;
    return $self->{escape_null};
}
548
# Alias of escape_null: jumps straight into that sub with the current @_,
# so it behaves exactly as if escape_null itself had been called.
sub quote_null {
    goto &{ \&escape_null };
}
550
# Get or set the quote_binary flag; returns the current value.
sub quote_binary {
    my ($self, @args) = @_;
    $self->_set_attr_X ("quote_binary", $args[0]) if @args;
    return $self->{quote_binary};
}
556
# Get or set the binary flag; returns the current value.
sub binary {
    my ($self, @args) = @_;
    $self->_set_attr_X ("binary", $args[0]) if @args;
    return $self->{binary};
}
562
# Get or set the strict flag; returns the current value.
sub strict {
    my ($self, @args) = @_;
    $self->_set_attr_X ("strict", $args[0]) if @args;
    return $self->{strict};
}
568
# Sets error $err and returns its message extended with the detail $msg.
#
# The message is what error_diag () returns in scalar context. When that
# is purely numeric it is dropped and $msg gets a "# " prefix. Parts with
# fewer than three consecutive non-blank characters are left out. The
# parts are joined with ": ", or with "\n\t" when the message ends in ";"
# or a newline.
sub _SetDiagInfo {
    my ($self, $err, $msg) = @_;
    $self->SetDiag ($err);
    my $text = $self->error_diag;
    if ($text =~ s/^\d+$//) {
        $msg =~ s/^/# /;
    }
    my $glue  = $text =~ m/[;\n]$/ ? "\n\t" : ": ";
    my @parts = grep { m/\S\S\S/ } $text, $msg;
    return join $glue, @parts;
}
577
# Normalises a formula-handling specification to its numeric code.
#   $self - object or class, may be undef (new () calls it that way)
#   $f    - number 0..5 or a case-insensitive name:
#           0 none, 1 die, 2 croak, 3 diag, 4 empty (or ""), 5 undef
# Returns the numeric code; an undefined $f yields 5.
# Croaks with error 1500 for any other value.
sub _supported_formula {
    my ($self, $f) = @_;
    defined $f or return 5;
    $f =~ m/^(?: 0 | none         )$/xi ? 0 :
    $f =~ m/^(?: 1 | die          )$/xi ? 1 :
    $f =~ m/^(?: 2 | croak        )$/xi ? 2 :
    $f =~ m/^(?: 3 | diag         )$/xi ? 3 :
    $f =~ m/^(?: 4 | empty |      )$/xi ? 4 :   # trailing alternative matches ""
    $f =~ m/^(?: 5 | undef        )$/xi ? 5 : do {
        $self ||= "Text::CSV_PP";
        croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
    };
}
591
# Get or set the formula handling. A new value is normalised (and
# validated) by _supported_formula before it is stored.
# Returns the name of the current setting: one of
# none, die, croak, diag, empty, undef.
sub formula {
    my ($self, @args) = @_;
    if (@args) {
        $self->_set_attr_N ("formula", _supported_formula ($self, $args[0]));
    }
    my @name = qw( none die croak diag empty undef );
    $name[_supported_formula ($self, $self->{formula})];
}
# Alternative name for formula (): forwards all arguments to it and
# returns its result.
sub formula_handling {
    my ($self, @args) = @_;
    return $self->formula (@args);
}
601
# Get or set the decode_utf8 flag; returns the current value.
sub decode_utf8 {
    my ($self, @args) = @_;
    $self->_set_attr_X ("decode_utf8", $args[0]) if @args;
    return $self->{decode_utf8};
}
607
# Get or set the keep_meta_info attribute.
# Setter normalisation: undef and "" become 0; a value that starts with a
# digit is kept as-is; any other string becomes 0 when it is "false"
# (case-insensitive) and 1 otherwise.
# Returns the current value.
sub keep_meta_info {
    my $self = shift;
    if (@_) {
        my $v = shift;
        !defined $v || $v eq "" and $v = 0;
        $v =~ m/^[0-9]/ or $v = lc ($v) eq "false" ? 0 : 1; # true/truth = 1
        $self->_set_attr_X ("keep_meta_info", $v);
    }
    $self->{keep_meta_info};
}
618
# Get or set the allow_loose_quotes flag; returns the current value.
sub allow_loose_quotes {
    my ($self, @args) = @_;
    $self->_set_attr_X ("allow_loose_quotes", $args[0]) if @args;
    return $self->{allow_loose_quotes};
}
624
# Get or set the allow_loose_escapes flag; returns the current value.
sub allow_loose_escapes {
    my ($self, @args) = @_;
    $self->_set_attr_X ("allow_loose_escapes", $args[0]) if @args;
    return $self->{allow_loose_escapes};
}
630
# Get or set the allow_whitespace flag. Croaks with error 1002 when the
# new value is true while the quote or escape character starts with a
# space or TAB (checked by _unhealthy_whitespace).
# Returns the current value.
sub allow_whitespace {
    my ($self, @args) = @_;
    if (@args) {
        my $aw = $args[0];
        if (_unhealthy_whitespace ($self, $aw)) {
            croak ($self->SetDiag (1002));
        }
        $self->_set_attr_X ("allow_whitespace", $aw);
    }
    return $self->{allow_whitespace};
}
641
# Get or set the allow_unquoted_escape flag; returns the current value.
sub allow_unquoted_escape {
    my ($self, @args) = @_;
    $self->_set_attr_X ("allow_unquoted_escape", $args[0]) if @args;
    return $self->{allow_unquoted_escape};
}
647
# Get or set the blank_is_undef flag; returns the current value.
sub blank_is_undef {
    my ($self, @args) = @_;
    $self->_set_attr_X ("blank_is_undef", $args[0]) if @args;
    return $self->{blank_is_undef};
}
653
# Get or set the empty_is_undef flag; returns the current value.
sub empty_is_undef {
    my ($self, @args) = @_;
    $self->_set_attr_X ("empty_is_undef", $args[0]) if @args;
    return $self->{empty_is_undef};
}
659
# Get or set the verbatim flag; returns the current value.
sub verbatim {
    my ($self, @args) = @_;
    $self->_set_attr_X ("verbatim", $args[0]) if @args;
    return $self->{verbatim};
}
665
# Get or set undef_str. A defined new value is stringified before it is
# stored; undef clears the attribute. The stored value is also pushed to
# the cache. Returns the current value.
sub undef_str {
    my ($self, @args) = @_;
    if (@args) {
        my $new = $args[0];
        $new = "$new" if defined $new;
        $self->{undef_str} = $new;
        $self->_cache_set ($_cache_id{undef_str}, $self->{undef_str});
    }
    return $self->{undef_str};
}
675
# Get or set the auto_diag level.
# Setter normalisation: undef and "" become 0; a value that starts with a
# digit is kept as-is; any other string becomes 0 when it is "false"
# (case-insensitive) and 1 otherwise.
# Returns the current value.
sub auto_diag {
    my $self = shift;
    if (@_) {
        my $v = shift;
        !defined $v || $v eq "" and $v = 0;
        $v =~ m/^[0-9]/ or $v = lc ($v) eq "false" ? 0 : 1; # true/truth = 1
        $self->_set_attr_X ("auto_diag", $v);
    }
    $self->{auto_diag};
}
686
# Get or set the diag_verbose level.
# Setter normalisation: undef and "" become 0; a value that starts with a
# digit is kept as-is; any other string becomes 0 when it is "false"
# (case-insensitive) and 1 otherwise.
# Returns the current value.
sub diag_verbose {
    my $self = shift;
    if (@_) {
        my $v = shift;
        !defined $v || $v eq "" and $v = 0;
        $v =~ m/^[0-9]/ or $v = lc ($v) eq "false" ? 0 : 1; # true/truth = 1
        $self->_set_attr_X ("diag_verbose", $v);
    }
    $self->{diag_verbose};
}
697
698	################################################################################
699	# status
700	################################################################################
701
# Returns the status recorded by the last combine () or parse () call.
sub status {
    my ($self) = @_;
    $self->{_STATUS};
}
705
# Returns the object's end-of-file indicator ($self->{_EOF}).
sub eof {
    my ($self) = @_;
    $self->{_EOF};
}
709
# Get, set or clear the column types.
#   types ()            returns the current type list (array ref or undef)
#   types (\@types)     stores the list, plus a packed form in
#                       $self->{_types} (one chr () per type code), and
#                       returns the list
#   types (undef|false) removes both entries and returns undef
sub types {
    my ($self, @args) = @_;

    return $self->{'types'} unless @args;

    my $types = $args[0];
    unless ($types) {
        delete $self->{'types'};
        delete $self->{'_types'};
        return undef;
    }

    $self->{'_types'} = join "", map { chr } @$types;
    return $self->{'types'} = $types;
}
728
# Get or set the callback hooks.
#   callbacks ()                     returns the current hash ref (or undef)
#   callbacks (undef)                removes all callbacks
#   callbacks (\%cb)                 sets them from a hash ref
#   callbacks (name => sub {}, ...)  sets them from a key/value list
# Every key must match /^[\w.]+$/ and every value must be a code ref.
# Croaks with error 1004 on undefined elements, an odd-sized list, an
# invalid key/value, or extra arguments after a leading undef.
# The _has_hooks flag is updated with one bit per known hook:
# 0x01 error, 0x02 after_parse, 0x04 before_print.
sub callbacks {
    my $self = shift;
    if (@_) {
        my $cb;
        my $hf = 0x00;
        if (defined $_[0]) {
            grep { !defined } @_ and croak ($self->SetDiag (1004));
            $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
                : @_ % 2 == 0                    ? { @_ }
                : croak ($self->SetDiag (1004));
            foreach my $cbk (keys %$cb) {
                # A key cannot be a ref. That would be stored as the *string
                # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
                $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
                    croak ($self->SetDiag (1004));
            }
            exists $cb->{error}        and $hf |= 0x01;
            exists $cb->{after_parse}  and $hf |= 0x02;
            exists $cb->{before_print} and $hf |= 0x04;
        }
        elsif (@_ > 1) {
            # (undef, whatever)
            croak ($self->SetDiag (1004));
        }
        $self->_set_attr_X ("_has_hooks", $hf);
        $self->{callbacks} = $cb;
    }
    $self->{callbacks};
}
758
759	################################################################################
760	# error_diag
761	################################################################################
762
# Reports the last error.
#
# The diagnostic is (code, message, position, record number, field). It
# starts out as the last constructor error; when called on an object that
# has _ERROR_DIAG it is taken from that object instead, and if that object
# has an "error" callback and a non-zero code, the callback is invoked
# with the diagnostic and its result is returned.
#
# List context:   returns the five-element diagnostic.
# Scalar context: returns the message part.
# Void context:   auto-diag mode. Nothing happens for code 0 or 2012 (EOF).
#                 Otherwise a "# CSV_PP ERROR: ..." line is emitted, with
#                 the offending input and a position marker appended when
#                 diag_verbose is set. The message is a warning, or fatal
#                 when the auto_diag level is above 1; a level below 2 is
#                 raised by one when autodie/Fatal hints are seen two
#                 frames up the call stack.
sub error_diag {
    my $self = shift;
    my @diag = (0 + $last_new_error, $last_new_error, 0, 0, 0);

    # Docs state to NEVER use UNIVERSAL::isa, because it will never call an
    # overridden isa method in any class. Well, that is exactly what I want here
    if ($self && ref $self && # Not a class method or direct call
         UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{_ERROR_DIAG}) {
        $diag[0] = 0 + $self->{_ERROR_DIAG};
        $diag[1] =     $self->{_ERROR_DIAG};
        $diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
        $diag[3] =     $self->{_RECNO};
        $diag[4] =     $self->{_ERROR_FLD} if exists $self->{_ERROR_FLD};

        $diag[0] && $self->{callbacks} && $self->{callbacks}{error} and
            return $self->{callbacks}{error}->(@diag);
    }

    my $context = wantarray;

    unless (defined $context) { # Void context, auto-diag
        if ($diag[0] && $diag[0] != 2012) {
            my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
            # s/$/.../ inserts in front of the trailing newline
            $diag[4] and $msg =~ s/$/ field $diag[4]/;

            unless ($self && ref $self) { # auto_diag
                # called without args in void context
                warn $msg;
                return;
            }

            if ($self->{diag_verbose} and $self->{_ERROR_INPUT}) {
                # Echo the input line as-is (no stray trailing quote), then
                # a caret under the reported position.
                $msg .= "$self->{_ERROR_INPUT}\n";
                $msg .= " " x ($diag[2] - 1);
                $msg .= "^\n";
            }

            my $lvl = $self->{auto_diag};
            if ($lvl < 2) {
                my @c = caller (2);
                if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
                    my $hints = $c[10];
                    (exists $hints->{autodie} && $hints->{autodie} or
                     exists $hints->{"guard Fatal"} &&
                    !exists $hints->{"no Fatal"}) and
                        $lvl++;
                    # Future releases of autodie will probably set $^H{autodie}
                    # to "autodie @args", like "autodie :all" or "autodie open"
                    # so we can/should check for "open" or "new"
                }
            }
            $lvl > 1 ? die $msg : warn $msg;
        }
        return;
    }

    return $context ? @diag : $diag[1];
}
821
# Returns the object's current record counter ($self->{_RECNO}).
sub record_number {
    my ($self) = @_;
    return $self->{_RECNO};
}
825
826	################################################################################
827	# string
828	################################################################################
829
# string () is the public name of _string ().
*string = \&_string;

# Returns the string stored (by reference) in _STRING by the last
# combine () or parse () call, or undef when there is none.
sub _string {
    my ($self) = @_;
    return undef unless defined $self->{_STRING};
    return ${ $self->{_STRING} };
}
834
835	################################################################################
836	# fields
837	################################################################################
838
# fields () is the public name of _fields ().
*fields = \&_fields;

# Returns the list of fields stored in _FIELDS by the last combine () or
# parse () call, or undef when _FIELDS is not a reference.
sub _fields {
    my ($self) = @_;
    if (ref ($self->{_FIELDS})) {
        return @{ $self->{_FIELDS} };
    }
    return undef;
}
843
844	################################################################################
845	# meta_info
846	################################################################################
847
# Returns the list of per-field flags stored in _FFLAGS, or undef when no
# flags are available.
sub meta_info {
    my ($self) = @_;
    if ($self->{_FFLAGS}) {
        return @{ $self->{_FFLAGS} };
    }
    return undef;
}
851
# Tells whether field number $idx (0-based) of the last parsed record was
# quoted. Returns 1 or 0; returns nothing when no flags are available or
# when $idx contains a non-digit or is out of range.
sub is_quoted {
    my ($self, $idx) = @_;
    my $flags = $self->{_FFLAGS};
    defined $flags or return;
    return if $idx =~ /\D/ || $idx < 0 || $idx > $#{$flags};

    return $flags->[$idx] & IS_QUOTED () ? 1 : 0;
}
858
# Tells whether field number $idx (0-based) of the last parsed record was
# flagged as binary. Returns 1 or 0; returns nothing when no flags are
# available or when $idx contains a non-digit or is out of range.
sub is_binary {
    my ($self, $idx) = @_;
    my $flags = $self->{_FFLAGS};
    defined $flags or return;
    return if $idx =~ /\D/ || $idx < 0 || $idx > $#{$flags};

    return $flags->[$idx] & IS_BINARY () ? 1 : 0;
}
864
# Tells whether field number $idx (0-based) is flagged as missing.
# Returns nothing when keep_meta_info is off, when $idx is negative or
# when no flags are available; returns 1 when $idx lies beyond the stored
# flags; otherwise 1 or 0 depending on the IS_MISSING bit.
# Any further arguments are ignored.
sub is_missing {
    my ($self, $idx) = @_;
    return unless $self->{keep_meta_info}; # FIXME
    $idx < 0 || !ref $self->{_FFLAGS} and return;
    $idx >= @{$self->{_FFLAGS}} and return 1;
    $self->{_FFLAGS}[$idx] & IS_MISSING () ? 1 : 0;
}
872
873	################################################################################
874	# combine
875	################################################################################
# combine () is the public name of _combine ().
*combine = \&_combine;

# Combines @fields into one string by means of __combine ().
# Side effects: _FIELDS holds the given fields, _STRING a reference to
# the resulting string and _STATUS the outcome.
# Returns the status: false when no fields were given, otherwise whatever
# __combine () returned.
sub _combine {
    my $self   = shift;
    my @fields = @_;
    my $str    = "";
    $self->{_FIELDS} = \@fields;
    my $ok = @fields > 0;
    $ok &&= $self->__combine (\$str, \@fields, 0);
    $self->{_STATUS} = $ok;
    $self->{_STRING} = \$str;
    return $self->{_STATUS};
}
885
886	################################################################################
887	# parse
888	################################################################################
# parse () is the public name of _parse ().
*parse = \&_parse;

# Parses the string $str by means of __parse ().
# Croaks with error 1500 when $str is a reference.
# Side effects: _STRING holds a reference to a copy of $str. On success
# _FIELDS and _FFLAGS hold the parsed fields and their flags and _STATUS
# is 1; on failure (or undefined $str) both are undef and _STATUS is 0.
# Returns the status.
sub _parse {
    my ($self, $str) = @_;

    croak ($self->SetDiag (1500)) if ref $str;

    my (@fields, @fflags);
    $self->{_STRING} = \$str;
    my $ok = defined $str && $self->__parse (\@fields, \@fflags, $str, 0);
    @{$self}{qw( _FIELDS _FFLAGS _STATUS )} = $ok
        ? (\@fields, \@fflags, 1)
        : (undef,    undef,    0);
    return $self->{_STATUS};
}
910
# Get, set or reset the column names.
#   column_names ()                 returns the current names (empty list
#                                   when none are set)
#   column_names (undef)            resets the names; returns undef
#   column_names (@names|\@names)   stores the names and returns them;
#                                   undefined names are stored as
#                                   "\cAUNDEF\cA"
# Croaks with error 3001 when the list contains references (other than a
# single array ref), and with 3003 when columns are bound and their
# number differs from the number of names.
sub column_names {
    my ($self, @columns) = @_;

    unless (@columns) {
        return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
    }
    if (@columns == 1 && !defined $columns[0]) {
        return $self->{_COLUMN_NAMES} = undef;
    }

    if (@columns == 1 && ref $columns[0] eq "ARRAY") {
        @columns = @{ $columns[0] };
    }
    elsif (join "", map { defined $_ ? ref $_ : "" } @columns) {
        croak ($self->SetDiag (3001));
    }

    my $bound = $self->{_BOUND_COLUMNS};
    if ($bound && @columns != @{$bound}) {
        croak ($self->SetDiag (3003));
    }

    my @names = map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns;
    $self->{_COLUMN_NAMES} = \@names;
    return @{ $self->{_COLUMN_NAMES} };
}
931
# Reads the first line from $fh and interprets it as the header line.
#
# Usage: $csv->header ($fh, [ seps ], { options })
#   An array ref argument adds candidate separators; a hash ref supplies
#   the options:
#     sep_set             array ref of candidate separators
#                         (default: "," and ";")
#     detect_bom          detect and strip a byte order mark (default 1)
#     munge_column_names  "lc" (default), "uc", a code ref applied to
#                         every name, or a hash ref name => replacement;
#                         "munge" is accepted as an alias
#     set_column_names    store the result through column_names ()
#                         (default 1)
#
# The candidate that occurs in the header line becomes the separator.
# With detect_bom, a recognised BOM is removed, $self->{ENCODING} is set
# to the upper-cased encoding name (or ""), the header is decoded and the
# matching :encoding layer is pushed onto $fh.
#
# Returns the column names in list context, the object otherwise.
# Croaks with 1014 (no $fh), 1500 (sep_set not an array ref), 1010 (empty
# header), 1011 (more than one candidate separator present), 1012 (empty
# column name) or 1013 (duplicate column names), and with a usage message
# for any other kind of argument.
sub header {
    my ($self, $fh, @args) = @_;

    $fh or croak ($self->SetDiag (1014));

    my (@seps, %args);
    for (@args) {
        if (ref $_ eq "ARRAY") {
            push @seps, @$_;
            next;
        }
        if (ref $_ eq "HASH") {
            %args = %$_;
            next;
        }
        croak (q{usage: $csv->header ($fh, [ seps ], { options })});
    }

    defined $args{munge} && !defined $args{munge_column_names} and
        $args{munge_column_names} = $args{munge}; # munge as alias
    defined $args{detect_bom}         or $args{detect_bom}         = 1;
    defined $args{set_column_names}   or $args{set_column_names}   = 1;
    defined $args{munge_column_names} or $args{munge_column_names} = "lc";

    # Reset any previous leftovers
    $self->{_RECNO}         = 0;
    $self->{_AHEAD}         = undef;
    $self->{_COLUMN_NAMES}  = undef if $args{set_column_names};
    $self->{_BOUND_COLUMNS} = undef if $args{set_column_names};
    $self->_cache_set ($_cache_id{'_has_ahead'}, 0);

    if (defined $args{sep_set}) {
        ref $args{sep_set} eq "ARRAY" or
            croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
        @seps = @{$args{sep_set}};
    }

    $^O eq "MSWin32" and binmode $fh;
    my $hdr = <$fh>;
    # check if $hdr can be empty here, I don't think so
    defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));

    # Find out which of the candidate separators occur in the header
    my %sep;
    @seps or @seps = (",", ";");
    foreach my $sep (@seps) {
        index ($hdr, $sep) >= 0 and $sep{$sep}++;
    }

    keys %sep >= 2 and croak ($self->SetDiag (1011));

    # With no candidate found this is a plain sep () call without
    # arguments, which leaves the separator as it is.
    $self->sep (keys %sep);
    my $enc = "";
    if ($args{detect_bom}) { # UTF-7 is not supported
           if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be"   }
        elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le"   }
        elsif ($hdr =~ s/^\xfe\xff//)         { $enc = "utf-16be"   }
        elsif ($hdr =~ s/^\xff\xfe//)         { $enc = "utf-16le"   }
        elsif ($hdr =~ s/^\xef\xbb\xbf//)     { $enc = "utf-8"      }
        elsif ($hdr =~ s/^\xf7\x64\x4c//)     { $enc = "utf-1"      }
        elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
        elsif ($hdr =~ s/^\x0e\xfe\xff//)     { $enc = "scsu"       }
        elsif ($hdr =~ s/^\xfb\xee\x28//)     { $enc = "bocu-1"     }
        elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030"   }
        elsif ($hdr =~ s/^\x{feff}//)         { $enc = ""           }

        $self->{ENCODING} = uc $enc;

        $hdr eq "" and croak ($self->SetDiag (1010));

        if ($enc) {
            if ($enc =~ m/([13]).le$/) {
                # Little-endian UTF-16/32: pad the header with 1 or 3 NUL
                # bytes and read (and discard) as many bytes from $fh.
                my $l = 0 + $1;
                my $x;
                $hdr .= "\0" x $l;
                read $fh, $x, $l;
            }
            if ($enc ne "utf-8") {
                require Encode;
                $hdr = Encode::decode ($enc, $hdr);
            }
            binmode $fh, ":encoding($enc)";
        }
    }

    # If the line that was read holds more than the header, keep the header
    # only and remember the line ending and the remaining data.
    my ($ahead, $eol);
    if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
        $eol   = $2;
        $ahead = $3;
    }

    $args{munge_column_names} eq "lc" and $hdr = lc $hdr;
    $args{munge_column_names} eq "uc" and $hdr = uc $hdr;

    my $hr = \$hdr; # Will cause croak on perl-5.6.x
    open my $h, "<", $hr or croak ($self->SetDiag (1010));

    my $row = $self->getline ($h) or croak;
    close $h;

    if ($ahead) { # Must be after getline, which creates the cache
        $self->_cache_set ($_cache_id{_has_ahead}, 1);
        $self->{_AHEAD} = $ahead;
        # A line ending that starts with a CR not followed by NL becomes eol
        $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
    }

    my @hdr = @$row;
    ref $args{munge_column_names} eq "CODE" and
        @hdr = map { $args{munge_column_names}->($_) } @hdr;
    ref $args{munge_column_names} eq "HASH" and
        @hdr = map { $args{munge_column_names}->{$_} || $_ } @hdr;
    my %hdr; $hdr{$_}++ for @hdr;
    exists $hdr{""} and croak ($self->SetDiag (1012));
    unless (keys %hdr == @hdr) {
        croak ($self->_SetDiagInfo (1013, join ", " =>
            map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
    }
    $args{set_column_names} and $self->column_names (@hdr);
    wantarray ? @hdr : $self;
}
1051
# Get, set or reset the bound columns.
#   bind_columns ()        returns the bound references (undef when none)
#   bind_columns (undef)   removes the binding; returns undef
#   bind_columns (@refs)   binds the given scalar references and returns
#                          them
# Croaks with error 3003 when column names are set and their number
# differs from the number of references, and with 3004 when an argument
# is not a plain scalar reference.
sub bind_columns {
    my ($self, @refs) = @_;

    unless (@refs) {
        return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
    }
    if (@refs == 1 && !defined $refs[0]) {
        return $self->{_BOUND_COLUMNS} = undef;
    }

    my $names = $self->{_COLUMN_NAMES};
    $names && @refs != @{$names} and croak ($self->SetDiag (3003));

    foreach my $ref (@refs) {
        ref $ref eq "SCALAR" or croak ($self->SetDiag (3004));
    }

    $self->_set_attr_N ("_is_bound", scalar @refs);
    $self->{_BOUND_COLUMNS} = [ @refs ];
    return @refs;
}
1070
# Reads the next record through getline (@args) and returns it as a hash
# reference keyed by the column names.
# Croaks with error 3002 when no column names are set. Returns nothing
# when getline () returns a false value.
# When meta info is available (_FFLAGS is a reference), columns for which
# the record holds no field are flagged IS_MISSING; so is the first
# column when the record consists of one undefined or empty field and
# that column has no flag bits set yet.
sub getline_hr {
    my ($self, @args) = @_;
    my %hr;
    $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
    my $fr = $self->getline (@args) or return;
    if (ref $self->{_FFLAGS}) { # missing
        $self->{_FFLAGS}[$_] = IS_MISSING ()
            for (@$fr ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
        @$fr == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
            $self->{_FFLAGS}[0] ||= IS_MISSING ();
    }
    @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
    \%hr;
}
1084
# Reads all records through getline_all ($io, @args) and returns a
# reference to a list of hash references keyed by the column names.
# Croaks with error 3002 when no column names are set.
sub getline_hr_all {
    my ($self, $io, @args) = @_;

    $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));

    my @names = @{ $self->{_COLUMN_NAMES} };
    my @records;
    foreach my $row (@{ $self->getline_all ($io, @args) }) {
        my %record;
        @record{@names} = @$row;
        push @records, \%record;
    }
    return \@records;
}
1096
# Like print (), but makes sure the record is followed by a line ending:
# when eol is undefined or empty it is temporarily set to $\ (or, when
# that is false, to $/) for the duration of the print () call, and
# restored afterwards.
# Returns the result of print ().
sub say {
    my ($self, $io, @f) = @_;
    my $eol = $self->eol;
    # Check definedness first so an undefined eol does not trigger an
    # "uninitialized" warning in the comparison.
    defined $eol && $eol ne "" or $self->eol ($\ || $/);
    # say ($fh, undef) does not propagate actual undef to print ()
    my $state = $self->print ($io, @f == 1 && !defined $f[0] ? undef : @f);
    $self->eol ($eol);
    return $state;
}
1106
# Prints the values of the hash reference $hr to $io, in the order of the
# column names. Croaks with error 3009 when no column names are set and
# with 3010 when $hr is not a hash reference.
# Returns the result of print ().
sub print_hr {
    my ($self, $io, $hr) = @_;
    croak ($self->SetDiag (3009)) unless $self->{_COLUMN_NAMES};
    croak ($self->SetDiag (3010)) unless ref $hr eq "HASH";
    my @row = map { $hr->{$_} } $self->column_names;
    $self->print ($io, \@row);
}
1113
# Read a fragment of the stream $io as selected by $spec.
#
# $spec has the form "row=...", "col=..." or "cell=..." (case insensitive,
# optionally preceded by "#"):
#   row / col : a ";"-separated list of N or N-M, where M may be "*"
#   cell      : a ";"-separated list of R,C or R,C-R,C, where the bottom
#               right row and/or column may be "*"
# Row and column numbers are 1-based.
#
# Returns a reference to a list with one entry per selected row. An entry
# is an array reference, or a hash reference keyed on column_names ()
# when column names are set. Croaks with diag 2013 on an invalid $spec.
sub fragment {
    my ($self, $io, $spec) = @_;

    my $qd = qr{\s* [0-9]+ \s* }x;              # digit
    my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x;    # digit or star
    my $qr = qr{$qd (?: - $qs )?}x;             # range
    my $qc = qr{$qr (?: ; $qr )*}x;             # list
    defined $spec && $spec =~ m{^ \s*
        \x23 ? \s*                              # optional leading #
        ( row | col | cell ) \s* =
        ( $qc                                   # for row and col
        | $qd , $qd (?: - $qs , $qs)?           # for cell (ranges)
          (?: ; $qd , $qd (?: - $qs , $qs)? )*  # and cell (range) lists
        ) \s* $}xi or croak ($self->SetDiag (2013));
    my ($type, $range) = (lc $1, $2);

    my @h = $self->column_names ();

    my @c;
    if ($type eq "cell") {
        my @spec;
        my $min_row;
        my $max_row = 0;
        # The validation above allows optional whitespace around ";", so
        # the split has to allow it as well.
        for (split m/\s*;\s*/ => $range) {
            my ($tlr, $tlc, $brr, $brc) = (m{
                    ^ \s* ([0-9]+     ) \s* , \s* ([0-9]+     ) \s*
                (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
                    $}x) or croak ($self->SetDiag (2013));
            defined $brr or ($brr, $brc) = ($tlr, $tlc);
            $tlr == 0 || $tlc == 0 ||
                ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
                ($brc ne "*" && ($brc == 0 || $brc < $tlc))
                    and croak ($self->SetDiag (2013));
            $tlc--;                         # columns are used as 0-based indices
            $brc-- unless $brc eq "*";
            defined $min_row or $min_row = $tlr;
            $tlr < $min_row and $min_row = $tlr;
            # Once any range is open-ended ("*"), the maximum stays open-ended.
            # Comparing a number against "*" would numify "*" to 0 and
            # wrongly replace it with a finite row.
            if ($max_row ne "*" and ($brr eq "*" or $brr > $max_row)) {
                $max_row = $brr;
            }
            push @spec, [ $tlr, $tlc, $brr, $brc ];
        }
        my $r = 0;
        while (my $row = $self->getline ($io)) {
            ++$r < $min_row and next;
            my %row;
            my $lc;
            foreach my $s (@spec) {
                my ($tlr, $tlc, $brr, $brc) = @$s;
                $r < $tlr || ($brr ne "*" && $r > $brr) and next;
                !defined $lc || $tlc < $lc and $lc = $tlc;
                my $rr = $brc eq "*" ? $#$row : $brc;
                $row{$_} = $row->[$_] for $tlc .. $rr;
            }
            # Selected cells in ascending column order
            push @c, [ @row{sort { $a <=> $b } keys %row } ];
            if (@h) {
                my %h; @h{@h} = @{$c[-1]};
                $c[-1] = \%h;
            }
            $max_row ne "*" && $r == $max_row and last;
        }
        return \@c;
    }

    # row or col
    my @r;          # $r[N] is true when row/column N is selected
    my $eod = 0;    # true when a range runs to the end ("N-*")
    for (split m/\s*;\s*/ => $range) {
        my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
            or croak ($self->SetDiag (2013));
        $to ||= $from;
        $to eq "*" and ($to, $eod) = ($from, 1);
        # $to cannot be <= 0 due to regex and ||=
        $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
        $r[$_] = 1 for $from .. $to;
    }

    my $r = 0;
    $type eq "col" and shift @r;    # columns are matched on 0-based indices
    $_ ||= 0 for @r;
    while (my $row = $self->getline ($io)) {
        $r++;
        if ($type eq "row") {
            if (($r > $#r && $eod) || $r[$r]) {
                push @c, $row;
                if (@h) {
                    my %h; @h{@h} = @{$c[-1]};
                    $c[-1] = \%h;
                }
            }
            next;
        }
        push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#$row ];
        if (@h) {
            my %h; @h{@h} = @{$c[-1]};
            $c[-1] = \%h;
        }
    }

    return \@c;
}
1214
# Usage message passed to croak by _csv_attr () when no input is given
# and by csv () when it is called without arguments.
my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1216
# Digest the arguments of csv () (a hash reference or a key/value list).
#
# Opens the input and/or output where needed, removes the csv ()-only
# keys from the attributes and creates (or reuses, via the "csv" key) the
# CSV object. Returns a hash reference holding the object, the remaining
# attributes, the file handle and all extracted options.
#
# Croaks when no arguments or no "in"/"file" is given, when both in and
# out are plain strings, when a file cannot be opened, or when the
# constructor fails.
sub _csv_attr {
    my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;

    # The binary attribute is always forced on here
    $attr{binary} = 1;

    # enc/encoding: "auto" switches to BOM detection, a plain name is
    # turned into a PerlIO layer specification
    my $enc = delete $attr{enc} || delete $attr{encoding} || "";
    $enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
    $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";

    my $fh;
    my $sink = 0;
    my $cls = 0; # If I open a file, I have to close it
    my $in = delete $attr{in} || delete $attr{file} or croak $csv_usage;
    # An existing but false "out" is represented as a reference to "skip"
    my $out = exists $attr{out} && !$attr{out} ? \"skip"
        : delete $attr{out} || delete $attr{file};

    # Code and array input with no explicit out goes to STDOUT
    ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;

    $in && $out && !ref $in && !ref $out and croak join "\n" =>
        qq{Cannot use a string for both in and out. Instead use:},
        qq{ csv (in => csv (in => "$in"), out => "$out");\n};

    if ($out) {
        if ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
            # Anything that is a non-SCALAR reference or a glob is used as handle
            $fh = $out;
        }
        elsif (ref $out and "SCALAR" eq ref $out and defined $$out and $$out eq "skip") {
            # out => \"skip": parse only, produce no output and no result
            delete $attr{out};
            $sink = 1;
        }
        else {
            open $fh, ">", $out or croak "$out: $!";
            $cls = 1;
        }
        if ($fh) {
            $enc and binmode $fh, $enc;
            unless (defined $attr{eol}) {
                # Default eol: "\n" when the handle has a crlf layer, "\r\n" otherwise
                my @layers = eval { PerlIO::get_layers ($fh) };
                $attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
            }
        }
    }

    if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
        # All done
    }
    elsif (ref $in eq "SCALAR") {
        # Strings with code points over 0xFF may not be mapped into in-memory file handles
        # "<$enc" does not change that :(
        open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
        $cls = 1;
    }
    elsif (ref $in or "GLOB" eq ref \$in) {
        if (!ref $in && $] < 5.008005) {
            $fh = \*$in; # uncoverable statement ancient perl version required
        }
        else {
            $fh = $in;
        }
    }
    else {
        # A plain string is taken as a file name
        open $fh, "<$enc", $in or croak "$in: $!";
        $cls = 1;
    }
    $fh || $sink or croak qq{No valid source passed. "in" is required};

    # Options that are handled by csv () itself and are therefore removed
    # from the attributes passed to the constructor
    my $hdrs = delete $attr{headers};
    my $frag = delete $attr{fragment};
    my $key = delete $attr{key};
    my $val = delete $attr{value};
    my $kh = delete $attr{keep_headers} ||
        delete $attr{keep_column_names} ||
        delete $attr{kh};

    # Callbacks, accepted both inside {callbacks} and as top-level keys
    my $cbai = delete $attr{callbacks}{after_in} ||
        delete $attr{after_in} ||
        delete $attr{callbacks}{after_parse} ||
        delete $attr{after_parse};
    my $cbbo = delete $attr{callbacks}{before_out} ||
        delete $attr{before_out};
    my $cboi = delete $attr{callbacks}{on_in} ||
        delete $attr{on_in};

    # Options that csv () forwards to header ()
    my $hd_s = delete $attr{sep_set} ||
        delete $attr{seps};
    my $hd_b = delete $attr{detect_bom} ||
        delete $attr{bom};
    my $hd_m = delete $attr{munge} ||
        delete $attr{munge_column_names};
    my $hd_c = delete $attr{set_column_names};

    # Attribute aliases; an alias is only used when the target key is absent
    for ([ quo => "quote" ],
         [ esc => "escape" ],
         [ escape => "escape_char" ],
        ) {
        my ($f, $t) = @$_;
        exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
    }

    # filter: a predefined name, a code ref (stored under key 0) or a hash ref;
    # anything else is discarded
    my $fltr = delete $attr{filter};
    my %fltr = (
        not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
        not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
        filled => sub { grep { defined && m/\S/ } @{$_[1]} },
        );
    defined $fltr && !ref $fltr && exists $fltr{$fltr} and
        $fltr = { 0 => $fltr{$fltr} };
    ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
    ref $fltr eq "HASH" or $fltr = undef;

    exists $attr{formula} and
        $attr{formula} = _supported_formula (undef, $attr{formula});

    # Defaults that are applied only when the caller did not define them
    defined $attr{auto_diag} or $attr{auto_diag} = 1;
    defined $attr{escape_null} or $attr{escape_null} = 0;
    my $csv = delete $attr{csv} || Text::CSV_PP->new (\%attr)
        or croak $last_new_error;

    return {
        csv => $csv,
        attr => { %attr },
        fh => $fh,
        cls => $cls,
        in => $in,
        sink => $sink,
        out => $out,
        enc => $enc,
        hdrs => $hdrs,
        key => $key,
        val => $val,
        kh => $kh,
        frag => $frag,
        fltr => $fltr,
        cbai => $cbai,
        cbbo => $cbbo,
        cboi => $cboi,
        hd_s => $hd_s,
        hd_b => $hd_b,
        hd_m => $hd_m,
        hd_c => $hd_c,
        };
}
1359
# Functional interface: read or write CSV in a single call.
#
# Arguments are passed to _csv_attr (). When called as a method on an
# object of this class or of Text::CSV, "csv" is inserted in front of the
# arguments so that the object is reused.
#
# Writing (an "out" is set and it is not the sink):
#   "in" is a code ref returning rows, a list of lists, or a list of
#   hashes. Returns 1.
# Reading:
#   Returns a reference to a list of lists, a list of hashes (when headers
#   are in effect) or a hash of records (when "key" is used). Returns
#   nothing when the output is the sink, or when a header line was
#   requested but could not be read. In void context the result is written
#   by a recursive call to csv ().
#
# Croaks with diag 1501/1502/1503 on invalid key/value/keep_headers
# arguments, 1012/1013 on empty or duplicate header fields and 4001 when
# a key field is not in the header.
sub csv {
    @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
    @_ or croak $csv_usage;

    my $c = _csv_attr (@_);

    my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
    my %hdr;
    if (ref $hdrs eq "HASH") {
        # A hash of headers is a rename map applied to automatic headers
        %hdr = %$hdrs;
        $hdrs = "auto";
    }

    if ($c->{out} && !$c->{sink}) {
        if (ref $in eq "CODE") {
            my $hdr = 1;
            while (my $row = $in->($csv)) {
                if (ref $row eq "ARRAY") {
                    $csv->print ($fh, $row);
                    next;
                }
                if (ref $row eq "HASH") {
                    if ($hdr) {
                        # The first hash determines the header line
                        $hdrs ||= [ map { $hdr{$_} || $_ } keys %$row ];
                        $csv->print ($fh, $hdrs);
                        $hdr = 0;
                    }
                    $csv->print ($fh, [ @{$row}{@$hdrs} ]);
                }
            }
        }
        elsif (ref $in->[0] eq "ARRAY") { # aoa
            ref $hdrs and $csv->print ($fh, $hdrs);
            for (@{$in}) {
                $c->{cboi} and $c->{cboi}->($csv, $_);
                $c->{cbbo} and $c->{cbbo}->($csv, $_);
                $csv->print ($fh, $_);
            }
        }
        else { # aoh
            my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
            defined $hdrs or $hdrs = "auto";
            ref $hdrs || $hdrs eq "auto" and
                $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
            for (@{$in}) {
                # Make the record available to the callbacks as %_
                local %_;
                *_ = $_;
                $c->{cboi} and $c->{cboi}->($csv, $_);
                $c->{cbbo} and $c->{cbbo}->($csv, $_);
                $csv->print ($fh, [ @{$_}{@hdrs} ]);
            }
        }

        $c->{cls} and close $fh;
        return 1;
    }

    my @row1;
    if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
        # Forward the header-related options to header (). The keys have
        # to match the names _csv_attr () stored them under (hd_s, hd_b,
        # hd_m, hd_c) and the option names used when they were extracted
        # (sep_set, detect_bom, ...).
        my %harg;
        defined $c->{hd_s} and $harg{sep_set} = $c->{hd_s};
        defined $c->{hd_b} and $harg{detect_bom} = $c->{hd_b};
        defined $c->{hd_m} and $harg{munge_column_names} = $hdrs ? "none" : $c->{hd_m};
        defined $c->{hd_c} and $harg{set_column_names} = $hdrs ? 0 : $c->{hd_c};
        @row1 = $csv->header ($fh, \%harg);
        my @hdr = $csv->column_names;
        @hdr and $hdrs ||= \@hdr;
    }

    if ($c->{kh}) {
        ref $c->{kh} eq "ARRAY" or croak ($csv->SetDiag (1501));
        $hdrs ||= "auto";
    }

    my $key = $c->{key};
    if ($key) {
        !ref $key or ref $key eq "ARRAY" && @$key > 1 or croak ($csv->SetDiag (1501));
        $hdrs ||= "auto";
    }
    my $val = $c->{val};
    if ($val) {
        $key or croak ($csv->SetDiag (1502));
        !ref $val or ref $val eq "ARRAY" && @$val > 0 or croak ($csv->SetDiag (1503));
    }

    # Filters on named fields need a header to map the names to indices
    $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
    if (defined $hdrs) {
        if (!ref $hdrs) {
            if ($hdrs eq "skip") {
                $csv->getline ($fh); # discard;
            }
            elsif ($hdrs eq "auto") {
                my $h = $csv->getline ($fh) or return;
                $hdrs = [ map { $hdr{$_} || $_ } @$h ];
            }
            elsif ($hdrs eq "lc") {
                my $h = $csv->getline ($fh) or return;
                $hdrs = [ map { lc ($hdr{$_} || $_) } @$h ];
            }
            elsif ($hdrs eq "uc") {
                my $h = $csv->getline ($fh) or return;
                $hdrs = [ map { uc ($hdr{$_} || $_) } @$h ];
            }
        }
        elsif (ref $hdrs eq "CODE") {
            my $h = $csv->getline ($fh) or return;
            my $cr = $hdrs;
            $hdrs = [ map { $cr->($hdr{$_} || $_) } @$h ];
        }
        # keep_headers: copy the resulting header into the caller's array
        $c->{kh} and $hdrs and @{$c->{kh}} = @$hdrs;
    }

    if ($c->{fltr}) {
        my %f = %{$c->{fltr}};
        # convert headers to index
        my @hdr;
        if (ref $hdrs) {
            @hdr = @{$hdrs};
            for (0 .. $#hdr) {
                exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
            }
        }
        $csv->callbacks (after_parse => sub {
            my ($CSV, $ROW) = @_; # lexical sub-variables in caps
            foreach my $FLD (sort keys %f) {
                # The filter sees the field as $_ and the record as %_
                local $_ = $ROW->[$FLD - 1];
                local %_;
                @hdr and @_{@hdr} = @$ROW;
                $f{$FLD}->($CSV, $ROW) or return \"skip";
                $ROW->[$FLD - 1] = $_;
            }
            });
    }

    my $frag = $c->{frag};
    my $ref = ref $hdrs
        ? # aoh
            do {
                my @h = $csv->column_names ($hdrs);
                my %h; $h{$_}++ for @h;
                exists $h{""} and croak ($csv->SetDiag (1012));
                unless (keys %h == @h) {
                    croak ($csv->_SetDiagInfo (1013, join ", " =>
                        map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
                }
                $frag ? $csv->fragment ($fh, $frag) :
                $key ? do {
                        # A scalar key names one field; an array ref holds
                        # the join string followed by the fields to join
                        my ($k, $j, @f) = ref $key ? (undef, @$key) : ($key);
                        if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
                            croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
                        }
                        +{ map {
                            my $r = $_;
                            my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
                            ( $K => (
                            $val
                                ? ref $val
                                    ? { map { $_ => $r->{$_} } @$val }
                                    : $r->{$val}
                                : $r ));
                            } @{$csv->getline_hr_all ($fh)} }
                        }
                     : $csv->getline_hr_all ($fh);
                }
        : # aoa
            $frag ? $csv->fragment ($fh, $frag)
                  : $csv->getline_all ($fh);
    if ($ref) {
        @row1 && !$c->{hd_c} && !ref $hdrs and unshift @$ref, \@row1;
    }
    else {
        Text::CSV_PP->auto_diag;
    }
    $c->{cls} and close $fh;
    if ($ref and $c->{cbai} || $c->{cboi}) {
        # Default is ARRAYref, but with key =>, you'll get a hashref
        foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
            local %_;
            ref $r eq "HASH" and *_ = $r;
            $c->{cbai} and $c->{cbai}->($csv, $r);
            $c->{cboi} and $c->{cboi}->($csv, $r);
        }
    }

    $c->{sink} and return;

    # Void context: write out what was just read
    defined wantarray or
        return csv (%{$c->{attr}}, in => $ref, headers => $hdrs, %{$c->{attr}});

    return $ref;
}
1551
1552	# The end of the common pure perl part.
1553
1554	################################################################################
1555	#
1556	# The following are methods implemented in XS in Text::CSV_XS or
1557	# helper methods for Text::CSV_PP only
1558	#
1559	################################################################################
1560
# Build the context hash used by a single combine or parse call.
#
# When $self->{_CACHE} exists it is copied; otherwise the context is
# derived from the object attributes and stored as the new cache. The
# per-call members (utf8, size, used, bound, eol_pos, eolx) are reset on
# every call. Also clears the file-level $last_error.
# Returns the context hash reference.
sub _setup_ctx {
    my $self = shift;

    $last_error = undef;

    my $ctx;
    if ($self->{_CACHE}) {
        %$ctx = %{$self->{_CACHE}};
    } else {
        # Separator: sep_char, overridden by a non-empty sep
        $ctx->{sep} = ',';
        if (defined $self->{sep_char}) {
            $ctx->{sep} = $self->{sep_char};
        }
        if (defined $self->{sep} and $self->{sep} ne '') {
            use bytes;
            $ctx->{sep} = $self->{sep};
            my $sep_len = length($ctx->{sep});
            # sep_len is only recorded for separators longer than one byte
            $ctx->{sep_len} = $sep_len if $sep_len > 1;
        }

        # Quote: an undefined or empty quote_char is stored as "\0"
        $ctx->{quo} = '"';
        if (exists $self->{quote_char}) {
            my $quote_char = $self->{quote_char};
            if (defined $quote_char and length $quote_char) {
                $ctx->{quo} = $quote_char;
            } else {
                $ctx->{quo} = "\0";
            }
        }
        if (defined $self->{quote} and $self->{quote} ne '') {
            use bytes;
            $ctx->{quo} = $self->{quote};
            my $quote_len = length($ctx->{quo});
            $ctx->{quo_len} = $quote_len if $quote_len > 1;
        }

        # Escape: an undefined or empty escape_char is stored as "\0"
        $ctx->{escape_char} = '"';
        if (exists $self->{escape_char}) {
            my $escape_char = $self->{escape_char};
            if (defined $escape_char and length $escape_char) {
                $ctx->{escape_char} = $escape_char;
            } else {
                $ctx->{escape_char} = "\0";
            }
        }

        if (defined $self->{eol}) {
            my $eol = $self->{eol};
            my $eol_len = length($eol);
            $ctx->{eol} = $eol;
            $ctx->{eol_len} = $eol_len;
            if ($eol_len == 1 and $eol eq "\015") {
                $ctx->{eol_is_cr} = 1;
            }
        }

        # undef_flg is set to 3 when undef_str carries the utf8 flag
        $ctx->{undef_flg} = 0;
        if (defined $self->{undef_str}) {
            $ctx->{undef_str} = $self->{undef_str};
            $ctx->{undef_flg} = 3 if utf8::is_utf8($self->{undef_str});
        } else {
            $ctx->{undef_str} = undef;
        }

        if (defined $self->{_types}) {
            $ctx->{types} = $self->{_types};
            $ctx->{types_len} = length($ctx->{types});
        }

        if (defined $self->{_is_bound}) {
            $ctx->{is_bound} = $self->{_is_bound};
        }

        # has_hooks is a bit mask of the callbacks that are code refs
        if (defined $self->{callbacks}) {
            my $cb = $self->{callbacks};
            $ctx->{has_hooks} = 0;
            if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
                $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
            }
            if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
                $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
            }
        }

        # Attributes that default to 0 when undefined
        for (qw/
            binary decode_utf8 always_quote strict quote_empty
            allow_loose_quotes allow_loose_escapes
            allow_unquoted_escape allow_whitespace blank_is_undef
            empty_is_undef verbatim auto_diag diag_verbose
            keep_meta_info formula
        /) {
            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
        }
        # Attributes that default to 1 when undefined
        for (qw/quote_space escape_null quote_binary/) {
            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
        }
        # Without an escape character NUL cannot be escaped
        if ($ctx->{escape_char} eq "\0") {
            $ctx->{escape_null} = 0;
        }

        # FIXME: readonly
        %{$self->{_CACHE}} = %$ctx;
    }

    $ctx->{utf8} = 0;
    $ctx->{size} = 0;
    $ctx->{used} = 0;

    # is_bound is only kept when the bound columns are an array ref
    if ($ctx->{is_bound}) {
        my $bound = $self->{_BOUND_COLUMNS};
        if ($bound and ref $bound eq 'ARRAY') {
            $ctx->{bound} = $bound;
        } else {
            $ctx->{is_bound} = 0;
        }
    }

    $ctx->{eol_pos} = -1;
    # eolx: 0 without an eol; 1 in verbatim mode or for an eol of two or
    # more characters; for a one character eol it is 0 when the eol
    # matches the character class below and 1 otherwise
    $ctx->{eolx} = $ctx->{eol_len}
        ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
            ? 1
            : $ctx->{eol} =~ /\A[\015|\012]/ ? 0 : 1
        : 0;

    if ($ctx->{sep_len} and $ctx->{sep_len} > 1 and _is_valid_utf8($ctx->{sep})) {
        $ctx->{utf8} = 1;
    }
    if ($ctx->{quo_len} and $ctx->{quo_len} > 1 and _is_valid_utf8($ctx->{quo})) {
        $ctx->{utf8} = 1;
    }

    $ctx;
}
1694
# Update one entry of the cached context ($self->{_CACHE}).
#
# $idx is translated to an attribute name through %_reverse_cache_id. An
# unknown index only yields a warning. Returns nothing when there is no
# cache, 1 otherwise.
sub _cache_set {
    my ($self, $idx, $value) = @_;
    exists $self->{_CACHE} or return;
    my $cache = $self->{_CACHE};

    # attribute name => cache slot
    my %char_slot = (sep_char => 'sep', quote_char => 'quo');
    my %str_slot  = (sep      => 'sep', quote      => 'quo');
    my %plain_slot = (
        _has_ahead => 'has_ahead',
        _has_hooks => 'has_hooks',
        _is_bound  => 'is_bound',
    );

    my $key = $_reverse_cache_id{$idx};
    if (!defined $key) {
        warn (sprintf "Unknown cache index %d ignored\n", $idx);
    }
    elsif (exists $char_slot{$key}) {
        # Single character form: store it and reset the length
        my $slot = $char_slot{$key};
        $cache->{$slot} = $value;
        $cache->{"${slot}_len"} = 0;
    }
    elsif (exists $plain_slot{$key}) {
        $cache->{$plain_slot{$key}} = $value;
    }
    elsif (exists $str_slot{$key}) {
        # String form: the length is counted in bytes, and a length of
        # one is stored as 0
        use bytes;
        my $slot = $str_slot{$key};
        my $len  = bytes::length($value);
        $cache->{$slot} = $value if $len;
        $cache->{"${slot}_len"} = $len == 1 ? 0 : $len;
    }
    elsif ($key eq 'eol') {
        $cache->{eol} = $value if defined($value);
        $cache->{eol_is_cr} = $value eq "\015" ? 1 : 0;
    }
    elsif ($key eq 'undef_str') {
        if (defined $value) {
            $cache->{undef_str} = $value;
            $cache->{undef_flg} = 3 if utf8::is_utf8($value);
        }
        else {
            $cache->{undef_str} = undef;
            $cache->{undef_flg} = 0;
        }
    }
    else {
        # Everything else is stored under its own name
        $cache->{$key} = $value;
    }
    return 1;
}
1750
# Dump the cached context to STDERR through warn (), one line per entry.
# Warns "CACHE: invalid" and returns when there is no cache.
sub _cache_diag {
    my $self = shift;
    if (!exists $self->{_CACHE}) {
        warn ("CACHE: invalid\n");
        return;
    }

    my $cache = $self->{_CACHE};
    warn ("CACHE:\n");

    # The three main characters
    for my $pair ([ quote_char  => 'quo' ],
                  [ escape_char => 'escape_char' ],
                  [ sep_char    => 'sep' ]) {
        my ($label, $slot) = @{$pair};
        $self->__cache_show_char ($label => $cache->{$slot});
    }

    # Flags and small numbers, in a fixed order
    my @byte_keys = qw(
        binary decode_utf8 allow_loose_escapes allow_loose_quotes allow_unquoted_escape
        allow_whitespace always_quote quote_empty quote_space
        escape_null quote_binary auto_diag diag_verbose formula strict
        has_error_input blank_is_undef empty_is_undef has_ahead
        keep_meta_info verbatim has_hooks eol_is_cr eol_len
    );
    for my $name (@byte_keys) {
        $self->__cache_show_byte ($name => $cache->{$name});
    }

    $self->__cache_show_str (eol => $cache->{eol_len}, $cache->{eol});

    # Multi-byte sep and quote are only shown when longer than one
    $self->__cache_show_byte (sep_len => $cache->{sep_len});
    if ($cache->{sep_len} and $cache->{sep_len} > 1) {
        $self->__cache_show_str (sep => $cache->{sep_len}, $cache->{sep});
    }
    $self->__cache_show_byte (quo_len => $cache->{quo_len});
    if ($cache->{quo_len} and $cache->{quo_len} > 1) {
        $self->__cache_show_str (quote => $cache->{quo_len}, $cache->{quo});
    }
}
1782
# Warn one cache line: the key, ord () of the value in hex and the value
# itself as a number. An undefined value is shown as 0 in both columns.
sub __cache_show_byte {
    my ($self, $key, $value) = @_;
    my $code   = defined $value ? ord($value) : 0;
    my $number = defined $value ? $value      : 0;
    warn (sprintf " %-21s %02x:%3d\n", $key, $code, $number);
}
1787
# Warn one cache line for a character attribute: the key, the code of
# its first byte in hex and that byte as formatted by __pretty_str ().
sub __cache_show_char {
    my ($self, $key, $value) = @_;

    my $first = $value;
    if (defined $value) {
        # Only the first byte of the value is shown
        my @bytes = unpack "U0C*", $value;
        $first = pack "U*", $bytes[0];
    }

    my $code = defined $first ? ord($first) : 0;
    warn (sprintf " %-21s %02x:%s\n", $key, $code, $self->__pretty_str($first, 1));
}
1797
# Warn one cache line for a string attribute: the key, the given length
# and the first $len characters as formatted by __pretty_str ().
sub __cache_show_str {
    my ($self, $key, $len, $value) = @_;
    my $shown = $self->__pretty_str($value, $len);
    warn (sprintf " %-21s %02d:%s\n", $key, $len, $shown);
}
1802
# Return the first $len characters of $str between double quotes, with
# embedded double quotes backslash-escaped and every character outside
# TAB and printable ASCII shown as \x{..}. An undefined $str gives ''.
sub __pretty_str { # FIXME
    my ($self, $str, $len) = @_;
    defined $str or return '';

    my $shown = substr($str, 0, $len);
    $shown =~ s/"/\\"/g;
    $shown =~ s/([^\x09\x20-\x7e])/sprintf '\\x{%x}', ord($1)/eg;
    return '"' . $shown . '"';
}
1811
# Run the callback registered under $name, if any, with ($self, $fields).
#
# Returns 0 when there is no such code ref or when the first value the
# callback returns is a reference to the string "skip". Otherwise it
# returns the number of values the callback returned.
sub _hook {
    my ($self, $name, $fields) = @_;

    my $callbacks = $self->{callbacks} or return 0;
    my $code = $callbacks->{$name};
    ($code and ref $code eq 'CODE') or return 0;

    # The callback is called in list context
    my @ret = $code->($self, $fields);
    if (@ret and ref $ret[0] eq 'SCALAR' and ${$ret[0]} eq "skip") {
        return 0;
    }
    return scalar @ret;
}
1825
1826	################################################################################
1827	# methods for combine
1828	################################################################################
1829
# Combine the fields in @$fields into one CSV line stored in $$dst.
#
# When $fields is empty and columns are bound, the bound columns are used
# instead. Returns 1 on success. Returns 0 after setting diag 2110 when a
# field holds a character outside TAB and printable ASCII while the binary
# attribute is off.
sub __combine {
    my ($self, $dst, $fields, $useIO) = @_;

    my $ctx = $self->_setup_ctx;

    my ($binary, $quot, $sep, $esc, $quote_space) = @{$ctx}{qw/binary quo sep escape_char quote_space/};

    # "\0" stands for "no quote character"
    if(!defined $quot or $quot eq "\0"){ $quot = ''; }

    # Pattern of the characters that get the escape character in front of
    # them; compiled once per quote/escape pair and kept in the object
    my $re_esc;
    if ($esc ne '' and $esc ne "\0") {
        if ($quot ne '') {
            $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
        } else {
            $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
        }
    }

    my $bound = 0;
    my $n = @$fields - 1;
    if ($n < 0 and $ctx->{is_bound}) {
        $n = $ctx->{is_bound} - 1;
        $bound = 1;
    }

    # With keep_meta_info >= 10 the quotation state recorded in _FFLAGS is
    # consulted through is_quoted ()
    my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;

    my $must_be_quoted;
    my @results;
    for(my $i = 0; $i <= $n; $i++) {
        my $v_ref;
        if ($bound) {
            $v_ref = $self->__bound_field($ctx, $i, 1);
        } else {
            if (@$fields > $i) {
                $v_ref = \($fields->[$i]);
            }
        }
        next unless $v_ref;

        my $value = $$v_ref;

        # An undefined field becomes undef_str (when that is true) or ''
        if (!defined $value) {
            if ($ctx->{undef_str}) {
                if ($ctx->{undef_flg}) {
                    $ctx->{utf8} = 1;
                    $ctx->{binary} = 1;
                }
                push @results, $ctx->{undef_str};
            } else {
                push @results, '';
            }
            next;
        }

        # A field starting with "=" is passed to _formula () when the
        # formula attribute is set
        if ( substr($value, 0, 1) eq '=' && $ctx->{formula} ) {
            $value = $self->_formula($ctx, $value, $i);
            if (!defined $value) {
                push @results, '';
                next;
            }
        }

        $must_be_quoted = $ctx->{always_quote} ? 1 : 0;
        if ($value eq '') {
            $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
        }
        else {

            # A value with the utf8 flag switches the context to utf8 and binary
            if (utf8::is_utf8 $value) {
                $ctx->{utf8} = 1;
                $ctx->{binary} = 1;
            }

            $must_be_quoted++ if $check_meta && $self->is_quoted($i);

            # Quote when the value holds the quote, separator or escape,
            # or binary/space characters when the matching option is on
            if (!$must_be_quoted and $quot ne '') {
                use bytes;
                $must_be_quoted++ if
                    ($value =~ /\Q$quot\E/) ||
                    ($sep ne '' and $sep ne "\0" and $value =~ /\Q$sep\E/) ||
                    ($esc ne '' and $esc ne "\0" and $value =~ /\Q$esc\E/) ||
                    ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
                    ($ctx->{quote_space} && $value =~ /[\x09\x20]/);
            }

            if (!$ctx->{binary} and $value =~ /[^\x09\x20-\x7E]/) {
                # an argument contained an invalid character...
                $self->{_ERROR_INPUT} = $value;
                $self->SetDiag(2110);
                return 0;
            }

            if ($re_esc) {
                $value =~ s/($re_esc)/$esc$1/g;
            }
            # NUL is written as the escape character followed by "0"
            if ($ctx->{escape_null}) {
                $value =~ s/\0/${esc}0/g;
            }
        }

        if ($must_be_quoted) {
            $value = $quot . $value . $quot;
        }
        push @results, $value;
    }

    $$dst = join($sep, @results) . ( defined $ctx->{eol} ? $ctx->{eol} : '' );

    return 1;
}
1941
# Apply the formula policy in $ctx->{formula} to a field value.
#
#   false : return nothing
#   1, 2  : die "Formulas are forbidden"
#   3     : warn about the formula and return the value unchanged
#   4     : return the empty string
#   5     : return undef
#   other : return nothing
sub _formula {
    my ($self, $ctx, $value, $i) = @_;

    my $mode = $ctx->{formula} or return;

    # XS croak behaves like PP's "die"
    die "Formulas are forbidden\n" if $mode == 1;
    die "Formulas are forbidden\n" if $mode == 2;

    return ''    if $mode == 4;
    return undef if $mode == 5;
    return       if $mode != 3;

    # Mode 3: diagnose, then pass the value through
    my $rec = $ctx->{recno}
        ? sprintf (" in record %lu", $ctx->{recno} + 1)
        : '';

    my $field = '';
    my $names = $self->{_COLUMN_NAMES};
    if (ref $names eq 'ARRAY' and @$names >= $i - 1) {
        my $name = $names->[$i - 1];
        defined $name and $field = sprintf " (column: '%.100s')", $name;
    }

    warn sprintf("Field %d%s%s contains formula '%s'\n", $i, $field, $rec, $value);
    return $value;
}
1972
# Combine $fields (an array ref, or undef for an empty record) and print
# the resulting line to $io.
#
# Croaks when $fields is defined but not an array ref. Returns '' when
# the fields could not be combined; otherwise the result of $io->print,
# or of _set_error_diag (2200) when printing failed.
sub print {
    my ($self, $io, $fields) = @_;

    require IO::Handle;

    $fields = [] unless defined $fields;
    unless (ref($fields) eq 'ARRAY') {
        Carp::croak("Expected fields to be an array ref");
    }

    $self->_hook(before_print => $fields);

    my $line = "";
    unless ($self->__combine(\$line, $fields, 1)) {
        return '';
    }

    # The line already ends in eol; keep $\ from being appended as well
    local $\ = '';

    $io->print($line) or $self->_set_error_diag(2200);
}
1993
1994	################################################################################
1995	# methods for parse
1996	################################################################################
1997
1998
# Parse one record from $src into @$fields (and @$fflags) with a fresh
# context, then run the after_parse hook when the parse succeeded and the
# hook bit is set. Returns the parse state when that is true, otherwise
# whether no error was recorded in $last_error.
sub __parse { # cx_xsParse
    my ($self, $fields, $fflags, $src, $useIO) = @_;

    my $ctx   = $self->_setup_ctx;
    my $state = $self->___parse($ctx, $fields, $fflags, $src, $useIO);

    my $hooks = $ctx->{has_hooks} || 0;
    if ($state and $hooks & HOOK_AFTER_PARSE) {
        $self->_hook(after_parse => $fields);
    }

    return $state || !$last_error;
}
2010
# Set up the input buffer in $ctx, run ____parse () and post-process the
# result: record number, EOF flag, strict field count, look-ahead buffer,
# cache update, field flags and type conversion.
#
# Returns the result of ____parse (), or nothing after a strict field
# count mismatch (diag 2014).
sub ___parse { # cx_c_xsParse
    my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;

    # Use the configured eol as input record separator when needed
    local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};

    if ($ctx->{useIO} = $useIO) {
        require IO::Handle;

        # Start from what a previous call left in the look-ahead buffer
        $ctx->{tmp} = undef;
        if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
            $ctx->{tmp} = $self->{_AHEAD};
            $ctx->{size} = length $ctx->{tmp};
            $ctx->{used} = 0;
        }
    } else {
        # Parsing a string: the whole source is the buffer
        $ctx->{tmp} = $src;
        $ctx->{size} = length $src;
        $ctx->{used} = 0;
        $ctx->{utf8} = utf8::is_utf8($src);
    }
    if ($ctx->{has_error_input}) {
        $self->{_ERROR_INPUT} = undef;
        $ctx->{has_error_input} = 0;
    }

    my $result = $self->____parse($ctx, $src, $fields, $fflags);
    $self->{_RECNO} = ++($ctx->{recno});
    $self->{_EOF} = '';

    # strict: every record must have as many fields as the first one seen
    if ($ctx->{strict}) {
        $ctx->{strict_n} ||= $ctx->{fld_idx};
        if ($ctx->{strict_n} != $ctx->{fld_idx}) {
            $self->__parse_error($ctx, 2014, $ctx->{used});
            return;
        }
    }

    if ($ctx->{useIO}) {
        if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
            # Keep the unused part of the buffer for the next call
            $self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
        } else {
            $ctx->{has_ahead} = 0;
            if ($ctx->{useIO} & useIO_EOF) {
                $self->{_EOF} = 1;
            }
        }
        %{$self->{_CACHE}} = %$ctx;

        # The field flags are only kept when keep_meta_info is set
        if ($fflags) {
            if ($ctx->{keep_meta_info}) {
                $self->{_FFLAGS} = $fflags;
            } else {
                undef $fflags;
            }
        }
    } else {
        %{$self->{_CACHE}} = %$ctx;
    }

    # Convert the defined fields to the configured types (IV or NV)
    if ($result and $ctx->{types}) {
        my $len = @$fields;
        for(my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
            my $value = $fields->[$i];
            next unless defined $value;
            my $type = ord(substr($ctx->{types}, $i, 1));
            if ($type == IV) {
                $fields->[$i] = int($value);
            } elsif ($type == NV) {
                $fields->[$i] = $value + 0.0;
            }
        }
    }

    $result;
}
2086
2087	sub ____parse { # cx_Parse
2088	my ($self, $ctx, $src, $fields, $fflags) = @_;
2089
2090	my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
2091
2092	utf8::encode($sep) if !$ctx->{utf8} and $ctx->{sep_len};
2093	utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
2094	utf8::encode($eol) if !$ctx->{utf8} and $ctx->{eol_len};
2095
2096	my $seenSomething = 0;
2097	my $waitingForField = 1;
2098	my ($value, $v_ref);
2099	$ctx->{fld_idx} = my $fnum = 0;
2100	$ctx->{flag} = 0;
2101
2102	my $re_str = join '\|', map({$_ eq "\0" ? '[\\0]' : quotemeta($_)} sort {length $b <=> length $a} grep {defined $_ and $_ ne ''} $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
2103	$ctx->{_re} = qr/$re_str/;
2104	my $re = qr/$re_str\|[^\x09\x20-\x7E]\|$/;
2105
2106	LOOP:
2107	while($self->__get_from_src($ctx, $src)) {
2108	while($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
2109	my ($hit, $c) = ($1, $2);
2110	$ctx->{used} = pos($ctx->{tmp});
2111	if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
2112	$self->{_AHEAD} = $hit;
2113	$ctx->{has_ahead} = 1;
2114	$ctx->{has_leftover} = 1;
2115	last;
2116	}
2117	last if $seenSomething and $hit eq '' and $c eq ''; # EOF
2118
2119	# new field
2120	if (!$v_ref) {
2121	if ($ctx->{is_bound}) {
2122	$v_ref = $self->__bound_field($ctx, $fnum, 0);
2123	} else {
2124	$value = '';
2125	$v_ref = \$value;
2126	}
2127	$fnum++;
2128	return unless $v_ref;
2129	$ctx->{flag} = 0;
2130	$ctx->{fld_idx}++;
2131	}
2132
2133	$seenSomething = 1;
2134
2135	if (defined $hit and $hit ne '') {
2136	if ($waitingForField) {
2137	$waitingForField = 0;
2138	}
2139	if ($hit =~ /[^\x09\x20-\x7E]/) {
2140	$ctx->{flag} \|= IS_BINARY;
2141	}
2142	$$v_ref .= $hit;
2143	}
2144
2145	RESTART:
2146	if (defined $c and defined $sep and $c eq $sep) {
2147	if ($waitingForField) {
2148	# ,1,"foo, 3",,bar,
2149	# ^ ^
2150	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2151	$$v_ref = undef;
2152	} else {
2153	$$v_ref = "";
2154	}
2155	unless ($ctx->{is_bound}) {
2156	push @$fields, $$v_ref;
2157	}
2158	$v_ref = undef;
2159	if ($ctx->{keep_meta_info} and $fflags) {
2160	push @$fflags, $ctx->{flag};
2161	}
2162	} elsif ($ctx->{flag} & IS_QUOTED) {
2163	# ,1,"foo, 3",,bar,
2164	# ^
2165	$$v_ref .= $c;
2166	} else {
2167	# ,1,"foo, 3",,bar,
2168	# ^ ^ ^
2169	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2170	$v_ref = undef;
2171	$waitingForField = 1;
2172	}
2173	}
2174	elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
2175	if ($waitingForField) {
2176	# ,1,"foo, 3",,bar,\r\n
2177	# ^
2178	$ctx->{flag} \|= IS_QUOTED;
2179	$waitingForField = 0;
2180	next;
2181	}
2182	if ($ctx->{flag} & IS_QUOTED) {
2183	# ,1,"foo, 3",,bar,\r\n
2184	# ^
2185	my $quoesc = 0;
2186	my $c2 = $self->__get($ctx);
2187
2188	if ($ctx->{allow_whitespace}) {
2189	# , 1 , "foo, 3" , , bar , \r\n
2190	# ^
2191	while($self->__is_whitespace($ctx, $c2)) {
2192	if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
2193	$$v_ref .= $c;
2194	$c = $c2;
2195	}
2196	$c2 = $self->__get($ctx);
2197	}
2198	}
2199
2200	if (!defined $c2) { # EOF
2201	# ,1,"foo, 3"
2202	# ^
2203	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2204	return 1;
2205	}
2206
2207	if (defined $c2 and defined $sep and $c2 eq $sep) {
2208	# ,1,"foo, 3",,bar,\r\n
2209	# ^
2210	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2211	$v_ref = undef;
2212	$waitingForField = 1;
2213	next;
2214	}
2215	if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
2216	# ,1,"foo, 3",,"bar"\n
2217	# ^
2218	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2219	return 1;
2220	}
2221
2222	if (defined $esc and $c eq $esc) {
2223	$quoesc = 1;
2224	if (defined $c2 and $c2 eq '0') {
2225	# ,1,"foo, 3"056",,bar,\r\n
2226	# ^
2227	$$v_ref .= "\0";
2228	next;
2229	}
2230	if (defined $c2 and defined $quot and $c2 eq $quot) {
2231	# ,1,"foo, 3""56",,bar,\r\n
2232	# ^
2233	if ($ctx->{utf8}) {
2234	$ctx->{flag} \|= IS_BINARY;
2235	}
2236	$$v_ref .= $c2;
2237	next;
2238	}
2239	if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
2240	# ,1,"foo, 3"56",,bar,\r\n
2241	# ^
2242	$$v_ref .= $c;
2243	$c = $c2;
2244	goto RESTART;
2245	}
2246	}
2247	if (defined $c2 and $c2 eq "\015") {
2248	if ($ctx->{eol_is_cr}) {
2249	# ,1,"foo, 3"\r
2250	# ^
2251	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2252	return 1;
2253	}
2254
2255	my $c3 = $self->__get($ctx);
2256	if (defined $c3 and $c3 eq "\012") {
2257	# ,1,"foo, 3"\r\n
2258	# ^
2259	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2260	return 1;
2261	}
2262
2263	if ($ctx->{useIO} and !$ctx->{eol_len} and $c3 !~ /[^\x09\x20-\x7E]/) {
2264	# ,1,"foo\n 3",,"bar"\r
2265	# baz,4
2266	# ^
2267	$self->__set_eol_is_cr($ctx);
2268	$ctx->{used}--;
2269	$ctx->{has_ahead} = 1;
2270	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2271	return 1;
2272	}
2273
2274	$self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
2275	return;
2276	}
2277
2278	if ($ctx->{allow_loose_quotes} and !$quoesc) {
2279	# ,1,"foo, 3"456",,bar,\r\n
2280	# ^
2281	$$v_ref .= $c;
2282	$c = $c2;
2283	goto RESTART;
2284	}
2285	# 1,"foo" ",3
2286	# ^
2287	if ($quoesc) {
2288	$ctx->{used}--;
2289	$self->__error_inside_quotes($ctx, 2023);
2290	return;
2291	}
2292	$self->__error_inside_quotes($ctx, 2011);
2293	return;
2294	}
2295	# !waitingForField, !InsideQuotes
2296	if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
2297	$ctx->{flag} \|= IS_ERROR;
2298	$$v_ref .= $c;
2299	} else {
2300	$self->__error_inside_field($ctx, 2034);
2301	return;
2302	}
2303	}
2304	elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
2305	# This means quote_char != escape_char
2306	if ($waitingForField) {
2307	$waitingForField = 0;
2308	if ($ctx->{allow_unquoted_escape}) {
2309	# The escape character is the first character of an
2310	# unquoted field
2311	# ... get and store next character
2312	my $c2 = $self->__get($ctx);
2313	$$v_ref = "";
2314
2315	if (!defined $c2) { # EOF
2316	$ctx->{used}--;
2317	$self->__error_inside_field($ctx, 2035);
2318	return;
2319	}
2320	if ($c2 eq '0') {
2321	$$v_ref .= "\0";
2322	}
2323	elsif (
2324	(defined $quot and $c2 eq $quot) or
2325	(defined $sep and $c2 eq $sep) or
2326	(defined $esc and $c2 eq $esc) or
2327	$ctx->{allow_loose_escapes}
2328	) {
2329	if ($ctx->{utf8}) {
2330	$ctx->{flag} \|= IS_BINARY;
2331	}
2332	$$v_ref .= $c2;
2333	} else {
2334	$self->__parse_inside_quotes($ctx, 2025);
2335	return;
2336	}
2337	}
2338	}
2339	elsif ($ctx->{flag} & IS_QUOTED) {
2340	my $c2 = $self->__get($ctx);
2341	if (!defined $c2) { # EOF
2342	$ctx->{used}--;
2343	$self->__error_inside_quotes($ctx, 2024);
2344	return;
2345	}
2346	if ($c2 eq '0') {
2347	$$v_ref .= "\0";
2348	}
2349	elsif (
2350	(defined $quot and $c2 eq $quot) or
2351	(defined $sep and $c2 eq $sep) or
2352	(defined $esc and $c2 eq $esc) or
2353	$ctx->{allow_loose_escapes}
2354	) {
2355	if ($ctx->{utf8}) {
2356	$ctx->{flag} \|= IS_BINARY;
2357	}
2358	$$v_ref .= $c2;
2359	} else {
2360	$ctx->{used}--;
2361	$self->__error_inside_quotes($ctx, 2025);
2362	return;
2363	}
2364	}
2365	elsif ($v_ref) {
2366	my $c2 = $self->__get($ctx);
2367	if (!defined $c2) { # EOF
2368	$ctx->{used}--;
2369	$self->__error_inside_field($ctx, 2035);
2370	return;
2371	}
2372	$$v_ref .= $c2;
2373	}
2374	else {
2375	$self->__error_inside_field($ctx, 2036);
2376	return;
2377	}
2378	}
2379	elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
2380	EOLX:
2381	if ($waitingForField) {
2382	# ,1,"foo, 3",,bar,
2383	# ^
2384	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2385	$$v_ref = undef;
2386	} else {
2387	$$v_ref = "";
2388	}
2389	unless ($ctx->{is_bound}) {
2390	push @$fields, $$v_ref;
2391	}
2392	if ($ctx->{keep_meta_info} and $fflags) {
2393	push @$fflags, $ctx->{flag};
2394	}
2395	return 1;
2396	}
2397	if ($ctx->{flag} & IS_QUOTED) {
2398	# ,1,"foo\n 3",,bar,
2399	# ^
2400	$ctx->{flag} \|= IS_BINARY;
2401	unless ($ctx->{binary}) {
2402	$self->__error_inside_quotes($ctx, 2021);
2403	return;
2404	}
2405	$$v_ref .= $c;
2406	}
2407	elsif ($ctx->{verbatim}) {
2408	# ,1,foo\n 3,,bar,
2409	# This feature should be deprecated
2410	$ctx->{flag} \|= IS_BINARY;
2411	unless ($ctx->{binary}) {
2412	$self->__error_inside_field($ctx, 2030);
2413	return;
2414	}
2415	$$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
2416	}
2417	else {
2418	# sep=,
2419	# ^
2420	if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
2421	$ctx->{sep} = $1;
2422	use bytes;
2423	my $len = length $ctx->{sep};
2424	if ($len <= 16) {
2425	$ctx->{sep_len} = $len == 1 ? 0 : $len;
2426	return $self->____parse($ctx, $src, $fields, $fflags);
2427	}
2428	}
2429
2430	# ,1,"foo\n 3",,bar
2431	# ^
2432	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2433	return 1;
2434	}
2435	}
2436	elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
2437	if ($waitingForField) {
2438	$waitingForField = 0;
2439	if ($ctx->{eol_is_cr}) {
2440	# ,1,"foo\n 3",,bar,\r
2441	# ^
2442	$c = "\012";
2443	goto RESTART;
2444	}
2445
2446	my $c2 = $self->__get($ctx);
2447	if (!defined $c2) { # EOF
2448	# ,1,"foo\n 3",,bar,\r
2449	# ^
2450	$c = undef;
2451	goto RESTART;
2452	}
2453	if ($c2 eq "\012") { # \r is not optional before EOLX!
2454	# ,1,"foo\n 3",,bar,\r\n
2455	# ^
2456	$c = $c2;
2457	goto RESTART;
2458	}
2459
2460	if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2461	# ,1,"foo\n 3",,bar,\r
2462	# baz,4
2463	# ^
2464	$self->__set_eol_is_cr($ctx);
2465	$ctx->{used}--;
2466	$ctx->{has_ahead} = 1;
2467	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2468	return 1;
2469	}
2470
2471	# ,1,"foo\n 3",,bar,\r\t
2472	# ^
2473	$ctx->{used}--;
2474	$self->__error_inside_field($ctx, 2031);
2475	return;
2476	}
2477	if ($ctx->{flag} & IS_QUOTED) {
2478	# ,1,"foo\r 3",,bar,\r\t
2479	# ^
2480	$ctx->{flag} \|= IS_BINARY;
2481	unless ($ctx->{binary}) {
2482	$self->__error_inside_quotes($ctx, 2022);
2483	return;
2484	}
2485	$$v_ref .= $c;
2486	}
2487	else {
2488	if ($ctx->{eol_is_cr}) {
2489	# ,1,"foo\n 3",,bar\r
2490	# ^
2491	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2492	return 1;
2493	}
2494
2495	my $c2 = $self->__get($ctx);
2496	if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
2497	# ,1,"foo\n 3",,bar\r\n
2498	# ^
2499	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2500	return 1;
2501	}
2502
2503	if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
2504	# ,1,"foo\n 3",,bar\r
2505	# baz,4
2506	# ^
2507	$self->__set_eol_is_cr($ctx);
2508	$ctx->{used}--;
2509	$ctx->{has_ahead} = 1;
2510	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2511	return 1;
2512	}
2513
2514	# ,1,"foo\n 3",,bar\r\t
2515	# ^
2516	$self->__error_inside_field($ctx, 2032);
2517	return;
2518	}
2519	}
2520	else {
2521	if ($ctx->{eolx} and $c eq $eol) {
2522	$c = '';
2523	goto EOLX;
2524	}
2525
2526	if ($waitingForField) {
2527	if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
2528	do {
2529	$c = $self->__get($ctx);
2530	last if !defined $c;
2531	} while $self->__is_whitespace($ctx, $c);
2532	goto RESTART;
2533	}
2534	$waitingForField = 0;
2535	goto RESTART;
2536	}
2537	if ($ctx->{flag} & IS_QUOTED) {
2538	if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2539	$ctx->{flag} \|= IS_BINARY;
2540	unless ($ctx->{binary} or $ctx->{utf8}) {
2541	$self->__error_inside_quotes($ctx, 2026);
2542	return;
2543	}
2544	}
2545	$$v_ref .= $c;
2546	} else {
2547	if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
2548	$ctx->{flag} \|= IS_BINARY;
2549	unless ($ctx->{binary} or $ctx->{utf8}) {
2550	$self->__error_inside_field($ctx, 2037);
2551	return;
2552	}
2553	}
2554	$$v_ref .= $c;
2555	}
2556	}
2557	last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
2558	}
2559	}
2560
2561	if ($waitingForField) {
2562	if ($seenSomething or !$ctx->{useIO}) {
2563	# new field
2564	if (!$v_ref) {
2565	if ($ctx->{is_bound}) {
2566	$v_ref = $self->__bound_field($ctx, $fnum, 0);
2567	} else {
2568	$value = '';
2569	$v_ref = \$value;
2570	}
2571	$fnum++;
2572	return unless $v_ref;
2573	$ctx->{flag} = 0;
2574	$ctx->{fld_idx}++;
2575	}
2576	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
2577	$$v_ref = undef;
2578	} else {
2579	$$v_ref = "";
2580	}
2581	unless ($ctx->{is_bound}) {
2582	push @$fields, $$v_ref;
2583	}
2584	if ($ctx->{keep_meta_info} and $fflags) {
2585	push @$fflags, $ctx->{flag};
2586	}
2587	return 1;
2588	}
2589	$self->SetDiag(2012);
2590	return;
2591	}
2592
2593	if ($ctx->{flag} & IS_QUOTED) {
2594	$self->__error_inside_quotes($ctx, 2027);
2595	return;
2596	}
2597
2598	if ($v_ref) {
2599	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
2600	}
2601	return 1;
2602	}
2603
# Make sure the parse buffer ($ctx->{tmp}) has data left to consume.
#
# Returns 1 when unread data is available, either still in the buffer or
# freshly fetched from $src.  Returns nothing when no more data can be had:
# the parser is not in IO mode, or $src delivered nothing usable.  In the
# latter case the useIO_EOF bit is set on $ctx->{useIO}.
sub __get_from_src {
    my ($self, $ctx, $src) = @_;

    # A buffer exists and it has not been (fully) consumed yet.
    return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
    return 1 if $ctx->{used} < $ctx->{size};

    # Only IO sources can deliver more data.
    return unless $ctx->{useIO};

    my $line     = $src->getline;
    my $refilled = 0;

    if (defined $line) {
        if ($ctx->{has_ahead}) {
            # Glue the pending look-ahead data in front of the new line.
            $ctx->{tmp}  = $self->{_AHEAD};
            $ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
            $ctx->{tmp} .= $line;
            $ctx->{has_ahead} = 0;
        }
        else {
            $ctx->{tmp} = $line;
        }
        $refilled = 1;
    }
    elsif (delete $ctx->{has_leftover}) {
        # The source is exhausted, but look-ahead data is still pending.
        $ctx->{tmp}       = $self->{_AHEAD};
        $ctx->{has_ahead} = 0;
        $ctx->{useIO}    |= useIO_EOF;
        $refilled = 1;
    }

    # A non-empty refill restarts consumption at the head of the buffer.
    if ($refilled and ($ctx->{size} = length $ctx->{tmp})) {
        $ctx->{used} = -1;
        $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
        pos($ctx->{tmp}) = 0;
        return 1;
    }

    # Nothing (more) to read: leave a defined buffer and flag EOF.
    $ctx->{tmp} = '' unless defined $ctx->{tmp};
    $ctx->{useIO} |= useIO_EOF;
    return;
}
2640
# Switch the parser to a bare carriage return ("\015") as end-of-line.
# Updates the context, copies the whole context into $self->{_CACHE}, and
# mirrors the new eol on the object itself.
sub __set_eol_is_cr {
    my ($self, $ctx) = @_;

    @{$ctx}{qw(eol eol_is_cr eol_len)} = ("\015", 1, 1);

    # Replace the cached context contents in place.
    %{$self->{_CACHE}} = %$ctx;

    $self->{eol} = $ctx->{eol};
}
2650
# Return the scalar reference bound to column index $i.
#
# With a true $keep the reference is returned untouched; otherwise the
# referenced scalar is reset to "" first, provided it is not read-only.
# Sets diagnostic 3006 when $i is not below $ctx->{is_bound}, and 3008 when
# no usable reference is found.  Both failures return nothing.
sub __bound_field {
    my ($self, $ctx, $i, $keep) = @_;

    unless ($i < $ctx->{is_bound}) {
        $self->SetDiag(3006);
        return;
    }

    my $slot = ref $ctx->{bound} eq 'ARRAY' ? $ctx->{bound}[$i] : undef;
    if (ref $slot) {
        return $slot if $keep;

        if (!Scalar::Util::readonly($$slot)) {
            $$slot = "";
            return $slot;
        }
    }

    $self->SetDiag(3008);
    return;
}
2672
# Fetch the next token from the parse buffer $ctx->{tmp}.
#
# A token is either a match of the pattern in $ctx->{_re} or one single
# character.  Returns the token and advances $ctx->{used} to the new match
# position; returns nothing when the buffer is unset or exhausted, or when
# nothing matches (the match position is then restored).  Tokens containing
# anything outside TAB and printable ASCII set IS_BINARY in $ctx->{flag}.
sub __get {
    my ($self, $ctx) = @_;

    my $used = $ctx->{used};
    return unless defined $used;
    return unless $used < $ctx->{size};

    my $start = pos($ctx->{tmp});
    unless ($ctx->{tmp} =~ /\G($ctx->{_re}|.)/gs) {
        # A failed //g match resets pos(); put it back where it was.
        pos($ctx->{tmp}) = $start;
        return;
    }

    my $token = $1;
    $ctx->{flag} |= IS_BINARY if $token =~ /[^\x09\x20-\x7e]/;
    $ctx->{used} = pos($ctx->{tmp});
    return $token;
}
2690
# Report parse error $error found inside a quoted field; the error position
# is one before the current read offset ($ctx->{used}).
sub __error_inside_quotes {
    my ($self, $ctx, $error) = @_;
    my $error_pos = $ctx->{used} - 1;
    $self->__parse_error($ctx, $error, $error_pos);
}
2695
# Report parse error $error found inside an unquoted field; the error
# position is one before the current read offset ($ctx->{used}).
sub __error_inside_field {
    my ($self, $ctx, $error) = @_;
    my $error_pos = $ctx->{used} - 1;
    $self->__parse_error($ctx, $error, $error_pos);
}
2700
# Record where a parse error happened and raise diagnostic $error.
# Stores the position and field index on the object, plus the current
# buffer as the error input when that buffer is true.  Always returns
# nothing.
sub __parse_error {
    my ($self, $ctx, $error, $pos) = @_;

    @{$self}{qw(_ERROR_POS _ERROR_FLD)} = ($pos, $ctx->{fld_idx});
    if ($ctx->{tmp}) {
        $self->{_ERROR_INPUT} = $ctx->{tmp};
    }

    $self->SetDiag($error);
    return;
}
2709
# True when $c is a SPACE or TAB that is not itself in use as the
# separator, quote, or escape character of the context.
# Returns nothing for an undefined $c.
sub __is_whitespace {
    my ($self, $ctx, $c) = @_;
    return unless defined $c;

    # A character serving as sep/quote/escape never counts as whitespace.
    my $is_special = 0;
    for my $attr (qw(sep quo escape_char)) {
        my $special = $ctx->{$attr};
        next unless defined $special and $c eq $special;
        $is_special = 1;
        last;
    }

    return (!$is_special && ($c eq " " || $c eq "\t"));
}
2720
# Finish the field held in $$v_ref and append it to the result.
#
# Applies formula handling, empty/blank-to-undef conversion, trailing
# whitespace stripping for unquoted fields and UTF-8 decoding, then pushes
# the value onto @$fields (unless columns are bound) and the flag onto
# @$fflags (when meta info is kept).
sub __push_value { # AV_PUSH (part of)
    my ($self, $ctx, $v_ref, $fields, $fflags, $flag, $fnum) = @_;

    utf8::encode($$v_ref) if $ctx->{utf8};

    # Fields with a leading "=" are delegated to the formula handler.
    # NOTE(review): this path returns before the is_bound and
    # keep_meta_info handling below - confirm that is intended.
    if ($ctx->{formula} && $$v_ref && substr($$v_ref, 0, 1) eq '=') {
        my $value = $self->_formula($ctx, $$v_ref, $fnum);
        push @$fields, $value;
        return;
    }

    my $is_empty = (!defined $$v_ref or $$v_ref eq '');
    my $to_undef = $is_empty
        && ($ctx->{empty_is_undef} || (!($flag & IS_QUOTED) && $ctx->{blank_is_undef}));

    if ($to_undef) {
        $$v_ref = undef;
    }
    else {
        # Only unquoted fields lose their trailing blanks.
        $$v_ref =~ s/[ \t]+$// if $ctx->{allow_whitespace} && !($flag & IS_QUOTED);

        utf8::decode($$v_ref)
            if $flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} || _is_valid_utf8($$v_ref));
    }

    push @$fields, $$v_ref unless $ctx->{is_bound};
    push @$fflags, $flag if $ctx->{keep_meta_info} and $fflags;
}
2749
# Read and parse the next record from $io.
# Returns a reference to the list of fields, or undef when parsing fails.
sub getline {
    my ($self, $io) = @_;

    my (@fields, @fflags);
    return $self->__parse(\@fields, \@fflags, $io, 1) ? \@fields : undef;
}
2757
# Read all remaining records from $io and return them as a reference to a
# list of array references.
#
# A positive $offset skips that many leading records; a negative $offset
# keeps only the last -$offset records.  $len limits the number of records
# returned.  Rows rejected by an after_parse hook are left out.
sub getline_all {
    my ($self, $io, $offset, $len) = @_;

    my $ctx = $self->_setup_ctx;

    my ($tail, $n) = (0, 0);
    $offset ||= 0;
    ($tail, $offset) = (-$offset, -1) if $offset < 0;

    my (@row, @list);
    ROW:
    while ($self->___parse($ctx, \@row, undef, $io, 1)) {
        $ctx = $self->_setup_ctx;

        # Still skipping the leading records.
        if ($offset > 0) {
            $offset--;
            next ROW;
        }

        # In tail mode, drop the oldest row once the window is full.
        if ($n++ >= $tail and $tail) {
            shift @list;
            $n--;
        }

        if (($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
            $self->_hook(after_parse => \@row) or next ROW;
        }

        push @list, [@row];

        last ROW if defined $len && $n >= $len and $offset >= 0; # exceeds limit size
    }
    continue {
        # The row buffer is reused for every record.
        @row = ();
    }

    if (defined $len && $n > $len) {
        @list = splice(@list, 0, $len);
    }

    return \@list;
}
2803
# Return 1 when the given non-empty byte string consists solely of
# well-formed UTF-8 sequences, 0 otherwise.
sub _is_valid_utf8 {
    return 1 if $_[0] =~ /^(?:
         [\x00-\x7F]
        |[\xC2-\xDF][\x80-\xBF]
        |[\xE0][\xA0-\xBF][\x80-\xBF]
        |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
        |[\xED][\x80-\x9F][\x80-\xBF]
        |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
        |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
        |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
        |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
    )+$/x;
    return 0;
}
2817
2818	################################################################################
2819	# methods for errors
2820	################################################################################
2821
# Raise diagnostic $error and, when $pos is given, record it as the error
# position.  Always returns nothing.
sub _set_error_diag {
    my ($self, $error, $pos) = @_;

    $self->SetDiag($error);
    $self->{_ERROR_POS} = $pos if defined $pos;

    return;
}
2833
# Return the input recorded for the last parse error.
# Returns nothing unless the invocant is hash based or is an instance of a
# class whose name starts with Text::CSV.
sub error_input {
    my $self = shift;
    return unless $self;

    my $is_hash = (Scalar::Util::reftype($self) || '') eq 'HASH';
    return unless $is_hash or (ref $self) =~ /^Text::CSV/;

    return $self->{_ERROR_INPUT};
}
2841
# Build a Text::CSV::ErrorDiag object holding the error code and the
# matching message from the $ERRORS table.
sub _sv_diag {
    my ($self, $error) = @_;
    my @diag = ($error, $ERRORS->{$error});
    return bless \@diag, 'Text::CSV::ErrorDiag';
}
2846
# Make $error the current diagnostic, both for this object and as the
# file-level last error, and return the diagnostic object.
#
# Error 0 clears the recorded error position, field and input; error 2012
# marks the object as being at EOF.  With auto_diag set in the context,
# error_diag is called in void context.
sub _set_diag {
    my ($self, $ctx, $error) = @_;

    $last_error = $self->{_ERROR_DIAG} = $self->_sv_diag($error);

    if ($error == 0) {
        @{$self}{qw(_ERROR_POS _ERROR_FLD _ERROR_INPUT)} = (0, 0, undef);
        $ctx->{has_error_input} = 0;
    }

    $self->{_EOF} = 1 if $error == 2012; # EOF

    $self->error_diag if $ctx->{auto_diag};

    return $last_error;
}
2866
# Set the diagnostic to $error and return the diagnostic object.
# Called on an instance the error is recorded on that instance; called as a
# class method only the diagnostic object is built.  A defined $errstr
# overrides the message of the returned object.
sub SetDiag {
    my ($self, $error, $errstr) = @_;

    my $diag;
    if (ref $self) {
        my $ctx = $self->_setup_ctx;
        $diag = $self->_set_diag($ctx, $error);
    }
    else {
        $diag = $self->_sv_diag($error);
    }

    $diag->[1] = $errstr if defined $errstr;
    return $diag;
}
2882
2883	################################################################################
package Text::CSV::ErrorDiag;

# Diagnostic object: an array reference of [ error code, error message ].
# It stringifies to the message and yields the error code whenever it is
# used with one of the overloaded arithmetic operators.

use strict;
use overload (
    (map { $_ => \&numeric } qw( + - * / )),
    '""'     => \&stringify,
    fallback => 1,
);

# Arithmetic handler: return the error code of whichever operand is a
# reference, checking the left operand first.
sub numeric {
    my ($left, $right) = @_;
    return $left->[0] if ref $left;
    return $right->[0];
}

# String handler: return the error message.
sub stringify {
    my ($diag) = @_;
    return $diag->[1];
}
2906	################################################################################
2907	1;
2908	__END__
2909
2910	=head1 NAME
2911
2912	Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
2913
2914
2915	=head1 SYNOPSIS
2916
2917	This section is taken from Text::CSV_XS.
2918
2919	# Functional interface
2920	use Text::CSV_PP qw( csv );
2921
2922	# Read whole file in memory
2923	my $aoa = csv (in => "data.csv"); # as array of array
2924	my $aoh = csv (in => "data.csv",
2925	headers => "auto"); # as array of hash
2926
2927	# Write array of arrays as csv file
2928	csv (in => $aoa, out => "file.csv", sep_char=> ";");
2929
2930	# Only show lines where "code" is odd
2931	csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
2932
2933	# Object interface
2934	use Text::CSV_PP;
2935
2936	my @rows;
2937	# Read/parse CSV
2938	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
2939	open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
2940	while (my $row = $csv->getline ($fh)) {
2941	$row->[2] =~ m/pattern/ or next; # 3rd field should match
2942	push @rows, $row;
2943	}
2944	close $fh;
2945
2946	# and write as CSV
2947	open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
2948	$csv->say ($fh, $_) for @rows;
2949	close $fh or die "new.csv: $!";
2950
2951	=head1 DESCRIPTION
2952
2953	Text::CSV_PP is a pure-perl module that provides facilities for the
2954	composition and decomposition of comma-separated values. This is
2955	(almost) compatible with much faster L<Text::CSV_XS>, and mainly
2956	used as its fallback module when you use L<Text::CSV> module without
2957	having installed Text::CSV_XS. If you don't have any reason to use
2958	this module directly, use Text::CSV for speed boost and portability
2959	(or maybe Text::CSV_XS when you write a one-off script and don't need
2960	to care about portability).
2961
2962	The following caveats are taken from the doc of Text::CSV_XS.
2963
2964	=head2 Embedded newlines
2965
2966	B<Important Note>: The default behavior is to accept only ASCII characters
2967	in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
2968	fields can not contain newlines. If your data contains newlines embedded in
2969	fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
2970	set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
2971	parsing options, you will always want to set binary.
2972
2973	But you still have the problem that you have to pass a correct line to the
2974	L</parse> method, which is more complicated from the usual point of usage:
2975
2976	my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
2977	while (<>) { # WRONG!
2978	$csv->parse ($_);
2979	my @fields = $csv->fields ();
2980	}
2981
2982	this will break, as the C<while> might read broken lines: it does not care
2983	about the quoting. If you need to support embedded newlines, the way to go
2984	is to B<not> pass L<C<eol>\|/eol> in the parser (it accepts C<\n>, C<\r>,
2985	B<and> C<\r\n> by default) and then
2986
2987	my $csv = Text::CSV_PP->new ({ binary => 1 });
2988	open my $fh, "<", $file or die "$file: $!";
2989	while (my $row = $csv->getline ($fh)) {
2990	my @fields = @$row;
2991	}
2992
2993	The old(er) way of using global file handles is still supported
2994
2995	while (my $row = $csv->getline (*ARGV)) { ... }
2996
2997	=head2 Unicode
2998
2999	Unicode is only tested to work with perl-5.8.2 and up.
3000
3001	See also L</BOM>.
3002
3003	The simplest way to ensure the correct encoding is used for in- and output
3004	is by either setting layers on the filehandles, or setting the L</encoding>
3005	argument for L</csv>.
3006
3007	open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
3008	or
3009	my $aoa = csv (in => "in.csv", encoding => "UTF-8");
3010
3011	open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
3012	or
3013	csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
3014
3015	On parsing (both for L</getline> and L</parse>), if the source is marked
3016	being UTF8, then all fields that are marked binary will also be marked UTF8.
3017
3018	On combining (L</print> and L</combine>): if any of the combining fields
3019	was marked UTF8, the resulting string will be marked as UTF8. Note however
3020	that all fields I<before> the first field marked UTF8 that contained 8-bit
3021	characters and were not upgraded to UTF8 will be C<bytes> in the
3022	resulting string too, possibly causing unexpected errors. If you pass data
3023	of different encoding, or you don't know if there is different encoding,
3024	force it to be upgraded before you pass them on:
3025
3026	$csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
3027
3028	For complete control over encoding, please use L<Text::CSV::Encoded>:
3029
3030	use Text::CSV::Encoded;
3031	my $csv = Text::CSV::Encoded->new ({
3032	encoding_in => "iso-8859-1", # the encoding comes into Perl
3033	encoding_out => "cp1252", # the encoding comes out of Perl
3034	});
3035
3036	$csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
3037	# combine () and print () accept literally utf8 encoded data
3038	# parse () and getline () return literally utf8 encoded data
3039
3040	$csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
3041	# combine () and print () accept UTF8 marked data
3042	# parse () and getline () return UTF8 marked data
3043
3044	=head2 BOM
3045
3046	BOM (or Byte Order Mark) handling is available only inside the L</header>
3047	method. This method supports the following encodings: C<utf-8>, C<utf-1>,
3048	C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
3049	C<bocu-1>, and C<gb-18030>. See L<Wikipedia\|https://en.wikipedia.org/wiki/Byte_order_mark>.
3050
3051	If a file has a BOM, the easiest way to deal with that is
3052
3053	my $aoh = csv (in => $file, detect_bom => 1);
3054
3055	All records will be encoded based on the detected BOM.
3056
3057	This implies a call to the L</header> method, which defaults to also set
3058	the L</column_names>. So this is B<not> the same as
3059
3060	my $aoh = csv (in => $file, headers => "auto");
3061
3062	which only reads the first record to set L</column_names> but ignores any
3063	meaning of a possibly present BOM.
3064
3065	=head1 METHODS
3066
3067	This section is also taken from Text::CSV_XS.
3068
3069	=head2 version
3070
3071	(Class method) Returns the current module version.
3072
3073	=head2 new
3074
3075	(Class method) Returns a new instance of class Text::CSV_PP. The attributes
3076	are described by the (optional) hash ref C<\%attr>.
3077
3078	my $csv = Text::CSV_PP->new ({ attributes ... });
3079
3080	The following attributes are available:
3081
3082	=head3 eol
3083
3084	my $csv = Text::CSV_PP->new ({ eol => $/ });
3085	$csv->eol (undef);
3086	my $eol = $csv->eol;
3087
3088	The end-of-line string to add to rows for L</print> or the record separator
3089	for L</getline>.
3090
3091	When not passed in a B<parser> instance, the default behavior is to accept
3092	C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
3093	all. Passing C<undef> or the empty string behave the same.
3094
3095	When not passed in a B<generating> instance, records are not terminated at
3096	all, so it is probably wise to pass something you expect. A safe choice for
3097	C<eol> on output is either C<$/> or C<\r\n>.
3098
3099	Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
3100	(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
3101	Return). The L<C<eol>\|/eol> attribute cannot exceed 7 (ASCII) characters.
3102
3103	If both C<$/> and L<C<eol>\|/eol> equal C<"\015">, parsing lines that end on
3104	only a Carriage Return without Line Feed, will be L</parse>d correctly.
3105
3106	=head3 sep_char
3107
3108	my $csv = Text::CSV_PP->new ({ sep_char => ";" });
3109	$csv->sep_char (";");
3110	my $c = $csv->sep_char;
3111
3112	The char used to separate fields, by default a comma. (C<,>). Limited to a
3113	single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
3114	(tilde). When longer sequences are required, use L<C<sep>\|/sep>.
3115
3116	The separation character can not be equal to the quote character or to the
3117	escape character.
3118
3119	=head3 sep
3120
3121	my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
3122	$csv->sep (";");
3123	my $sep = $csv->sep;
3124
3125	The chars used to separate fields, by default undefined. Limited to 8 bytes.
3126
3127	When set, overrules L<C<sep_char>\|/sep_char>. If its length is one byte it
3128	acts as an alias to L<C<sep_char>\|/sep_char>.
3129
3130	=head3 quote_char
3131
3132	my $csv = Text::CSV_PP->new ({ quote_char => "'" });
3133	$csv->quote_char (undef);
3134	my $c = $csv->quote_char;
3135
3136	The character to quote fields containing blanks or binary data, by default
3137	the double quote character (C<">). A value of undef suppresses quote chars
3138	(for simple cases only). Limited to a single-byte character, usually in the
3139	range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
3140	required, use L<C<quote>\|/quote>.
3141
3142	C<quote_char> can not be equal to L<C<sep_char>\|/sep_char>.
3143
3144	=head3 quote
3145
3146	my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
3147	$csv->quote ("'");
3148	my $quote = $csv->quote;
3149
3150	The chars used to quote fields, by default undefined. Limited to 8 bytes.
3151
3152	When set, overrules L<C<quote_char>\|/quote_char>. If its length is one byte
3153	it acts as an alias to L<C<quote_char>\|/quote_char>.
3154
3155	=head3 escape_char
3156
3157	my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3158	$csv->escape_char (":");
3159	my $c = $csv->escape_char;
3160
3161	The character to escape certain characters inside quoted fields. This is
3162	limited to a single-byte character, usually in the range from C<0x20>
3163	(space) to C<0x7E> (tilde).
3164
3165	The C<escape_char> defaults to being the double-quote mark (C<">). In other
3166	words the same as the default L<C<quote_char>\|/quote_char>. This means that
3167	doubling the quote mark in a field escapes it:
3168
3169	"foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
3170
3171	If you change the L<C<quote_char>\|/quote_char> without changing the
3172	C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
3173	If instead you want to escape the L<C<quote_char>\|/quote_char> by doubling
3174	it you will need to also change the C<escape_char> to be the same as what
3175	you have changed the L<C<quote_char>\|/quote_char> to.
3176
3177	Setting C<escape_char> to C<undef> or C<""> will disable escaping completely
3178	and is greatly discouraged. This will also disable C<escape_null>.
3179
3180	The escape character can not be equal to the separation character.
3181
3182	=head3 binary
3183
3184	my $csv = Text::CSV_PP->new ({ binary => 1 });
3185	$csv->binary (0);
3186	my $f = $csv->binary;
3187
3188	If this attribute is C<1>, you may use binary characters in quoted fields,
3189	including line feeds, carriage returns and C<NULL> bytes. (The latter could
3190	be escaped as C<"0>.) By default this feature is off.
3191
3192	If a string is marked UTF8, C<binary> will be turned on automatically when
3193	binary characters other than C<CR> and C<NL> are encountered. Note that a
3194	simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
3195	so setting C<< { binary => 1 } >> is still a wise option.
3196
3197	=head3 strict
3198
3199	my $csv = Text::CSV_PP->new ({ strict => 1 });
3200	$csv->strict (0);
3201	my $f = $csv->strict;
3202
3203	If this attribute is set to C<1>, any row that parses to a different number
3204	of fields than the previous row will cause the parser to throw error 2014.
3205
3206	=head3 formula_handling
3207
3208	=head3 formula
3209
3210	my $csv = Text::CSV_PP->new ({ formula => "none" });
3211	$csv->formula ("none");
3212	my $f = $csv->formula;
3213
3214	This defines the behavior of fields containing I<formulas>. As formulas are
3215	considered dangerous in spreadsheets, this attribute can define an optional
3216	action to be taken if a field starts with an equal sign (C<=>).
3217
3218	For purpose of code-readability, this can also be written as
3219
3220	my $csv = Text::CSV_PP->new ({ formula_handling => "none" });
3221	$csv->formula_handling ("none");
3222	my $f = $csv->formula_handling;
3223
3224	Possible values for this attribute are
3225
3226	=over 2
3227
3228	=item none
3229
3230	Take no specific action. This is the default.
3231
3232	$csv->formula ("none");
3233
3234	=item die
3235
3236	Cause the process to C<die> whenever a leading C<=> is encountered.
3237
3238	$csv->formula ("die");
3239
3240	=item croak
3241
3242	Cause the process to C<croak> whenever a leading C<=> is encountered. (See
3243	L<Carp>)
3244
3245	$csv->formula ("croak");
3246
3247	=item diag
3248
3249	Report position and content of the field whenever a leading C<=> is found.
3250	The value of the field is unchanged.
3251
3252	$csv->formula ("diag");
3253
3254	=item empty
3255
3256	Replace the content of fields that start with a C<=> with the empty string.
3257
3258	$csv->formula ("empty");
3259	$csv->formula ("");
3260
3261	=item undef
3262
3263	Replace the content of fields that start with a C<=> with C<undef>.
3264
3265	$csv->formula ("undef");
3266	$csv->formula (undef);
3267
3268	=back
3269
3270	All other values will give a warning and then fallback to C<diag>.
3271
3272	=head3 decode_utf8
3273
3274	my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
3275	$csv->decode_utf8 (0);
3276	my $f = $csv->decode_utf8;
3277
3278	This attribute defaults to TRUE.
3279
3280	While I<parsing>, fields that are valid UTF-8, are automatically set to be
3281	UTF-8, so that
3282
3283	$csv->parse ("\xC4\xA8\n");
3284
3285	results in
3286
3287	PV("\304\250"\0) [UTF8 "\x{128}"]
3288
3289	Sometimes it might not be a desired action. To prevent those upgrades, set
3290	this attribute to false, and the result will be
3291
3292	PV("\304\250"\0)
3293
3294	=head3 auto_diag
3295
3296	my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
3297	$csv->auto_diag (2);
3298	my $l = $csv->auto_diag;
3299
3300	Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
3301	to be automatically called in void context upon errors.
3302
3303	In case of error C<2012 - EOF>, this call will be void.
3304
3305	If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
3306	on errors instead of C<warn>. If set to anything unrecognized, it will be
3307	silently ignored.
3308
3309	Future extensions to this feature will include more reliable auto-detection
3310	of C<autodie> being active in the scope of which the error occurred which
3311	will increment the value of C<auto_diag> with C<1> the moment the error is
3312	detected.
3313
3314	=head3 diag_verbose
3315
3316	my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
3317	$csv->diag_verbose (2);
3318	my $l = $csv->diag_verbose;
3319
3320	Set the verbosity of the output triggered by C<auto_diag>. Currently only
3321	adds the current input-record-number (if known) to the diagnostic output
3322	with an indication of the position of the error.
3323
3324	=head3 blank_is_undef
3325
3326	my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
3327	$csv->blank_is_undef (0);
3328	my $f = $csv->blank_is_undef;
3329
3330	Under normal circumstances, C<CSV> data makes no distinction between quoted-
3331	and unquoted empty fields. These both end up in an empty string field once
3332	read, thus
3333
3334	1,"",," ",2
3335
3336	is read as
3337
3338	("1", "", "", " ", "2")
3339
3340	When I<writing> C<CSV> files with either L<C<always_quote>\|/always_quote>
3341	or L<C<quote_empty>\|/quote_empty> set, the unquoted I<empty> field is the
3342	result of an undefined value. To enable this distinction when I<reading>
3343	C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
3344	fields to be set to C<undef>, causing the above to be parsed as
3345
3346	("1", "", undef, " ", "2")
3347
3348	Note that this is specifically important when loading C<CSV> fields into a
3349	database that allows C<NULL> values, as the perl equivalent for C<NULL> is
3350	C<undef> in L<DBI> land.
3351
3352	=head3 empty_is_undef
3353
3354	my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
3355	$csv->empty_is_undef (0);
3356	my $f = $csv->empty_is_undef;
3357
3358	Going one step further than L<C<blank_is_undef>\|/blank_is_undef>, this
3359	attribute converts all empty fields to C<undef>, so
3360
3361	1,"",," ",2
3362
3363	is read as
3364
3365	(1, undef, undef, " ", 2)
3366
3367	Note that this affects only fields that are originally empty, not fields
3368	that are empty after stripping allowed whitespace. YMMV.
3369
3370	=head3 allow_whitespace
3371
3372	my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
3373	$csv->allow_whitespace (0);
3374	my $f = $csv->allow_whitespace;
3375
3376	When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
3377	surrounding the separation character is removed when parsing. If either
3378	C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>\|/sep_char>,
3379	L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> it will not
3380	be considered whitespace.
3381
3382	Now lines like:
3383
3384	1 , "foo" , bar , 3 , zapp
3385
3386	are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
3387
3388	Note that B<all> whitespace is stripped from both start and end of each
3389	field. That would make it I<more> than a I<feature> to enable parsing bad
3390	C<CSV> lines, as
3391
3392	1, 2.0, 3, ape , monkey
3393
3394	will now be parsed as
3395
3396	("1", "2.0", "3", "ape", "monkey")
3397
3398	even if the original line was perfectly acceptable C<CSV>.
3399
3400	=head3 allow_loose_quotes
3401
3402	my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
3403	$csv->allow_loose_quotes (0);
3404	my $f = $csv->allow_loose_quotes;
3405
3406	By default, parsing unquoted fields containing L<C<quote_char>\|/quote_char>
3407	characters like
3408
3409	1,foo "bar" baz,42
3410
3411	would result in parse error 2034. Though it is still bad practice to allow
3412	this format, we cannot help the fact that some vendors make their
3413	applications spit out lines styled this way.
3414
3415	If there is B<really> bad C<CSV> data, like
3416
3417	1,"foo "bar" baz",42
3418
3419	or
3420
3421	1,""foo bar baz"",42
3422
3423	there is a way to get this data-line parsed and leave the quotes inside the
3424	quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
3425	B<AND> making sure that the L<C<escape_char>\|/escape_char> is I<not> equal
3426	to L<C<quote_char>\|/quote_char>.
3427
3428	=head3 allow_loose_escapes
3429
3430	my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
3431	$csv->allow_loose_escapes (0);
3432	my $f = $csv->allow_loose_escapes;
3433
3434	Parsing fields that have L<C<escape_char>\|/escape_char> characters that
3435	escape characters that do not need to be escaped, like:
3436
3437	my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
3438	$csv->parse (qq{1,"my bar\'s",baz,42});
3439
3440	would result in parse error 2025. Though it is bad practice to allow this
3441	format, this attribute enables you to treat all escape character sequences
3442	equal.
3443
3444	=head3 allow_unquoted_escape
3445
3446	my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
3447	$csv->allow_unquoted_escape (0);
3448	my $f = $csv->allow_unquoted_escape;
3449
3450	A backward compatibility issue where L<C<escape_char>\|/escape_char> differs
3451	from L<C<quote_char>\|/quote_char> prevents L<C<escape_char>\|/escape_char>
3452	to be in the first position of a field. If L<C<quote_char>\|/quote_char> is
3453	equal to the default C<"> and L<C<escape_char>\|/escape_char> is set to C<\>,
3454	this would be illegal:
3455
3456	1,\0,2
3457
3458	Setting this attribute to C<1> might help to overcome issues with backward
3459	compatibility and allow this style.
3460
3461	=head3 always_quote
3462
3463	my $csv = Text::CSV_PP->new ({ always_quote => 1 });
3464	$csv->always_quote (0);
3465	my $f = $csv->always_quote;
3466
3467	By default the generated fields are quoted only if they I<need> to be. For
3468	example, if they contain the separator character. If you set this attribute
3469	to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
3470	quoted, see L</blank_is_undef>). This makes it quite often easier to handle
3471	exported data in external applications.
3472
3473	=head3 quote_space
3474
3475	my $csv = Text::CSV_PP->new ({ quote_space => 1 });
3476	$csv->quote_space (0);
3477	my $f = $csv->quote_space;
3478
3479	By default, a space in a field would trigger quotation. As no rule exists
3480	for this to be forced in C<CSV>, nor any for the opposite, the default is true
3481	for safety. You can exclude the space from this trigger by setting this
3482	attribute to 0.
3483
3484	=head3 quote_empty
3485
3486	my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
3487	$csv->quote_empty (0);
3488	my $f = $csv->quote_empty;
3489
3490	By default the generated fields are quoted only if they I<need> to be. An
3491	empty (defined) field does not need quotation. If you set this attribute to
3492	C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
3493	quoted, see L</blank_is_undef>). See also L<C<always_quote>\|/always_quote>.
3494
3495	=head3 quote_binary
3496
3497	my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
3498	$csv->quote_binary (0);
3499	my $f = $csv->quote_binary;
3500
3501	By default, all "unsafe" bytes inside a string cause the combined field to
3502	be quoted. By setting this attribute to C<0>, you can disable that trigger
3503	for bytes >= C<0x7F>.
3504
3505	=head3 escape_null
3506
3507	my $csv = Text::CSV_PP->new ({ escape_null => 1 });
3508	$csv->escape_null (0);
3509	my $f = $csv->escape_null;
3510
3511	By default, a C<NULL> byte in a field would be escaped. This option enables
3512	you to treat the C<NULL> byte as a simple binary character in binary mode
3513	(the C<< { binary => 1 } >> is set). The default is true. You can prevent
3514	C<NULL> escapes by setting this attribute to C<0>.
3515
3516	When the C<escape_char> attribute is set to undefined, this attribute will
3517	be set to false.
3518
3519	The default setting will encode "=\x00=" as
3520
3521	"="0="
3522
3523	With C<escape_null> set to C<0>, this will result in
3524
3525	"=\x00="
3526
3527	The default when using the C<csv> function is C<false>.
3528
3529	For backward compatibility reasons, the deprecated old name C<quote_null>
3530	is still recognized.
3531
3532	=head3 keep_meta_info
3533
3534	my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
3535	$csv->keep_meta_info (0);
3536	my $f = $csv->keep_meta_info;
3537
3538	By default, the parsing of input records is as simple and fast as possible.
3539	However, some parsing information - like quotation of the original field -
3540	is lost in that process. Setting this flag to true enables retrieving that
3541	information after parsing with the methods L</meta_info>, L</is_quoted>,
3542	and L</is_binary> described below. Default is false for performance.
3543
3544	If you set this attribute to a value greater than 9, then you can control
3545	output quotation style like it was used in the input of the last parsed
3546	record (unless quotation was added because of other reasons).
3547
3548	my $csv = Text::CSV_PP->new ({
3549	binary => 1,
3550	keep_meta_info => 1,
3551	quote_space => 0,
3552	});
3553
3554	my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
3555
3556	$csv->print (*STDOUT, \@row);
3557	# 1,,, , ,f,g,"h""h",help,help
3558	$csv->keep_meta_info (11);
3559	$csv->print (*STDOUT, \@row);
3560	# 1,,"", ," ",f,"g","h""h",help,"help"
3561
3562	=head3 undef_str
3563
3564	my $csv = Text::CSV_PP->new ({ undef_str => "\\N" });
3565	$csv->undef_str (undef);
3566	my $s = $csv->undef_str;
3567
3568	This attribute optionally defines the output of undefined fields. The value
3569	passed is not changed at all, so if it needs quotation, the quotation needs
3570	to be included in the value of the attribute. Use with caution, as passing
3571	a value like C<",",,,,"""> will for sure mess up your output. The default
3572	for this attribute is C<undef>, meaning no special treatment.
3573
3574	This attribute is useful when exporting CSV data to be imported in custom
3575	loaders, like for MySQL, that recognize special sequences for C<NULL> data.
3576
3577	This attribute has no meaning when parsing CSV data.
3578
3579	=head3 verbatim
3580
3581	my $csv = Text::CSV_PP->new ({ verbatim => 1 });
3582	$csv->verbatim (0);
3583	my $f = $csv->verbatim;
3584
3585	This is a quite controversial attribute to set, but makes some hard things
3586	possible.
3587
3588	The rationale behind this attribute is to tell the parser that the normally
3589	special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
3590	special when this flag is set, and be dealt with as being ordinary binary
3591	characters. This will ease working with data with embedded newlines.
3592
3593	When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
3594	every line.
3595
3596	Imagine a file format like
3597
3598	M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
3599
3600	where the line ending is a very specific C<"#\r\n">, and the sep_char is a
3601	C<^> (caret). None of the fields is quoted, but embedded binary data is
3602	likely to be present. With the specific line ending, this should not be too
3603	hard to detect.
3604
3605	By default, Text::CSV_PP's parse function is instructed to only know about
3606	C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
3607	embedded newline as a real C<end-of-line>, so it can scan the next line if
3608	binary is true, and the newline is inside a quoted field. With this option,
3609	we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
3610	a binary character.
3611
3612	For L</parse> this means that the parser has no more idea about line ending
3613	and L</getline> C<chomp>s line endings on reading.
3614
3615	=head3 types
3616
3617	A set of column types; the attribute is immediately passed to the L</types>
3618	method.
3619
3620	=head3 callbacks
3621
3622	See the L</Callbacks> section below.
3623
3624	=head3 accessors
3625
3626	To sum it up,
3627
3628	$csv = Text::CSV_PP->new ();
3629
3630	is equivalent to
3631
3632	$csv = Text::CSV_PP->new ({
3633	eol => undef, # \r, \n, or \r\n
3634	sep_char => ',',
3635	sep => undef,
3636	quote_char => '"',
3637	quote => undef,
3638	escape_char => '"',
3639	binary => 0,
3640	decode_utf8 => 1,
3641	auto_diag => 0,
3642	diag_verbose => 0,
3643	blank_is_undef => 0,
3644	empty_is_undef => 0,
3645	allow_whitespace => 0,
3646	allow_loose_quotes => 0,
3647	allow_loose_escapes => 0,
3648	allow_unquoted_escape => 0,
3649	always_quote => 0,
3650	quote_empty => 0,
3651	quote_space => 1,
3652	escape_null => 1,
3653	quote_binary => 1,
3654	keep_meta_info => 0,
3655	strict => 0,
3656	formula => 0,
3657	verbatim => 0,
3658	undef_str => undef,
3659	types => undef,
3660	callbacks => undef,
3661	});
3662
3663	For all of the above mentioned flags, an accessor method is available where
3664	you can inquire the current value, or change the value
3665
3666	my $quote = $csv->quote_char;
3667	$csv->binary (1);
3668
3669	It is not wise to change these settings halfway through writing C<CSV> data
3670	to a stream. If however you want to create a new stream using the available
3671	C<CSV> object, there is no harm in changing them.
3672
3673	If the L</new> constructor call fails, it returns C<undef>, and makes the
3674	fail reason available through the L</error_diag> method.
3675
3676	$csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
3677	die "".Text::CSV_PP->error_diag ();
3678
3679	L</error_diag> will return a string like
3680
3681	"INI - Unknown attribute 'ecs_char'"
3682
3683	=head2 known_attributes
3684
3685	@attr = Text::CSV_PP->known_attributes;
3686	@attr = Text::CSV_PP::known_attributes;
3687	@attr = $csv->known_attributes;
3688
3689	This method will return an ordered list of all the supported attributes as
3690	described above. This can be useful for knowing what attributes are valid
3691	in classes that use or extend Text::CSV_PP.
3692
3693	=head2 print
3694
3695	$status = $csv->print ($fh, $colref);
3696
3697	Similar to L</combine> + L</string> + L</print>, but much more efficient.
3698	It expects an array ref as input (not an array!) and the resulting string
3699	is not really created, but immediately written to the C<$fh> object,
3700	typically an IO handle or any other object that offers a L</print> method.
3701
3702	For performance reasons C<print> does not create a result string, so all
3703	L</string>, L</status>, L</fields>, and L</error_input> methods will return
3704	undefined information after executing this method.
3705
3706	If C<$colref> is C<undef> (explicit, not through a variable argument) and
3707	L</bind_columns> was used to specify fields to be printed, it is possible
3708	to make performance improvements, as otherwise data would have to be copied
3709	as arguments to the method call:
3710
3711	$csv->bind_columns (\($foo, $bar));
3712	$status = $csv->print ($fh, undef);
3713
3714	A short benchmark
3715
3716	my @data = ("aa" .. "zz");
3717	$csv->bind_columns (\(@data));
3718
3719	$csv->print ($fh, [ @data ]); # 11800 recs/sec
3720	$csv->print ($fh, \@data ); # 57600 recs/sec
3721	$csv->print ($fh, undef ); # 48500 recs/sec
3722
3723	=head2 say
3724
3725	$status = $csv->say ($fh, $colref);
3726
3727	Like L<C<print>\|/print>, but L<C<eol>\|/eol> defaults to C<$\>.
3728
3729	=head2 print_hr
3730
3731	$csv->print_hr ($fh, $ref);
3732
3733	Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
3734	provided the column names are set with L</column_names>.
3735
3736	It is just a wrapper method with basic parameter checks over
3737
3738	$csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
3739
3740	=head2 combine
3741
3742	$status = $csv->combine (@fields);
3743
3744	This method constructs a C<CSV> record from C<@fields>, returning success
3745	or failure. Failure can result from lack of arguments or an argument that
3746	contains an invalid character. Upon success, L</string> can be called to
3747	retrieve the resultant C<CSV> string. Upon failure, the value returned by
3748	L</string> is undefined and L</error_input> could be called to retrieve the
3749	invalid argument.
3750
3751	=head2 string
3752
3753	$line = $csv->string ();
3754
3755	This method returns the input to L</parse> or the resultant C<CSV> string
3756	of L</combine>, whichever was called more recently.
3757
3758	=head2 getline
3759
3760	$colref = $csv->getline ($fh);
3761
3762	This is the counterpart to L</print>, as L</parse> is the counterpart to
3763	L</combine>: it parses a row from the C<$fh> handle using the L</getline>
3764	method associated with C<$fh> and parses this row into an array ref. This
3765	array ref is returned by the function or C<undef> for failure. When C<$fh>
3766	does not support C<getline>, you are likely to hit errors.
3767
3768	When fields are bound with L</bind_columns> the return value is a reference
3769	to an empty list.
3770
3771	The L</string>, L</fields>, and L</status> methods are meaningless again.
3772
3773	=head2 getline_all
3774
3775	$arrayref = $csv->getline_all ($fh);
3776	$arrayref = $csv->getline_all ($fh, $offset);
3777	$arrayref = $csv->getline_all ($fh, $offset, $length);
3778
3779	This will return a reference to a list of L<getline ($fh)\|/getline> results.
3780	In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
3781	with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
3782	into consideration.
3783
3784	Given a CSV file with 10 lines:
3785
3786	lines call
3787	----- ---------------------------------------------------------
3788	0..9 $csv->getline_all ($fh) # all
3789	0..9 $csv->getline_all ($fh, 0) # all
3790	8..9 $csv->getline_all ($fh, 8) # start at 8
3791	- $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
3792	0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
3793	4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
3794	8..9 $csv->getline_all ($fh, -2) # last 2 rows
3795	6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
3796
3797	=head2 getline_hr
3798
3799	The L</getline_hr> and L</column_names> methods work together to allow you
3800	to have rows returned as hashrefs. You must call L</column_names> first to
3801	declare your column names.
3802
3803	$csv->column_names (qw( code name price description ));
3804	$hr = $csv->getline_hr ($fh);
3805	print "Price for $hr->{name} is $hr->{price} EUR\n";
3806
3807	L</getline_hr> will croak if called before L</column_names>.
3808
3809	Note that L</getline_hr> creates a hashref for every row and will be much
3810	slower than the combined use of L</bind_columns> and L</getline>, which still
3811	offers the same easy-to-use hashref inside the loop:
3812
3813	my @cols = @{$csv->getline ($fh)};
3814	$csv->column_names (@cols);
3815	while (my $row = $csv->getline_hr ($fh)) {
3816	print $row->{price};
3817	}
3818
3819	Could easily be rewritten to the much faster:
3820
3821	my @cols = @{$csv->getline ($fh)};
3822	my $row = {};
3823	$csv->bind_columns (\@{$row}{@cols});
3824	while ($csv->getline ($fh)) {
3825	print $row->{price};
3826	}
3827
3828	Your mileage may vary for the size of the data and the number of rows. With
3829	perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
3830
3831	Rate hashrefs getlines
3832	hashrefs 1.00/s -- -76%
3833	getlines 4.15/s 313% --
3834
3835	=head2 getline_hr_all
3836
3837	$arrayref = $csv->getline_hr_all ($fh);
3838	$arrayref = $csv->getline_hr_all ($fh, $offset);
3839	$arrayref = $csv->getline_hr_all ($fh, $offset, $length);
3840
3841	This will return a reference to a list of L<getline_hr ($fh)\|/getline_hr>
3842	results. In this call, L<C<keep_meta_info>\|/keep_meta_info> is disabled.
3843
3844	=head2 parse
3845
3846	$status = $csv->parse ($line);
3847
3848	This method decomposes a C<CSV> string into fields, returning success or
3849	failure. Failure can result from a lack of argument or the given C<CSV>
3850	string is improperly formatted. Upon success, L</fields> can be called to
3851	retrieve the decomposed fields. Upon failure calling L</fields> will return
3852	undefined data and L</error_input> can be called to retrieve the invalid
3853	argument.
3854
3855	You may use the L</types> method for setting column types. See L</types>'
3856	description below.
3857
3858	The C<$line> argument is supposed to be a simple scalar. Everything else is
3859	supposed to croak and set error 1500.
3860
3861	=head2 fragment
3862
3863	This function tries to implement RFC7111 (URI Fragment Identifiers for the
3864	text/csv Media Type) - http://tools.ietf.org/html/rfc7111
3865
3866	my $AoA = $csv->fragment ($fh, $spec);
3867
3868	In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
3869	to indicate a range. All indices are C<1>-based: the first row or column
3870	has index C<1>. Selections can be combined with the semi-colon (C<;>).
3871
3872	When using this method in combination with L</column_names>, the returned
3873	reference will point to a list of hashes instead of a list of lists. A
3874	disjointed cell-based combined selection might return rows with different
3875	number of columns making the use of hashes unpredictable.
3876
3877	$csv->column_names ("Name", "Age");
3878	my $AoH = $csv->fragment ($fh, "col=3;8");
3879
3880	If the L</after_parse> callback is active, it is also called on every line
3881	parsed and skipped before the fragment.
3882
3883	=over 2
3884
3885	=item row
3886
3887	row=4
3888	row=5-7
3889	row=6-*
3890	row=1-2;4;6-*
3891
3892	=item col
3893
3894	col=2
3895	col=1-3
3896	col=4-*
3897	col=1-2;4;7-*
3898
3899	=item cell
3900
3901	In cell-based selection, the comma (C<,>) is used to pair row and column
3902
3903	cell=4,1
3904
3905	The range operator (C<->) using C<cell>s can be used to define top-left and
3906	bottom-right C<cell> location
3907
3908	cell=3,1-4,6
3909
3910	The C<*> is only allowed in the second part of a pair
3911
3912	cell=3,2-*,2 # row 3 till end, only column 2
3913	cell=3,2-3,* # column 2 till end, only row 3
3914	cell=3,2-*,* # strip row 1 and 2, and column 1
3915
3916	Cells and cell ranges may be combined with C<;>, possibly resulting in rows
3917	with different number of columns
3918
3919	cell=1,1-2,2;3,3-4,4;1,4;4,1
3920
3921	Disjointed selections will only return selected cells. The cells that are
3922	not specified will not be included in the returned set, not even as
3923	C<undef>. As an example given a C<CSV> like
3924
3925	11,12,13,...19
3926	21,22,...28,29
3927	: :
3928	91,...97,98,99
3929
3930	with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
3931
3932	11,12,14
3933	21,22
3934	33,34
3935	41,43,44
3936
3937	Overlapping cell-specs will return those cells only once, so
3938	C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
3939
3940	11,12,13
3941	21,22,23,24
3942	31,32,33,34
3943	42,43,44
3944
3945	=back
3946
3947	L<RFC7111\|http://tools.ietf.org/html/rfc7111> does B<not> allow different
3948	types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
3949	Passing an invalid fragment specification will croak and set error 2013.
3950
3951	=head2 column_names
3952
3953	Set the "keys" that will be used in the L</getline_hr> calls. If no keys
3954	(column names) are passed, it will return the current setting as a list.
3955
3956	L</column_names> accepts a list of scalars (the column names) or a single
3957	array_ref, so you can pass the return value from L</getline> too:
3958
3959	$csv->column_names ($csv->getline ($fh));
3960
3961	L</column_names> does B<no> checking on duplicates at all, which might lead
3962	to unexpected results. Undefined entries will be replaced with the string
3963	C<"\cAUNDEF\cA">, so
3964
3965	$csv->column_names (undef, "", "name", "name");
3966	$hr = $csv->getline_hr ($fh);
3967
3968	Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
3969	the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
3970	field.
3971
3972	L</column_names> croaks on invalid arguments.
3973
3974	=head2 header
3975
3976	This method does NOT work in perl-5.6.x
3977
3978	Parse the CSV header and set L<C<sep>\|/sep>, column_names and encoding.
3979
3980	my @hdr = $csv->header ($fh);
3981	$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
3982	$csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3983
3984	The first argument should be a file handle.
3985
3986	This method resets some object properties, as it is supposed to be invoked
3987	only once per file or stream. It will leave attributes C<column_names> and
3988	C<bound_columns> alone if setting column names is disabled. Reading headers
3989	on previously processed objects might fail on perl-5.8.0 and older.
3990
3991	Assuming that the file opened for parsing has a header, and the header does
3992	not contain problematic characters like embedded newlines, read the first
3993	line from the open handle then auto-detect whether the header separates the
3994	column names with a character from the allowed separator list.
3995
3996	If any of the allowed separators matches, and none of the I<other> allowed
3997	separators match, set L<C<sep>\|/sep> to that separator for the current
3998	CSV_PP instance and use it to parse the first line, map those to lowercase,
3999	and use that to set the instance L</column_names>:
4000
4001	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4002	open my $fh, "<", "file.csv";
4003	binmode $fh; # for Windows
4004	$csv->header ($fh);
4005	while (my $row = $csv->getline_hr ($fh)) {
4006	...
4007	}
4008
4009	If the header is empty, contains more than one unique separator out of the
4010	allowed set, contains empty fields, or contains identical fields (after
4011	folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
4012
4013	If the header contains embedded newlines or is not valid CSV in any other
4014	way, this method will croak and leave the parse error untouched.
4015
4016	A successful call to C<header> will always set the L<C<sep>\|/sep> of the
4017	C<$csv> object. This behavior can not be disabled.
4018
4019	=head3 return value
4020
4021	On error this method will croak.
4022
4023	In list context, the headers will be returned whether they are used to set
4024	L</column_names> or not.
4025
4026	In scalar context, the instance itself is returned. B<Note>: the values as
4027	found in the header will effectively be B<lost> if C<set_column_names> is
4028	false.
4029
4030	=head3 Options
4031
4032	=over 2
4033
4034	=item sep_set
4035
4036	$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
4037
4038	The list of legal separators defaults to C<[ ";", "," ]> and can be changed
4039	by this option. As this is probably the most often used option, it can be
4040	passed on its own as an unnamed argument:
4041
4042	$csv->header ($fh, [ ";", ",", "\|", "\t", "::", "\x{2063}" ]);
4043
4044	Multi-byte sequences are allowed, both multi-character and Unicode. See
4045	L<C<sep>\|/sep>.
4046
4047	=item detect_bom
4048
4049	$csv->header ($fh, { detect_bom => 1 });
4050
4051	The default behavior is to detect if the header line starts with a BOM. If
4052	the header has a BOM, use that to set the encoding of C<$fh>. This default
4053	behavior can be disabled by passing a false value to C<detect_bom>.
4054
4055	Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
4056	UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
4057	but L<Encode> does not (yet). UTF-7 is not supported.
4058
4059	If a supported BOM was detected as start of the stream, it is stored in the
4060	object attribute C<ENCODING>.
4061
4062	my $enc = $csv->{ENCODING};
4063
4064	The encoding is used with C<binmode> on C<$fh>.
4065
4066	If the handle was opened in a (correct) encoding, this method will B<not>
4067	alter the encoding, as it checks the leading B<bytes> of the first line. In
4068	case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
4069	C<""> (empty) instead of the default C<undef>.
4070
4071	=item munge_column_names
4072
4073	This option offers the means to modify the column names into something that
4074	is most useful to the application. The default is to map all column names
4075	to lower case.
4076
4077	$csv->header ($fh, { munge_column_names => "lc" });
4078
4079	The following values are available:
4080
4081	lc - lower case
4082	uc - upper case
4083	none - do not change
4084	\%hash - supply a mapping
4085	\&cb - supply a callback
4086
4087	Literal:
4088
4089	$csv->header ($fh, { munge_column_names => "none" });
4090
4091	Hash:
4092
4093	$csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
4094
4095	If a value does not exist, the original value is used unchanged.
4096
4097	Callback:
4098
4099	$csv->header ($fh, { munge_column_names => sub { fc } });
4100	$csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
4101	$csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
4102
4103	As this callback is called in a C<map>, you can use C<$_> directly.
4104
4105	=item set_column_names
4106
4107	$csv->header ($fh, { set_column_names => 1 });
4108
4109	The default is to set the instance's column names using L</column_names> if
4110	the method is successful, so subsequent calls to L</getline_hr> can return
4111	a hash. Disabling setting the header can be forced by using a false value for
4112	this option.
4113
4114	As described in L</return value> above, content is lost in scalar context.
4115
4116	=back
4117
4118	=head3 Validation
4119
4120	When receiving CSV files from external sources, this method can be used to
4121	protect against changes in the layout by restricting to known headers (and
4122	typos in the header fields).
4123
4124	my %known = (
4125	"record key" => "c_rec",
4126	"rec id" => "c_rec",
4127	"id_rec" => "c_rec",
4128	"kode" => "code",
4129	"code" => "code",
4130	"vaule" => "value",
4131	"value" => "value",
4132	);
4133	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
4134	open my $fh, "<", $source or die "$source: $!";
4135	$csv->header ($fh, { munge_column_names => sub {
4136	s/\s+$//;
4137	s/^\s+//;
4138	$known{lc $_} or die "Unknown column '$_' in $source";
4139	}});
4140	while (my $row = $csv->getline_hr ($fh)) {
4141	say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
4142	}
4143
4144	=head2 bind_columns
4145
4146	Takes a list of scalar references to be used for output with L</print> or
4147	to store the fields fetched by L</getline>. When you do not pass enough
4148	references to store the fetched fields in, L</getline> will fail with error
4149	C<3006>. If you pass more than there are fields to return, the content of
4150	the remaining references is left untouched.
4151
4152	$csv->bind_columns (\$code, \$name, \$price, \$description);
4153	while ($csv->getline ($fh)) {
4154	print "The price of a $name is \x{20ac} $price\n";
4155	}
4156
4157	To reset or clear all column binding, call L</bind_columns> with the single
4158	argument C<undef>. This will also clear column names.
4159
4160	$csv->bind_columns (undef);
4161
4162	If no arguments are passed at all, L</bind_columns> will return the list of
4163	current bindings or C<undef> if no binds are active.
4164
4165	Note that in parsing with C<bind_columns>, the fields are set on the fly.
4166	That implies that if the third field of a row causes an error (or this row
4167	has just two fields where the previous row had more), the first two fields
4168	already have been assigned the values of the current row, while the rest of
4169	the fields will still hold the values of the previous row. If you want the
4170	parser to fail in these cases, use the L<C<strict>\|/strict> attribute.
4171
4172	=head2 eof
4173
4174	$eof = $csv->eof ();
4175
4176	If L</parse> or L</getline> was used with an IO stream, this method will
4177	return true (1) if the last call hit end of file, otherwise it will return
4178	false (''). This is useful to see the difference between a failure and end
4179	of file.
4180
4181	Note that if the parsing of the last line caused an error, C<eof> is still
4182	true. That means that if you are I<not> using L</auto_diag>, an idiom like
4183
4184	while (my $row = $csv->getline ($fh)) {
4185	# ...
4186	}
4187	$csv->eof or $csv->error_diag;
4188
4189	will I<not> report the error. You would have to change that to
4190
4191	while (my $row = $csv->getline ($fh)) {
4192	# ...
4193	}
4194	+$csv->error_diag and $csv->error_diag;
4195
4196	=head2 types
4197
4198	$csv->types (\@tref);
4199
4200	This method is used to force that (all) columns are of a given type. For
4201	example, if you have an integer column, two columns with doubles and a
4202	string column, then you might do a
4203
4204	$csv->types ([Text::CSV_PP::IV (),
4205	Text::CSV_PP::NV (),
4206	Text::CSV_PP::NV (),
4207	Text::CSV_PP::PV ()]);
4208
4209	Column types are used only for I<decoding> columns while parsing, in other
4210	words by the L</parse> and L</getline> methods.
4211
4212	You can unset column types by doing a
4213
4214	$csv->types (undef);
4215
4216	or fetch the current type settings with
4217
4218	$types = $csv->types ();
4219
4220	=over 4
4221
4222	=item IV
4223
4224	Set field type to integer.
4225
4226	=item NV
4227
4228	Set field type to numeric/float.
4229
4230	=item PV
4231
4232	Set field type to string.
4233
4234	=back
4235
4236	=head2 fields
4237
4238	@columns = $csv->fields ();
4239
4240	This method returns the input to L</combine> or the resultant decomposed
4241	fields of a successful L</parse>, whichever was called more recently.
4242
4243	Note that the return value is undefined after using L</getline>, which does
4244	not fill the data structures returned by L</parse>.
4245
4246	=head2 meta_info
4247
4248	@flags = $csv->meta_info ();
4249
4250	This method returns the "flags" of the input to L</combine> or the flags of
4251	the resultant decomposed fields of L</parse>, whichever was called more
4252	recently.
4253
4254	For each field, a meta_info field will hold flags that inform something
4255	about the field returned by the L</fields> method or passed to the
4256	L</combine> method. The flags are bit-wise-C<or>'d like:
4257
4258	=over 2
4259
4260	=item C< >0x0001
4261
4262	The field was quoted.
4263
4264	=item C< >0x0002
4265
4266	The field was binary.
4267
4268	=back
4269
4270	See the C<is_***> methods below.
4271
4272	=head2 is_quoted
4273
4274	my $quoted = $csv->is_quoted ($column_idx);
4275
4276	Where C<$column_idx> is the (zero-based) index of the column in the last
4277	result of L</parse>.
4278
4279	This returns a true value if the data in the indicated column was enclosed
4280	in L<C<quote_char>|/quote_char> quotes. This might be important for fields
4281	where content C<,20070108,> is to be treated as a numeric value, and where
4282	C<,"20070108",> is explicitly marked as character string data.
4283
4284	This method is only valid when L</keep_meta_info> is set to a true value.
4285
4286	=head2 is_binary
4287
4288	my $binary = $csv->is_binary ($column_idx);
4289
4290	Where C<$column_idx> is the (zero-based) index of the column in the last
4291	result of L</parse>.
4292
4293	This returns a true value if the data in the indicated column contained any
4294	byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
4295
4296	This method is only valid when L</keep_meta_info> is set to a true value.
4297
4298	=head2 is_missing
4299
4300	my $missing = $csv->is_missing ($column_idx);
4301
4302	Where C<$column_idx> is the (zero-based) index of the column in the last
4303	result of L</getline_hr>.
4304
4305	$csv->keep_meta_info (1);
4306	while (my $hr = $csv->getline_hr ($fh)) {
4307	$csv->is_missing (0) and next; # This was an empty line
4308	}
4309
4310	When using L</getline_hr>, it is impossible to tell if the parsed fields
4311	are C<undef> because they were not filled in the C<CSV> stream or because
4312	they were not read at all, as B<all> the fields defined by L</column_names>
4313	are set in the hash-ref. If you still need to know if all fields in each
4314	row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
4315	you can check the flags.
4316
4317	If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
4318	always return C<undef>, regardless of C<$column_idx> being valid or not. If
4319	this attribute is C<true> it will return either C<0> (the field is present)
4320	or C<1> (the field is missing).
4321
4322	A special case is the empty line. If the line is completely empty - after
4323	dealing with the flags - this is still a valid CSV line: it is a record of
4324	just one single empty field. However, if C<keep_meta_info> is set, invoking
4325	C<is_missing> with index C<0> will now return true.
4326
4327	=head2 status
4328
4329	$status = $csv->status ();
4330
4331	This method returns the status of the last invoked L</combine> or L</parse>
4332	call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
4333
4334	=head2 error_input
4335
4336	$bad_argument = $csv->error_input ();
4337
4338	This method returns the erroneous argument (if it exists) of L</combine> or
4339	L</parse>, whichever was called more recently. If the last invocation was
4340	successful, C<error_input> will return C<undef>.
4341
4342	=head2 error_diag
4343
4344	Text::CSV_PP->error_diag ();
4345	$csv->error_diag ();
4346	$error_code = 0 + $csv->error_diag ();
4347	$error_str = "" . $csv->error_diag ();
4348	($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
4349
4350	If (and only if) an error occurred, this function returns the diagnostics
4351	of that error.
4352
4353	If called in void context, this will print the internal error code and the
4354	associated error message to STDERR.
4355
4356	If called in list context, this will return the error code and the error
4357	message in that order. If the last error was from parsing, the rest of the
4358	values returned are a best guess at the location within the line that was
4359	being parsed. Their values are 1-based. The position currently is index of
4360	the byte at which the parsing failed in the current record. It might change
4361	to be the index of the current character in a later release. The record number is
4362	the index of the record parsed by the csv instance. The field number is the
4363	index of the field the parser thinks it is currently trying to parse. See
4364	F<examples/csv-check> for how this can be used.
4365
4366	If called in scalar context, it will return the diagnostics in a single
4367	scalar, a-la C<$!>. It will contain the error code in numeric context, and
4368	the diagnostics message in string context.
4369
4370	When called as a class method or a direct function call, the diagnostics
4371	are that of the last L</new> call.
4372
4373	=head2 record_number
4374
4375	$recno = $csv->record_number ();
4376
4377	Returns the number of records parsed by this csv instance. This value should be more
4378	accurate than C<$.> when embedded newlines come in play. Records written by
4379	this instance are not counted.
4380
4381	=head2 SetDiag
4382
4383	$csv->SetDiag (0);
4384
4385	Use to reset the diagnostics if you are dealing with errors.
4386
4387	=head1 FUNCTIONS
4388
4389	This section is also taken from Text::CSV_XS.
4390
4391	=head2 csv
4392
4393	This function is not exported by default and should be explicitly requested:
4394
4395	use Text::CSV_PP qw( csv );
4396
4397	This is a high-level function that aims at simple (user) interfaces. This
4398	can be used to read/parse a C<CSV> file or stream (the default behavior) or
4399	to produce a file or write to a stream (define the C<out> attribute). It
4400	returns an array- or hash-reference on parsing (or C<undef> on fail) or the
4401	numeric value of L</error_diag> on writing. When this function fails you
4402	can get to the error using the class call to L</error_diag>
4403
4404	my $aoa = csv (in => "test.csv") or
4405	die Text::CSV_PP->error_diag;
4406
4407	This function takes the arguments as key-value pairs. This can be passed as
4408	a list or as an anonymous hash:
4409
4410	my $aoa = csv ( in => "test.csv", sep_char => ";");
4411	my $aoh = csv ({ in => $fh, headers => "auto" });
4412
4413	The arguments passed consist of two parts: the arguments to L</csv> itself
4414	and the optional attributes to the C<CSV> object used inside the function
4415	as enumerated and explained in L</new>.
4416
4417	If not overridden, the default options used for CSV are
4418
4419	auto_diag => 1
4420	escape_null => 0
4421
4422	The option that is always set and cannot be altered is
4423
4424	binary => 1
4425
4426	As this function will likely be used in one-liners, it allows C<quote> to
4427	be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
4428	or C<escape>.
4429
4430	Alternative invocations:
4431
4432	my $aoa = Text::CSV_PP::csv (in => "file.csv");
4433
4434	my $csv = Text::CSV_PP->new ();
4435	my $aoa = $csv->csv (in => "file.csv");
4436
4437	In the latter case, the object attributes are used from the existing object
4438	and the attribute arguments in the function call are ignored:
4439
4440	my $csv = Text::CSV_PP->new ({ sep_char => ";" });
4441	my $aoh = $csv->csv (in => "file.csv", bom => 1);
4442
4443	will parse using C<;> as C<sep_char>, not C<,>.
4444
4445	=head3 in
4446
4447	Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
4448	which will be opened for reading and closed when finished, a file handle
4449	(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
4450	itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
4451
4452	When used with L</out>, C<in> should be a reference to a CSV structure (AoA
4453	or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
4454	The code-ref will be invoked with no arguments.
4455
4456	my $aoa = csv (in => "file.csv");
4457
4458	open my $fh, "<", "file.csv";
4459	my $aoa = csv (in => $fh);
4460
4461	my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
4462	my $err = csv (in => $csv, out => "file.csv");
4463
4464	If called in void context without the L</out> attribute, the resulting ref
4465	will be used as input to a subsequent call to csv:
4466
4467	csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
4468
4469	will be a shortcut to
4470
4471	csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
4472
4473	where, in the absence of the C<out> attribute, this is a shortcut to
4474
4475	csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
4476	out => *STDOUT)
4477
4478	=head3 out
4479
4480	csv (in => $aoa, out => "file.csv");
4481	csv (in => $aoa, out => $fh);
4482	csv (in => $aoa, out => STDOUT);
4483	csv (in => $aoa, out => *STDOUT);
4484	csv (in => $aoa, out => \*STDOUT);
4485	csv (in => $aoa, out => \my $data);
4486	csv (in => $aoa, out => undef);
4487	csv (in => $aoa, out => \"skip");
4488
4489	In output mode, the default CSV options when producing CSV are
4490
4491	eol => "\r\n"
4492
4493	The L</fragment> attribute is ignored in output mode.
4494
4495	C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
4496	writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
4497	reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
4498	or a reference to a scalar (e.g. C<\my $data>).
4499
4500	csv (in => sub { $sth->fetch }, out => "dump.csv");
4501	csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
4502	headers => $sth->{NAME_lc});
4503
4504	When a code-ref is used for C<in>, the output is generated per invocation,
4505	so no buffering is involved. This implies that there is no size restriction
4506	on the number of records. The C<csv> function ends when the coderef returns
4507	a false value.
4508
4509	If C<out> is set to a reference of the literal string C<"skip">, the output
4510	will be suppressed completely, which might be useful in combination with a
4511	filter for side effects only.
4512
4513	my %cache;
4514	csv (in => "dump.csv",
4515	out => \"skip",
4516	on_in => sub { $cache{$_[1][1]}++ });
4517
4518	Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
4519	equivalent to C<\"skip">.
4520
4521	=head3 encoding
4522
4523	If passed, it should be an encoding accepted by the C<:encoding()> option
4524	to C<open>. There is no default value. This attribute does not work in perl
4525	5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
4526	line invocations.
4527
4528	If C<encoding> is set to the literal value C<"auto">, the method L</header>
4529	will be invoked on the opened stream to check if there is a BOM and set the
4530	encoding accordingly. This is equal to passing a true value in the option
4531	L<C<detect_bom>|/detect_bom>.
4532
4533	=head3 detect_bom
4534
4535	If C<detect_bom> is given, the method L</header> will be invoked on the
4536	opened stream to check if there is a BOM and set the encoding accordingly.
4537
4538	C<detect_bom> can be abbreviated to C<bom>.
4539
4540	This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
4541
4542	Note that as the method L</header> is invoked, its default is to also set
4543	the headers.
4544
4545	=head3 headers
4546
4547	If this attribute is not given, the default behavior is to produce an array
4548	of arrays.
4549
4550	If C<headers> is supplied, it should be an anonymous list of column names,
4551	an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
4552	or C<skip>.
4553
4554	=over 2
4555
4556	=item skip
4557
4558	When C<skip> is used, the header will not be included in the output.
4559
4560	my $aoa = csv (in => $fh, headers => "skip");
4561
4562	=item auto
4563
4564	If C<auto> is used, the first line of the C<CSV> source will be read as the
4565	list of field headers and used to produce an array of hashes.
4566
4567	my $aoh = csv (in => $fh, headers => "auto");
4568
4569	=item lc
4570
4571	If C<lc> is used, the first line of the C<CSV> source will be read as the
4572	list of field headers mapped to lower case and used to produce an array of
4573	hashes. This is a variation of C<auto>.
4574
4575	my $aoh = csv (in => $fh, headers => "lc");
4576
4577	=item uc
4578
4579	If C<uc> is used, the first line of the C<CSV> source will be read as the
4580	list of field headers mapped to upper case and used to produce an array of
4581	hashes. This is a variation of C<auto>.
4582
4583	my $aoh = csv (in => $fh, headers => "uc");
4584
4585	=item CODE
4586
4587	If a coderef is used, the first line of the C<CSV> source will be read as
4588	the list of mangled field headers in which each field is passed as the only
4589	argument to the coderef. This list is used to produce an array of hashes.
4590
4591	my $aoh = csv (in => $fh,
4592	headers => sub { lc ($_[0]) =~ s/kode/code/gr });
4593
4594	this example is a variation of using C<lc> where all occurrences of C<kode>
4595	are replaced with C<code>.
4596
4597	=item ARRAY
4598
4599	If C<headers> is an anonymous list, the entries in the list will be used
4600	as field names. The first line is considered data instead of headers.
4601
4602	my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
4603	csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
4604
4605	=item HASH
4606
4607	If C<headers> is a hash reference, this implies C<auto>, but header fields
4608	that exist as a key in the hashref will be replaced by the value for that
4609	key. Given a CSV file like
4610
4611	post-kode,city,name,id number,fubble
4612	1234AA,Duckstad,Donald,13,"X313DF"
4613
4614	using
4615
4616	csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
4617
4618	will return an entry like
4619
4620	{ pc => "1234AA",
4621	city => "Duckstad",
4622	name => "Donald",
4623	ID => "13",
4624	fubble => "X313DF",
4625	}
4626
4627	=back
4628
4629	See also L<C<munge_column_names>|/munge_column_names> and
4630	L<C<set_column_names>|/set_column_names>.
4631
4632	=head3 munge_column_names
4633
4634	If C<munge_column_names> is set, the method L</header> is invoked on the
4635	opened stream with all matching arguments to detect and set the headers.
4636
4637	C<munge_column_names> can be abbreviated to C<munge>.
4638
4639	=head3 key
4640
4641	If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
4642	hashref instead of an array of hashes. Allowed values are simple scalars or
4643	array-references where the first element is the joiner and the rest are the
4644	fields to join to combine the key.
4645
4646	my $ref = csv (in => "test.csv", key => "code");
4647	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
4648
4649	with test.csv like
4650
4651	code,product,price,color
4652	1,pc,850,gray
4653	2,keyboard,12,white
4654	3,mouse,5,black
4655
4656	the first example will return
4657
4658	{ 1 => {
4659	code => 1,
4660	color => 'gray',
4661	price => 850,
4662	product => 'pc'
4663	},
4664	2 => {
4665	code => 2,
4666	color => 'white',
4667	price => 12,
4668	product => 'keyboard'
4669	},
4670	3 => {
4671	code => 3,
4672	color => 'black',
4673	price => 5,
4674	product => 'mouse'
4675	}
4676	}
4677
4678	the second example will return
4679
4680	{ "1:gray" => {
4681	code => 1,
4682	color => 'gray',
4683	price => 850,
4684	product => 'pc'
4685	},
4686	"2:white" => {
4687	code => 2,
4688	color => 'white',
4689	price => 12,
4690	product => 'keyboard'
4691	},
4692	"3:black" => {
4693	code => 3,
4694	color => 'black',
4695	price => 5,
4696	product => 'mouse'
4697	}
4698	}
4699
4700	The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
4701	data that has no header line, like
4702
4703	my $ref = csv (
4704	in => "foo.csv",
4705	headers => [qw( c_foo foo bar description stock )],
4706	key => "c_foo",
4707	);
4708
4709	=head3 value
4710
4711	Used to create key-value hashes.
4712
4713	Only allowed when C<key> is valid. A C<value> can be either a single column
4714	label or an anonymous list of column labels. In the first case, the value
4715	will be a simple scalar value, in the latter case, it will be a hashref.
4716
4717	my $ref = csv (in => "test.csv", key => "code",
4718	value => "price");
4719	my $ref = csv (in => "test.csv", key => "code",
4720	value => [ "product", "price" ]);
4721	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
4722	value => "price");
4723	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
4724	value => [ "product", "price" ]);
4725
4726	with test.csv like
4727
4728	code,product,price,color
4729	1,pc,850,gray
4730	2,keyboard,12,white
4731	3,mouse,5,black
4732
4733	the first example will return
4734
4735	{ 1 => 850,
4736	2 => 12,
4737	3 => 5,
4738	}
4739
4740	the second example will return
4741
4742	{ 1 => {
4743	price => 850,
4744	product => 'pc'
4745	},
4746	2 => {
4747	price => 12,
4748	product => 'keyboard'
4749	},
4750	3 => {
4751	price => 5,
4752	product => 'mouse'
4753	}
4754	}
4755
4756	the third example will return
4757
4758	{ "1:gray" => 850,
4759	"2:white" => 12,
4760	"3:black" => 5,
4761	}
4762
4763	the fourth example will return
4764
4765	{ "1:gray" => {
4766	price => 850,
4767	product => 'pc'
4768	},
4769	"2:white" => {
4770	price => 12,
4771	product => 'keyboard'
4772	},
4773	"3:black" => {
4774	price => 5,
4775	product => 'mouse'
4776	}
4777	}
4778
4779	=head3 keep_headers
4780
4781	When using hashes, keep the column names into the arrayref passed, so all
4782	headers are available after the call in the original order.
4783
4784	my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
4785
4786	This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
4787
4788	This attribute implies a default of C<auto> for the C<headers> attribute.
4789
4790	=head3 fragment
4791
4792	Only output the fragment as defined in the L</fragment> method. This option
4793	is ignored when I<generating> C<CSV>. See L</out>.
4794
4795	Combining all of them could give something like
4796
4797	use Text::CSV_PP qw( csv );
4798	my $aoh = csv (
4799	in => "test.txt",
4800	encoding => "utf-8",
4801	headers => "auto",
4802	sep_char => "|",
4803	fragment => "row=3;6-9;15-*",
4804	);
4805	say $aoh->[15]{Foo};
4806
4807	=head3 sep_set
4808
4809	If C<sep_set> is set, the method L</header> is invoked on the opened stream
4810	to detect and set L<C<sep_char>|/sep_char> with the given set.
4811
4812	C<sep_set> can be abbreviated to C<seps>.
4813
4814	Note that as the L</header> method is invoked, its default is to also set
4815	the headers.
4816
4817	=head3 set_column_names
4818
4819	If C<set_column_names> is passed, the method L</header> is invoked on the
4820	opened stream with all arguments meant for L</header>.
4821
4822	If C<set_column_names> is passed as a false value, the content of the first
4823	row is only preserved if the output is AoA:
4824
4825	With an input-file like
4826
4827	bAr,foo
4828	1,2
4829	3,4,5
4830
4831	This call
4832
4833	my $aoa = csv (in => $file, set_column_names => 0);
4834
4835	will result in
4836
4837	[[ "bar", "foo" ],
4838	[ "1", "2" ],
4839	[ "3", "4", "5" ]]
4840
4841	and
4842
4843	my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
4844
4845	will result in
4846
4847	[[ "bAr", "foo" ],
4848	[ "1", "2" ],
4849	[ "3", "4", "5" ]]
4850
4851	=head2 Callbacks
4852
4853	Callbacks enable actions triggered from the I<inside> of Text::CSV_PP.
4854
4855	While most of what this enables can easily be done in an unrolled loop as
4856	described in the L</SYNOPSIS> callbacks can be used to meet special demands
4857	or enhance the L</csv> function.
4858
4859	=over 2
4860
4861	=item error
4862
4863	$csv->callbacks (error => sub { $csv->SetDiag (0) });
4864
4865	the C<error> callback is invoked when an error occurs, but I<only> when
4866	L</auto_diag> is set to a true value. A callback is invoked with the values
4867	returned by L</error_diag>:
4868
4869	my ($c, $s);
4870
4871	sub ignore3006
4872	{
4873	my ($err, $msg, $pos, $recno, $fldno) = @_;
4874	if ($err == 3006) {
4875	# ignore this error
4876	($c, $s) = (undef, undef);
4877	Text::CSV_PP->SetDiag (0);
4878	}
4879	# Any other error
4880	return;
4881	} # ignore3006
4882
4883	$csv->callbacks (error => \&ignore3006);
4884	$csv->bind_columns (\$c, \$s);
4885	while ($csv->getline ($fh)) {
4886	# Error 3006 will not stop the loop
4887	}
4888
4889	=item after_parse
4890
4891	$csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4892	while (my $row = $csv->getline ($fh)) {
4893	$row->[-1] eq "NEW";
4894	}
4895
4896	This callback is invoked after parsing with L</getline> only if no error
4897	occurred. The callback is invoked with two arguments: the current C<CSV>
4898	parser object and an array reference to the fields parsed.
4899
4900	The return code of the callback is ignored unless it is a reference to the
4901	string "skip", in which case the record will be skipped in L</getline_all>.
4902
4903	sub add_from_db
4904	{
4905	my ($csv, $row) = @_;
4906	$sth->execute ($row->[4]);
4907	push @$row, $sth->fetchrow_array;
4908	} # add_from_db
4909
4910	my $aoa = csv (in => "file.csv", callbacks => {
4911	after_parse => \&add_from_db });
4912
4913	This hook can be used for validation:
4914
4915	=over 2
4916
4917	=item FAIL
4918
4919	Die if any of the records does not validate a rule:
4920
4921	after_parse => sub {
4922	$_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4923	die "5th field does not have a valid Dutch zipcode";
4924	}
4925
4926	=item DEFAULT
4927
4928	Replace invalid fields with a default value:
4929
4930	after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4931
4932	=item SKIP
4933
4934	Skip records that have invalid fields (only applies to L</getline_all>):
4935
4936	after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4937
4938	=back
4939
4940	=item before_print
4941
4942	my $idx = 1;
4943	$csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4944	$csv->print (*STDOUT, [ 0, $_ ]) for @members;
4945
4946	This callback is invoked before printing with L</print> only if no error
4947	occurred. The callback is invoked with two arguments: the current C<CSV>
4948	parser object and an array reference to the fields passed.
4949
4950	The return code of the callback is ignored.
4951
4952	sub max_4_fields
4953	{
4954	my ($csv, $row) = @_;
4955	@$row > 4 and splice @$row, 4;
4956	} # max_4_fields
4957
4958	csv (in => csv (in => "file.csv"), out => *STDOUT,
4959	callbacks => { before_print => \&max_4_fields });
4960
4961	This callback is not active for L</combine>.
4962
4963	=back
4964
4965	=head3 Callbacks for csv ()
4966
4967	The L</csv> allows for some callbacks that do not integrate in XS internals
4968	but only feature the L</csv> function.
4969
4970	csv (in => "file.csv",
4971	callbacks => {
4972	filter => { 6 => sub { $_ > 15 } }, # first
4973	after_parse => sub { say "AFTER PARSE"; }, # first
4974	after_in => sub { say "AFTER IN"; }, # second
4975	on_in => sub { say "ON IN"; }, # third
4976	},
4977	);
4978
4979	csv (in => $aoh,
4980	out => "file.csv",
4981	callbacks => {
4982	on_in => sub { say "ON IN"; }, # first
4983	before_out => sub { say "BEFORE OUT"; }, # second
4984	before_print => sub { say "BEFORE PRINT"; }, # third
4985	},
4986	);
4987
4988	=over 2
4989
4990	=item filter
4991
4992	This callback can be used to filter records. It is called just after a new
4993	record has been scanned. The callback accepts a:
4994
4995	=over 2
4996
4997	=item hashref
4998
4999	The keys are the index to the row (the field name or field number, 1-based)
5000	and the values are subs to return a true or false value.
5001
5002	csv (in => "file.csv", filter => {
5003	3 => sub { m/a/ }, # third field should contain an "a"
5004	5 => sub { length > 4 }, # length of the 5th field minimal 5
5005	});
5006
5007	csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
5008
5009	If the keys to the filter hash contain any character that is not a digit it
5010	will also implicitly set L</headers> to C<"auto"> unless L</headers> was
5011	already passed as argument. When headers are active, returning an array of
5012	hashes, the filter is not applicable to the header itself.
5013
5014	All sub results should match, as in AND.
5015
5016	The context of the callback sets C<$_> localized to the field indicated by
5017	the filter. The two arguments are as with all other callbacks, so the other
5018	fields in the current row can be seen:
5019
5020	filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
5021
5022	If the context is set to return a list of hashes (L</headers> is defined),
5023	the current record will also be available in the localized C<%_>:
5024
5025	filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
5026
5027	If the filter is used to I<alter> the content by changing C<$_>, make sure
5028	that the sub returns true in order not to have that record skipped:
5029
5030	filter => { 2 => sub { $_ = uc }}
5031
5032	will upper-case the second field, and then skip it if the resulting content
5033	evaluates to false. To always accept, end with truth:
5034
5035	filter => { 2 => sub { $_ = uc; 1 }}
5036
5037	=item coderef
5038
5039	csv (in => "file.csv", filter => sub { $n++; 0; });
5040
5041	If the argument to C<filter> is a coderef, it is an alias or shortcut to a
5042	filter on column 0:
5043
5044	csv (filter => sub { $n++; 0 });
5045
5046	is equal to
5047
5048	csv (filter => { 0 => sub { $n++; 0 } });
5049
5050	=item filter-name
5051
5052	csv (in => "file.csv", filter => "not_blank");
5053	csv (in => "file.csv", filter => "not_empty");
5054	csv (in => "file.csv", filter => "filled");
5055
5056	These are predefined filters
5057
5058	Given a file like (line numbers prefixed for doc purpose only):
5059
5060	1:1,2,3
5061	2:
5062	3:,
5063	4:""
5064	5:,,
5065	6:, ,
5066	7:"",
5067	8:" "
5068	9:4,5,6
5069
5070	=over 2
5071
5072	=item not_blank
5073
5074	Filter out the blank lines
5075
5076	This filter is a shortcut for
5077
5078	filter => { 0 => sub { @{$_[1]} > 1 or
5079	defined $_[1][0] && $_[1][0] ne "" } }
5080
5081	Due to the implementation, it is currently impossible to also filter lines
5082	that consist only of a quoted empty field. These lines are also considered
5083	blank lines.
5084
5085	With the given example, lines 2 and 4 will be skipped.
5086
5087	=item not_empty
5088
5089	Filter out lines where all the fields are empty.
5090
5091	This filter is a shortcut for
5092
5093	filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
5094
5095	A space is not regarded being empty, so given the example data, lines 2, 3,
5096	4, 5, and 7 are skipped.
5097
5098	=item filled
5099
5100	Filter out lines that have no visible data
5101
5102	This filter is a shortcut for
5103
5104	filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
5105
5106	This filter rejects all lines that do I<not> have at least one field that does
5107	not evaluate to the empty string.
5108
5109	With the given example data, this filter would skip lines 2 through 8.
5110
5111	=back
5112
5113	=back
5114
5115	=item after_in
5116
5117	This callback is invoked for each record after all records have been parsed
5118	but before returning the reference to the caller. The hook is invoked with
5119	two arguments: the current C<CSV> parser object and a reference to the
5120	record. The reference can be a reference to a HASH or a reference to an
5121	ARRAY as determined by the arguments.
5122
5123	This callback can also be passed as an attribute without the C<callbacks>
5124	wrapper.
5125
5126	=item before_out
5127
5128	This callback is invoked for each record before the record is printed. The
5129	hook is invoked with two arguments: the current C<CSV> parser object and a
5130	reference to the record. The reference can be a reference to a HASH or a
5131	reference to an ARRAY as determined by the arguments.
5132
5133	This callback can also be passed as an attribute without the C<callbacks>
5134	wrapper.
5135
5136	This callback makes the row available in C<%_> if the row is a hashref. In
5137	this case C<%_> is writable and will change the original row.
5138
5139	=item on_in
5140
5141	This callback acts exactly as the L</after_in> or the L</before_out> hooks.
5142
5143	This callback can also be passed as an attribute without the C<callbacks>
5144	wrapper.
5145
5146	This callback makes the row available in C<%_> if the row is a hashref. In
5147	this case C<%_> is writable and will change the original row. So e.g. with
5148
5149	my $aoh = csv (
5150	in => \"foo\n1\n2\n",
5151	headers => "auto",
5152	on_in => sub { $_{bar} = 2; },
5153	);
5154
5155	C<$aoh> will be:
5156
5157	[ { foo => 1,
5158	bar => 2,
5159	},
5160	{ foo => 2,
5161	bar => 2,
5162	}
5163	]
5164
5165	=item csv
5166
5167	The I<function> L</csv> can also be called as a method or with an existing
5168	Text::CSV_PP object. This could help if the function is to be invoked a lot
5169	of times and the overhead of creating the object internally over and over
5170	again would be prevented by passing an existing instance.
5171
5172	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
5173
5174	my $aoa = $csv->csv (in => $fh);
5175	my $aoa = csv (in => $fh, csv => $csv);
5176
5177	both act the same. Running this 20000 times on a 20 lines CSV file, showed
5178	a 53% speedup.
5179
5180	=back
5181
5182	=head1 DIAGNOSTICS
5183
5184	This section is also taken from Text::CSV_XS.
5185
5186	Still under construction ...
5187
5188	If an error occurs, C<< $csv->error_diag >> can be used to get information
5189	on the cause of the failure. Note that for speed reasons the internal value
5190	is never cleared on success, so using the value returned by L</error_diag>
5191	in normal cases - when no error occurred - may cause unexpected results.
5192
5193	If the constructor failed, the cause can be found using L</error_diag> as a
5194	class method, like C<< Text::CSV_PP->error_diag >>.
5195
5196	The C<< $csv->error_diag >> method is automatically invoked upon error when
5197	the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
5198	C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
5199	C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
5200	EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
5201
5202	Errors can be (individually) caught using the L</error> callback.
5203
5204	The errors as described below are available. I have tried to make the error
5205	itself explanatory enough, but more descriptions will be added. For most of
5206	these errors, the first three capitals describe the error category:
5207
5208	=over 2
5209
5210	=item *
5211	INI
5212
5213	Initialization error or option conflict.
5214
5215	=item *
5216	ECR
5217
5218	Carriage-Return related parse error.
5219
5220	=item *
5221	EOF
5222
5223	End-Of-File related parse error.
5224
5225	=item *
5226	EIQ
5227
5228	Parse error inside quotation.
5229
5230	=item *
5231	EIF
5232
5233	Parse error inside field.
5234
5235	=item *
5236	ECB
5237
5238	Combine error.
5239
5240	=item *
5241	EHR
5242
5243	HashRef parse related error.
5244
5245	=back
5246
5247	And below should be the complete list of error codes that can be returned:
5248
5249	=over 2
5250
5251	=item *
5252	1001 "INI - sep_char is equal to quote_char or escape_char"
5253
5254	The L<separation character\|/sep_char> cannot be equal to L<the quotation
5255	character\|/quote_char> or to L<the escape character\|/escape_char>, as this
5256	would invalidate all parsing rules.
5257
5258	=item *
5259	1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
5260
5261	Using the L<C<allow_whitespace>\|/allow_whitespace> attribute when either
5262	L<C<quote_char>\|/quote_char> or L<C<escape_char>\|/escape_char> is equal to
5263	C<SPACE> or C<TAB> is too ambiguous to allow.
5264
5265	=item *
5266	1003 "INI - \r or \n in main attr not allowed"
5267
5268	Using default L<C<eol>\|/eol> characters in either L<C<sep_char>\|/sep_char>,
5269	L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> is not
5270	allowed.
5271
5272	=item *
5273	1004 "INI - callbacks should be undef or a hashref"
5274
5275	The L<C<callbacks>\|/Callbacks> attribute only accepts C<undef> or
5276	a hash reference.
5277
5278	=item *
5279	1005 "INI - EOL too long"
5280
5281	The value passed for EOL exceeds its maximum length (16).
5282
5283	=item *
5284	1006 "INI - SEP too long"
5285
5286	The value passed for SEP exceeds its maximum length (16).
5287
5288	=item *
5289	1007 "INI - QUOTE too long"
5290
5291	The value passed for QUOTE exceeds its maximum length (16).
5292
5293	=item *
5294	1008 "INI - SEP undefined"
5295
5296	The value passed for SEP should be defined and not empty.
5297
5298	=item *
5299	1010 "INI - the header is empty"
5300
5301	The header line parsed in the L</header> is empty.
5302
5303	=item *
5304	1011 "INI - the header contains more than one valid separator"
5305
5306	The header line parsed in the L</header> contains more than one (unique)
5307	separator character out of the allowed set of separators.
5308
5309	=item *
5310	1012 "INI - the header contains an empty field"
5311
5312	The header line parsed in the L</header> contains an empty field.
5313
5314	=item *
5315	1013 "INI - the header contains nun-unique fields"
5316
5317	The header line parsed in the L</header> contains at least two identical
5318	fields.
5319
5320	=item *
5321	1014 "INI - header called on undefined stream"
5322
5323	The header line cannot be parsed from an undefined source.
5324
5325	=item *
5326	1500 "PRM - Invalid/unsupported argument(s)"
5327
5328	Function or method called with invalid argument(s) or parameter(s).
5329
5330	=item *
5331	1501 "PRM - The key attribute is passed as an unsupported type"
5332
5333	The C<key> attribute is of an unsupported type.
5334
5335	=item *
5336	1502 "PRM - The value attribute is passed without the key attribute"
5337
5338	The C<value> attribute is only allowed when a valid key is given.
5339
5340	=item *
5341	1503 "PRM - The value attribute is passed as an unsupported type"
5342
5343	The C<value> attribute is of an unsupported type.
5344
5345	=item *
5346	2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
5347
5348	When L<C<eol>\|/eol> has been set to anything but the default, like
5349	C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
5350	L<C<quote_char>\|/quote_char>, where the characters following the C<"\r"> do
5351	not make up the L<C<eol>\|/eol> sequence, this is an error.
5352
5353	=item *
5354	2011 "ECR - Characters after end of quoted field"
5355
5356	Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
5357	field and after the closing double-quote, there should be either a new-line
5358	sequence or a separation character.
5359
5360	=item *
5361	2012 "EOF - End of data in parsing input stream"
5362
5363	Self-explanatory. End-of-file reached while parsing a stream. Can happen only
5364	when reading from streams with L</getline>, as using L</parse> is done on
5365	strings that are not required to have a trailing L<C<eol>\|/eol>.
5366
5367	=item *
5368	2013 "INI - Specification error for fragments RFC7111"
5369
5370	Invalid specification for a URI L</fragment>.
5371
5372	=item *
5373	2014 "ENF - Inconsistent number of fields"
5374
5375	Inconsistent number of fields under strict parsing.
5376
5377	=item *
5378	2021 "EIQ - NL char inside quotes, binary off"
5379
5380	Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
5381	has been selected with the constructor.
5382
5383	=item *
5384	2022 "EIQ - CR char inside quotes, binary off"
5385
5386	Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
5387	has been selected with the constructor.
5388
5389	=item *
5390	2023 "EIQ - QUO character not allowed"
5391
5392	Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
5393	will cause this error.
5394
5395	=item *
5396	2024 "EIQ - EOF cannot be escaped, not even inside quotes"
5397
5398	The escape character is not allowed as last character in an input stream.
5399
5400	=item *
5401	2025 "EIQ - Loose unescaped escape"
5402
5403	An escape character should escape only characters that need escaping.
5404
5405	Allowing the escape for other characters is possible with the attribute
5406	L</allow_loose_escape>.
5407
5408	=item *
5409	2026 "EIQ - Binary character inside quoted field, binary off"
5410
5411	Binary characters are not allowed by default. Exceptions are fields that
5412	contain valid UTF-8, which will automatically be upgraded. Set
5413	L<C<binary>\|/binary> to C<1> to accept binary data.
5414
5415	=item *
5416	2027 "EIQ - Quoted field not terminated"
5417
5418	When parsing a field that started with a quotation character, the field is
5419	expected to be closed with a quotation character. When the parsed line is
5420	exhausted before the quote is found, that field is not terminated.
5421
5422	=item *
5423	2030 "EIF - NL char inside unquoted verbatim, binary off"
5424
5425	=item *
5426	2031 "EIF - CR char is first char of field, not part of EOL"
5427
5428	=item *
5429	2032 "EIF - CR char inside unquoted, not part of EOL"
5430
5431	=item *
5432	2034 "EIF - Loose unescaped quote"
5433
5434	=item *
5435	2035 "EIF - Escaped EOF in unquoted field"
5436
5437	=item *
5438	2036 "EIF - ESC error"
5439
5440	=item *
5441	2037 "EIF - Binary character in unquoted field, binary off"
5442
5443	=item *
5444	2110 "ECB - Binary character in Combine, binary off"
5445
5446	=item *
5447	2200 "EIO - print to IO failed. See errno"
5448
5449	=item *
5450	3001 "EHR - Unsupported syntax for column_names ()"
5451
5452	=item *
5453	3002 "EHR - getline_hr () called before column_names ()"
5454
5455	=item *
5456	3003 "EHR - bind_columns () and column_names () fields count mismatch"
5457
5458	=item *
5459	3004 "EHR - bind_columns () only accepts refs to scalars"
5460
5461	=item *
5462	3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
5463
5464	=item *
5465	3007 "EHR - bind_columns needs refs to writable scalars"
5466
5467	=item *
5468	3008 "EHR - unexpected error in bound fields"
5469
5470	=item *
5471	3009 "EHR - print_hr () called before column_names ()"
5472
5473	=item *
5474	3010 "EHR - print_hr () called with invalid arguments"
5475
5476	=back
5477
5478	=head1 SEE ALSO
5479
5480	L<Text::CSV_XS>, L<Text::CSV>
5481
5482	Older versions took many regexps from L<http://www.din.or.jp/~ohzaki/perl.htm>
5483
5484	=head1 AUTHOR
5485
5486	Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5487	Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5488
5489	Text::CSV_XS was written by E<lt>joe[at]ispsoft.deE<gt>
5490	and maintained by E<lt>h.m.brand[at]xs4all.nlE<gt>.
5491
5492	Text::CSV was written by E<lt>alan[at]mfgrtl.comE<gt>.
5493
5494	=head1 COPYRIGHT AND LICENSE
5495
5496	Copyright 2017- by Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
5497	Copyright 2005-2015 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
5498
5499	Most of the code and doc is directly taken from the pure perl part of
5500	Text::CSV_XS.
5501
5502	Copyright (C) 2007-2016 H.Merijn Brand. All rights reserved.
5503	Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
5504	Copyright (C) 1997 Alan Citterman. All rights reserved.
5505
5506	This library is free software; you can redistribute it and/or modify
5507	it under the same terms as Perl itself.
5508
5509	=cut

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/perllib/cpan/Text/CSV_PP.pm@ 33235

Download in other formats: