Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

CSV_PP.pm@ 33235

Last change on this file since 33235 was 33235, checked in by davidb, 5 years ago
CPAN module for processing CSV files
File size: 168.2 KB

Rev	Line
[33235]	1	package Text::CSV_PP;
	2
	3	################################################################################
	4	#
	5	# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
	6	#
	7	################################################################################
	8	require 5.006001;
	9
	10	use strict;
	11	use Exporter ();
	12	use vars qw($VERSION @ISA @EXPORT_OK);
	13	use Carp;
	14
	15	$VERSION = '1.99';
	16	@ISA = qw(Exporter);
	17	@EXPORT_OK = qw(csv);
	18
	# Field type codes.
	# NOTE(review): presumably the values accepted by types () -- confirm.
	sub PV { 0 }
	sub IV { 1 }
	sub NV { 2 }

	# Bit flags kept per field in _FFLAGS (see is_quoted (), is_binary ()
	# and is_missing ()).
	sub IS_QUOTED  () { 0x0001 }
	sub IS_BINARY  () { 0x0002 }
	sub IS_ERROR   () { 0x0004 }
	sub IS_MISSING () { 0x0010 }

	# Hook bits; callbacks () ORs the same values into _has_hooks.
	sub HOOK_ERROR        () { 0x0001 }
	sub HOOK_AFTER_PARSE  () { 0x0002 }
	sub HOOK_BEFORE_PRINT () { 0x0004 }

	# NOTE(review): not referenced in the visible part of the file;
	# presumably an I/O state flag -- confirm.
	sub useIO_EOF () { 0x0010 }
	33
	# Table of diagnostic messages, keyed by numeric error code.
	# The three-letter prefix names the error class (INI, PRM, ECR, EOF, ESP,
	# ENF, EIQ, EIF, ECB, EIO, EHR).  Code 0 means "no error".
	my $ERRORS = {
	    # Generic errors
	    1000 => "INI - constructor failed",
	    1001 => "INI - sep_char is equal to quote_char or escape_char",
	    1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
	    1003 => "INI - \\r or \\n in main attr not allowed",
	    1004 => "INI - callbacks should be undef or a hashref",
	    1005 => "INI - EOL too long",
	    1006 => "INI - SEP too long",
	    1007 => "INI - QUOTE too long",
	    1008 => "INI - SEP undefined",

	    1010 => "INI - the header is empty",
	    1011 => "INI - the header contains more than one valid separator",
	    1012 => "INI - the header contains an empty field",
	    1013 => "INI - the header contains non-unique fields",
	    1014 => "INI - header called on undefined stream",

	    # Syntax errors
	    1500 => "PRM - Invalid/unsupported argument(s)",
	    1501 => "PRM - The key attribute is passed as an unsupported type",
	    1502 => "PRM - The value attribute is passed without the key attribute",
	    1503 => "PRM - The value attribute is passed as an unsupported type",

	    # Parse errors
	    2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
	    2011 => "ECR - Characters after end of quoted field",
	    2012 => "EOF - End of data in parsing input stream",
	    2013 => "ESP - Specification error for fragments RFC7111",
	    2014 => "ENF - Inconsistent number of fields",

	    # EIQ - Error Inside Quotes
	    2021 => "EIQ - NL char inside quotes, binary off",
	    2022 => "EIQ - CR char inside quotes, binary off",
	    2023 => "EIQ - QUO character not allowed",
	    2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
	    2025 => "EIQ - Loose unescaped escape",
	    2026 => "EIQ - Binary character inside quoted field, binary off",
	    2027 => "EIQ - Quoted field not terminated",

	    # EIF - Error Inside Field
	    2030 => "EIF - NL char inside unquoted verbatim, binary off",
	    2031 => "EIF - CR char is first char of field, not part of EOL",
	    2032 => "EIF - CR char inside unquoted, not part of EOL",
	    2034 => "EIF - Loose unescaped quote",
	    2035 => "EIF - Escaped EOF in unquoted field",
	    2036 => "EIF - ESC error",
	    2037 => "EIF - Binary character in unquoted field, binary off",

	    # Combine errors
	    2110 => "ECB - Binary character in Combine, binary off",

	    # IO errors
	    2200 => "EIO - print to IO failed. See errno",

	    # Hash-Ref errors
	    3001 => "EHR - Unsupported syntax for column_names ()",
	    3002 => "EHR - getline_hr () called before column_names ()",
	    3003 => "EHR - bind_columns () and column_names () fields count mismatch",
	    3004 => "EHR - bind_columns () only accepts refs to scalars",
	    3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
	    3007 => "EHR - bind_columns needs refs to writable scalars",
	    3008 => "EHR - unexpected error in bound fields",
	    3009 => "EHR - print_hr () called before column_names ()",
	    3010 => "EHR - print_hr () called with invalid arguments",

	    4001 => "PRM - The key does not exist as field in the data",

	    0    => "",
	};
	104
	# Compile-time compatibility shims:
	#  * on perls without a usable utf8:: API, install no-op / fallback
	#    implementations of utf8::is_utf8, utf8::decode and utf8::encode;
	#  * when Scalar::Util cannot be loaded, emulate reftype () and
	#    readonly () on top of the B module.
	BEGIN {
	    if ( $] < 5.006 ) {
	        $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
	        no strict 'refs';
	        *{"utf8::is_utf8"} = sub { 0; };
	        *{"utf8::decode"}  = sub { };
	    }
	    elsif ( $] < 5.008 ) {
	        no strict 'refs';
	        *{"utf8::is_utf8"} = sub { 0; };
	        *{"utf8::decode"}  = sub { };
	        *{"utf8::encode"}  = sub { };
	    }
	    elsif ( !defined &utf8::is_utf8 ) {
	        require Encode;
	        # Alias the glob so utf8::is_utf8 resolves to Encode's version.
	        *utf8::is_utf8 = *Encode::is_utf8;
	    }

	    eval q| require Scalar::Util |;
	    if ( $@ ) {
	        eval q| require B |;
	        if ( $@ ) {
	            Carp::croak ($@);
	        }
	        else {
	            # Map B object classes onto the names reftype () reports.
	            my %tmap = qw(
	                B::NULL   SCALAR
	                B::HV     HASH
	                B::AV     ARRAY
	                B::CV     CODE
	                B::IO     IO
	                B::GV     GLOB
	                B::REGEXP REGEXP
	            );
	            *Scalar::Util::reftype = sub (\$) {
	                my $r = shift;
	                return undef unless length(ref($r));
	                my $t = ref(B::svref_2object($r));
	                return
	                    exists $tmap{$t}   ? $tmap{$t}
	                  : length(ref($$r))   ? 'REF'
	                  :                      'SCALAR';
	            };
	            *Scalar::Util::readonly = sub (\$) {
	                my $b = B::svref_2object( $_[0] );
	                $b->FLAGS & 0x00800000; # SVf_READONLY?
	            };
	        }
	    }
	}
	155
	156	################################################################################
	157	#
	158	# Common pure perl methods, taken almost directly from Text::CSV_XS.
	159	# (These should be moved into a common class eventually, so that
	160	# both XS and PP don't need to apply the same changes.)
	161	#
	162	################################################################################
	163
	164	################################################################################
	165	# version
	166	################################################################################
	167
	# Returns the module version string held in $VERSION.
	sub version {
	    $VERSION;
	}
	171
	172	################################################################################
	173	# new
	174	################################################################################
	175
	# Default value of every attribute a Text::CSV_PP object carries.
	# Keys starting with "_" (and ENCODING) hold object state; new () only
	# accepts keys that start with a lowercase letter.
	my %def_attr = (
	    # Characters / strings
	    eol                   => '',
	    sep_char              => ',',
	    quote_char            => '"',
	    escape_char           => '"',
	    undef_str             => undef,

	    # Behavioural switches
	    binary                => 0,
	    decode_utf8           => 1,
	    strict                => 0,
	    verbatim              => 0,
	    formula               => 0,
	    keep_meta_info        => 0,
	    blank_is_undef        => 0,
	    empty_is_undef        => 0,
	    allow_whitespace      => 0,
	    allow_loose_quotes    => 0,
	    allow_loose_escapes   => 0,
	    allow_unquoted_escape => 0,

	    # Quoting / escaping on output
	    always_quote          => 0,
	    quote_empty           => 0,
	    quote_space           => 1,
	    quote_binary          => 1,
	    escape_null           => 1,

	    # Diagnostics
	    auto_diag             => 0,
	    diag_verbose          => 0,

	    # Structured attributes
	    types                 => undef,
	    callbacks             => undef,

	    # Object state
	    _EOF                  => 0,
	    _RECNO                => 0,
	    _STATUS               => undef,
	    _FIELDS               => undef,
	    _FFLAGS               => undef,
	    _STRING               => undef,
	    _ERROR_INPUT          => undef,
	    _COLUMN_NAMES         => undef,
	    _BOUND_COLUMNS        => undef,
	    _AHEAD                => undef,

	    ENCODING              => undef,
	);

	# Alternative attribute spellings accepted by new (), mapped onto the
	# canonical name.
	my %attr_alias = (
	    quote_always => "always_quote",
	    verbose_diag => "diag_verbose",
	    quote_null   => "escape_null",
	    escape       => "escape_char",
	);

	# Diagnostic left behind by the most recent constructor call; it is what
	# error_diag () reports when no object error is available.
	my $last_new_error = Text::CSV_PP->SetDiag(0);
	my $last_error;
	227
	228	# NOT a method: is also used before bless
	# NOT a method: is also used before bless.
	# Returns 1002 when allow_whitespace ($aw) is requested while the quote
	# or escape character starts with a space or TAB, and 0 otherwise.
	sub _unhealthy_whitespace {
	    my ($self, $aw) = @_;
	    return 0 unless $aw;    # nothing to check without allow_whitespace

	    # A non-empty multi-character quote takes precedence over quote_char.
	    my $quote = $self->{quote};
	    $quote = $self->{quote_char} unless defined $quote && length ($quote);

	    foreach my $chr ($quote, $self->{escape_char}) {
	        return 1002 if defined $chr && $chr =~ m/^[ \t]/;
	    }

	    return 0;
	}
	242
	# Validates the separator, quote, escape and eol settings stored in $self
	# (a plain or blessed hash).  Returns 0 when they are consistent,
	# otherwise the numeric error code:
	#   1008 separator undefined/empty, 1006 separator longer than 16,
	#   1007 quote longer than 16, 1005 eol longer than 16,
	#   1001 quote or escape equal to the separator,
	#   1003 CR or LF in separator, quote or escape,
	#   1002 allow_whitespace combined with a SP/TAB quote or escape.
	sub _check_sanity {
	    my $self = shift;

	    my $eol = $self->{eol};
	    my $sep = $self->{sep};
	    defined $sep && length ($sep) or $sep = $self->{sep_char};
	    my $quo = $self->{quote};
	    defined $quo && length ($quo) or $quo = $self->{quote_char};
	    my $esc = $self->{escape_char};

	    # sep_char should not be undefined; test definedness first so that an
	    # undefined separator never reaches a string comparison.
	    defined $sep && $sep ne "" or return 1008;
	    length ($sep) > 16         and return 1006;
	    $sep =~ m/[\r\n]/          and return 1003;

	    if (defined $quo) {
	        $quo eq $sep       and return 1001;
	        length ($quo) > 16 and return 1007;
	        $quo =~ m/[\r\n]/  and return 1003;
	    }
	    if (defined $esc) {
	        $esc eq $sep      and return 1001;
	        $esc =~ m/[\r\n]/ and return 1003;
	    }
	    if (defined $eol) {
	        length ($eol) > 16 and return 1005;
	    }

	    return _unhealthy_whitespace ($self, $self->{allow_whitespace});
	}
	277
	# Returns the sorted names of all public attributes: every key of
	# %def_attr that does not start with "_", plus "sep" and "quote".
	sub known_attributes {
	    my @public = grep { !m/^_/ } ("sep", "quote", keys %def_attr);
	    return sort @public;
	}
	281
	# Constructor.
	#   my $csv = Text::CSV_PP->new ({ option => value, ... });
	# Accepts an optional hash ref of attributes.  Attribute names are
	# lower-cased and aliases (%attr_alias, "sep", "quote",
	# "formula_handling") are mapped onto their canonical names.
	# Returns the new object, or nothing on failure; the failure reason is
	# kept in $last_new_error and reported when auto_diag is set.
	sub new {
	    $last_new_error = Text::CSV_PP->SetDiag(1000,
	        'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);');

	    my $proto = shift;
	    my $class = ref ($proto) || $proto or return;
	    @_ > 0 && ref $_[0] ne "HASH" and return;
	    my $attr  = shift || {};
	    my %attr  = map {
	        my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
	        exists $attr_alias{$k} and $k = $attr_alias{$k};
	        $k => $attr->{$_};
	    } keys %$attr;

	    # "sep" and "quote" are stored as sep_char / quote_char first; whether
	    # they are multi-byte is decided after validation below.
	    my $sep_aliased = 0;
	    if (exists $attr{sep}) {
	        $attr{sep_char} = delete $attr{sep};
	        $sep_aliased = 1;
	    }
	    my $quote_aliased = 0;
	    if (exists $attr{quote}) {
	        $attr{quote_char} = delete $attr{quote};
	        $quote_aliased = 1;
	    }
	    exists $attr{formula_handling} and
	        $attr{formula} = delete $attr{formula_handling};
	    exists $attr{formula} and
	        $attr{formula} = _supported_formula (undef, $attr{formula});
	    for (keys %attr) {
	        if (m/^[a-z]/ && exists $def_attr{$_}) {
	            # uncoverable condition false
	            defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
	            next;
	        }
	        # croak?
	        $last_new_error = Text::CSV_PP->SetDiag(1000, "INI - Unknown attribute '$_'");
	        $attr{auto_diag} and error_diag ();
	        return;
	    }
	    # A separator / quote of more than one byte goes into sep / quote and
	    # the single-character slot is set to NUL.
	    if ($sep_aliased) {
	        my @b = unpack "U0C*", $attr{sep_char};
	        if (@b > 1) {
	            $attr{sep}      = $attr{sep_char};
	            $attr{sep_char} = "\0";
	        }
	        else {
	            $attr{sep} = undef;
	        }
	    }
	    if ($quote_aliased and defined $attr{quote_char}) {
	        my @b = unpack "U0C*", $attr{quote_char};
	        if (@b > 1) {
	            $attr{quote}      = $attr{quote_char};
	            $attr{quote_char} = "\0";
	        }
	        else {
	            $attr{quote} = undef;
	        }
	    }

	    my $self = { %def_attr, %attr };
	    if (my $ec = _check_sanity ($self)) {
	        $last_new_error = Text::CSV_PP->SetDiag($ec);
	        $attr{auto_diag} and error_diag ();
	        return;
	    }
	    if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
	        Carp::carp "The 'callbacks' attribute is set but is not a hash: ignored\n";
	        $self->{callbacks} = undef;
	    }

	    $last_new_error = Text::CSV_PP->SetDiag(0);
	    # Without an explicit eol, inherit the output record separator.
	    defined $\ && !exists $attr{eol} and $self->{eol} = $\;
	    bless $self, $class;
	    defined $self->{types} and $self->types ($self->{types});
	    $self;
	}
	359
	360	# Keep in sync with XS!
	# Keep in sync with XS!
	# Slot number of each attribute as passed to _cache_set ().
	# Only what is accessed from within this module is exposed here.
	my %_cache_id = (
	    quote_char            =>  0,
	    escape_char           =>  1,
	    sep_char              =>  2,
	    binary                =>  3,
	    keep_meta_info        =>  4,
	    always_quote          =>  5,
	    allow_loose_quotes    =>  6,
	    allow_loose_escapes   =>  7,
	    allow_unquoted_escape =>  8,
	    allow_whitespace      =>  9,
	    blank_is_undef        => 10,
	    eol                   => 11,
	    quote                 => 15,
	    verbatim              => 22,
	    empty_is_undef        => 23,
	    auto_diag             => 24,
	    quote_space           => 25,
	    _is_bound             => 26,    # 26 .. 29
	    _has_ahead            => 30,
	    escape_null           => 31,
	    quote_binary          => 32,
	    diag_verbose          => 33,
	    decode_utf8           => 35,
	    _has_hooks            => 36,
	    quote_empty           => 37,
	    formula               => 38,
	    sep                   => 39,    # 39 .. 55
	    strict                => 42,
	    undef_str             => 46,
	);

	# Slots that have no accessor of their own.
	my %_hidden_cache_id = qw(
	    sep_len         38
	    eol_len         12
	    eol_is_cr       13
	    quo_len         16
	    has_error_input 34
	);

	# Slot number => name.  The hidden ids are listed last, so on a shared
	# number (38: formula / sep_len) the hidden name is the one kept.
	my %_reverse_cache_id = (
	    map ({ $_cache_id{$_}        => $_ } keys %_cache_id),
	    map ({ $_hidden_cache_id{$_} => $_ } keys %_hidden_cache_id),
	);
	405
	406	# A `character'
	# Stores a `character' attribute: undef becomes 0, the value is UTF-8
	# decoded, stored in the object, checked with _check_sanity () (croaks
	# with the diagnostic on failure) and pushed into the cache.
	sub _set_attr_C {
	    my ($self, $name, $val, $ec) = @_;
	    $val = 0 unless defined $val;
	    utf8::decode ($val);
	    $self->{$name} = $val;
	    if ($ec = _check_sanity ($self)) {
	        croak ($self->SetDiag ($ec));
	    }
	    $self->_cache_set ($_cache_id{$name}, $val);
	}
	415
	416	# A flag
	# Stores a flag attribute: undef becomes 0; the cache receives the
	# numeric value.
	sub _set_attr_X {
	    my ($self, $name, $val) = @_;
	    $val = 0 unless defined $val;
	    $self->{$name} = $val;
	    $self->_cache_set ($_cache_id{$name}, 0 + $val);
	}
	423
	424	# A number
	# Stores a numeric attribute as given; the cache receives the numeric
	# value.
	sub _set_attr_N {
	    my $self = shift;
	    my ($name, $val) = @_;
	    $self->{$name} = $val;
	    $self->_cache_set ($_cache_id{$name}, 0 + $val);
	}
	430
	431	# Accessor methods.
	432	# It is unwise to change them halfway through a single file!
	# Accessor methods.
	# It is unwise to change them halfway through a single file!

	# Get/set the single quote character.  Setting it also clears the cached
	# multi-character quote.
	sub quote_char {
	    my ($self, @args) = @_;
	    if (@args) {
	        $self->_set_attr_C ("quote_char", $args[0]);
	        $self->_cache_set ($_cache_id{quote}, "");
	    }
	    return $self->{quote_char};
	}
	441
	# Get/set the quote as a string.  A value of more than one byte is stored
	# in {quote} (croaks with 1007 above 16 bytes) and quote_char becomes NUL;
	# anything else is handed to quote_char () and {quote} is emptied.
	# Returns {quote} when non-empty, else quote_char.
	sub quote {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $new = defined $args[0] ? $args[0] : "";
	        utf8::decode ($new);
	        my @bytes = unpack "U0C*", $new;
	        if (@bytes > 1) {
	            croak ($self->SetDiag (1007)) if @bytes > 16;
	            $self->quote_char ("\0");
	        }
	        else {
	            $self->quote_char ($new);
	            $new = "";
	        }
	        $self->{quote} = $new;

	        if (my $ec = _check_sanity ($self)) {
	            croak ($self->SetDiag ($ec));
	        }

	        $self->_cache_set ($_cache_id{quote}, $new);
	    }
	    my $cur = $self->{quote};
	    return defined $cur && length ($cur) ? $cur : $self->{quote_char};
	}
	467
	# Get/set the escape character.  Setting a false value also switches
	# escape_null off.
	sub escape_char {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $esc = $args[0];
	        $self->_set_attr_C ("escape_char", $esc);
	        $self->_set_attr_X ("escape_null", 0) unless $esc;
	    }
	    return $self->{escape_char};
	}
	477
	# Get/set the single separator character.  Setting it also clears the
	# cached multi-character separator.
	sub sep_char {
	    my ($self, @args) = @_;
	    if (@args) {
	        $self->_set_attr_C ("sep_char", $args[0]);
	        $self->_cache_set ($_cache_id{sep}, "");
	    }
	    return $self->{sep_char};
	}
	486
	# Get/set the separator as a string.  A value of more than one byte is
	# stored in {sep} (croaks with 1006 above 16 bytes) and sep_char becomes
	# NUL; anything else is handed to sep_char () and {sep} is emptied.
	# Returns {sep} when non-empty, else sep_char.
	sub sep {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $new = defined $args[0] ? $args[0] : "";
	        utf8::decode ($new);
	        my @bytes = unpack "U0C*", $new;
	        if (@bytes > 1) {
	            croak ($self->SetDiag (1006)) if @bytes > 16;
	            $self->sep_char ("\0");
	        }
	        else {
	            $self->sep_char ($new);
	            $new = "";
	        }
	        $self->{sep} = $new;

	        if (my $ec = _check_sanity ($self)) {
	            croak ($self->SetDiag ($ec));
	        }

	        $self->_cache_set ($_cache_id{sep}, $new);
	    }
	    my $cur = $self->{sep};
	    return defined $cur && length ($cur) ? $cur : $self->{sep_char};
	}
	512
	# Get/set the end-of-line string.  undef is stored as ""; more than 16
	# characters croaks with error 1005.
	sub eol {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $new = defined $args[0] ? $args[0] : "";
	        croak ($self->SetDiag (1005)) if length ($new) > 16;
	        $self->{eol} = $new;
	        $self->_cache_set ($_cache_id{eol}, $new);
	    }
	    return $self->{eol};
	}
	524
	# Get/set the always_quote flag.
	sub always_quote {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("always_quote", $val[0]) if @val;
	    return $self->{always_quote};
	}
	530
	# Get/set the quote_space flag.
	sub quote_space {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("quote_space", $val[0]) if @val;
	    return $self->{quote_space};
	}
	536
	# Get/set the quote_empty flag.
	sub quote_empty {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("quote_empty", $val[0]) if @val;
	    return $self->{quote_empty};
	}
	542
	# Get/set the escape_null flag.
	sub escape_null {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("escape_null", $val[0]) if @val;
	    return $self->{escape_null};
	}
	548
	# Alias for escape_null (): control is transferred with the caller's
	# argument list left untouched.
	sub quote_null {
	    goto &escape_null;
	}
	550
	# Get/set the quote_binary flag.
	sub quote_binary {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("quote_binary", $val[0]) if @val;
	    return $self->{quote_binary};
	}
	556
	# Get/set the binary flag.
	sub binary {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("binary", $val[0]) if @val;
	    return $self->{binary};
	}
	562
	# Get/set the strict flag.
	sub strict {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("strict", $val[0]) if @val;
	    return $self->{strict};
	}
	568
	# Sets diagnostic $err and returns its message combined with the extra
	# text $msg.  A message consisting of digits only is dropped and $msg is
	# prefixed with "# ".  Parts shorter than three consecutive non-blank
	# characters are left out; the glue is "\n\t" when the message ends in
	# ";" or a newline, ": " otherwise.
	sub _SetDiagInfo {
	    my ($self, $err, $msg) = @_;
	    $self->SetDiag ($err);
	    my $text = $self->error_diag;
	    if ($text =~ s/^\d+$//) {
	        $msg =~ s/^/# /;
	    }
	    my $glue  = ($text =~ m/[;\n]$/) ? "\n\t" : ": ";
	    my @parts = grep { m/\S\S\S/ } $text, $msg;
	    return join ($glue, @parts);
	}
	577
	# Normalises a formula-handling spec to its numeric code:
	#   0/none, 1/die, 2/croak, 3/diag, 4/empty (or ""), 5/undef.
	# An undefined spec yields 5.  Matching is case-insensitive.
	# Croaks with diagnostic 1500 on anything else; $self may be undef when
	# called before an object exists (the class name is used instead).
	sub _supported_formula {
	    my ($self, $f) = @_;
	    defined $f or return 5;
	    $f =~ m/^(?: 0 | none    )$/xi ? 0 :
	    $f =~ m/^(?: 1 | die     )$/xi ? 1 :
	    $f =~ m/^(?: 2 | croak   )$/xi ? 2 :
	    $f =~ m/^(?: 3 | diag    )$/xi ? 3 :
	    $f =~ m/^(?: 4 | empty | )$/xi ? 4 :
	    $f =~ m/^(?: 5 | undef   )$/xi ? 5 : do {
	        $self ||= "Text::CSV_PP";
	        croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
	    };
	}
	591
	# Get/set formula handling.  The setter accepts anything
	# _supported_formula () understands; the return value is always the
	# symbolic name (none, die, croak, diag, empty or undef).
	sub formula {
	    my ($self, @args) = @_;
	    if (@args) {
	        $self->_set_attr_N ("formula", _supported_formula ($self, $args[0]));
	    }
	    my @names = qw( none die croak diag empty undef );
	    return $names[_supported_formula ($self, $self->{formula})];
	}
	# Long name for formula (); all arguments are passed through.
	sub formula_handling {
	    my ($self, @args) = @_;
	    return $self->formula (@args);
	}
	601
	# Get/set the decode_utf8 flag.
	sub decode_utf8 {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("decode_utf8", $val[0]) if @val;
	    return $self->{decode_utf8};
	}
	607
	# Get/set keep_meta_info.  undef and "" mean 0; a value starting with a
	# digit is kept as is; any other string is 1, except "false" (any case),
	# which is 0.
	sub keep_meta_info {
	    my $self = shift;
	    if (@_) {
	        my $v = shift;
	        !defined $v || $v eq "" and $v = 0;
	        $v =~ m/^[0-9]/ or $v = (lc $v eq "false") ? 0 : 1; # true/truth = 1
	        $self->_set_attr_X ("keep_meta_info", $v);
	    }
	    $self->{keep_meta_info};
	}
	618
	# Get/set the allow_loose_quotes flag.
	sub allow_loose_quotes {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("allow_loose_quotes", $val[0]) if @val;
	    return $self->{allow_loose_quotes};
	}
	624
	# Get/set the allow_loose_escapes flag.
	sub allow_loose_escapes {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("allow_loose_escapes", $val[0]) if @val;
	    return $self->{allow_loose_escapes};
	}
	630
	# Get/set the allow_whitespace flag.  Croaks with error 1002 when the
	# new value conflicts with a SP/TAB quote or escape character.
	sub allow_whitespace {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $aw = $args[0];
	        if (_unhealthy_whitespace ($self, $aw)) {
	            croak ($self->SetDiag (1002));
	        }
	        $self->_set_attr_X ("allow_whitespace", $aw);
	    }
	    return $self->{allow_whitespace};
	}
	641
	# Get/set the allow_unquoted_escape flag.
	sub allow_unquoted_escape {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("allow_unquoted_escape", $val[0]) if @val;
	    return $self->{allow_unquoted_escape};
	}
	647
	# Get/set the blank_is_undef flag.
	sub blank_is_undef {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("blank_is_undef", $val[0]) if @val;
	    return $self->{blank_is_undef};
	}
	653
	# Get/set the empty_is_undef flag.
	sub empty_is_undef {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("empty_is_undef", $val[0]) if @val;
	    return $self->{empty_is_undef};
	}
	659
	# Get/set the verbatim flag.
	sub verbatim {
	    my ($self, @val) = @_;
	    $self->_set_attr_X ("verbatim", $val[0]) if @val;
	    return $self->{verbatim};
	}
	665
	# Get/set undef_str.  A defined value is stringified before it is
	# stored; undef is stored as undef.
	sub undef_str {
	    my ($self, @args) = @_;
	    if (@args) {
	        my $str = $args[0];
	        $self->{undef_str} = defined $str ? "$str" : undef;
	        $self->_cache_set ($_cache_id{undef_str}, $self->{undef_str});
	    }
	    return $self->{undef_str};
	}
	675
	# Get/set auto_diag.  undef and "" mean 0; a value starting with a digit
	# is kept as is; any other string is 1, except "false" (any case), which
	# is 0.
	sub auto_diag {
	    my $self = shift;
	    if (@_) {
	        my $v = shift;
	        !defined $v || $v eq "" and $v = 0;
	        $v =~ m/^[0-9]/ or $v = (lc $v eq "false") ? 0 : 1; # true/truth = 1
	        $self->_set_attr_X ("auto_diag", $v);
	    }
	    $self->{auto_diag};
	}
	686
	# Get/set diag_verbose.  undef and "" mean 0; a value starting with a
	# digit is kept as is; any other string is 1, except "false" (any case),
	# which is 0.
	sub diag_verbose {
	    my $self = shift;
	    if (@_) {
	        my $v = shift;
	        !defined $v || $v eq "" and $v = 0;
	        $v =~ m/^[0-9]/ or $v = (lc $v eq "false") ? 0 : 1; # true/truth = 1
	        $self->_set_attr_X ("diag_verbose", $v);
	    }
	    $self->{diag_verbose};
	}
	697
	698	################################################################################
	699	# status
	700	################################################################################
	701
	# Returns the status recorded by the last combine () or parse ().
	sub status {
	    my ($self) = @_;
	    return $self->{_STATUS};
	}
	705
	# Returns the object's _EOF indicator.
	sub eof {
	    my ($self) = @_;
	    return $self->{_EOF};
	}
	709
	# Get/set the column types.
	#   types ()        returns the stored array ref
	#   types ($aref)   stores $aref plus a packed copy (one character per
	#                   type code) in {_types}; returns $aref
	#   types (false)   removes both entries; returns undef
	sub types {
	    my ($self, @args) = @_;
	    return $self->{'types'} unless @args;

	    if (my $types = $args[0]) {
	        $self->{'_types'} = join ("", map { chr ($_) } @$types);
	        return $self->{'types'} = $types;
	    }

	    delete $self->{'types'};
	    delete $self->{'_types'};
	    return undef;    # explicit undef, also in list context
	}
	728
	# Get/set the callbacks.
	#   callbacks ()                     returns the current hash ref
	#   callbacks (undef)                removes all callbacks
	#   callbacks ({ name => sub, .. })  installs the given hash
	#   callbacks (name => sub, ..)      same, as a flat list
	# Every key must match /^[\w.]+$/ and every value must be a code ref;
	# anything else croaks with error 1004.  The hooks "error",
	# "after_parse" and "before_print" are recorded as bits 0x01, 0x02 and
	# 0x04 of the _has_hooks attribute.
	sub callbacks {
	    my $self = shift;
	    if (@_) {
	        my $cb;
	        my $hf = 0x00;
	        if (defined $_[0]) {
	            grep { !defined } @_ and croak ($self->SetDiag (1004));
	            $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
	                : @_ % 2 == 0                    ? { @_ }
	                : croak ($self->SetDiag (1004));
	            foreach my $cbk (keys %$cb) {
	                # A key cannot be a ref. That would be stored as the *string
	                # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
	                $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
	                    croak ($self->SetDiag (1004));
	            }
	            exists $cb->{error}        and $hf |= 0x01;
	            exists $cb->{after_parse}  and $hf |= 0x02;
	            exists $cb->{before_print} and $hf |= 0x04;
	        }
	        elsif (@_ > 1) {
	            # (undef, whatever)
	            croak ($self->SetDiag (1004));
	        }
	        $self->_set_attr_X ("_has_hooks", $hf);
	        $self->{callbacks} = $cb;
	    }
	    $self->{callbacks};
	}
	758
	759	################################################################################
	760	# error_diag
	761	################################################################################
	762
	# Reports the last error.
	#   list context    (code, message, position, record number, field)
	#   scalar context  the diagnostic value itself
	#   void context    warns -- or dies at auto_diag level > 1 -- unless the
	#                   code is 0 or 2012 (EOF)
	# Called on an object that has an {_ERROR_DIAG}, the object's error is
	# reported and a defined "error" callback gets the five values instead;
	# otherwise the last constructor error is used.
	sub error_diag {
	    my $self = shift;
	    my @diag = (0 + $last_new_error, $last_new_error, 0, 0, 0);

	    # Docs state to NEVER use UNIVERSAL::isa, because it will never call an
	    # overridden isa method in any class. Well, that is exactly what I want
	    # here
	    if ($self && ref $self &&	# Not a class method or direct call
	        UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{_ERROR_DIAG}) {
	        $diag[0] = 0 + $self->{_ERROR_DIAG};
	        $diag[1] =     $self->{_ERROR_DIAG};
	        exists $self->{_ERROR_POS} and $diag[2] = 1 + $self->{_ERROR_POS};
	        $diag[3] = $self->{_RECNO};
	        exists $self->{_ERROR_FLD} and $diag[4] = $self->{_ERROR_FLD};

	        if ($diag[0] && $self->{callbacks} && $self->{callbacks}{error}) {
	            return $self->{callbacks}{error}->(@diag);
	        }
	    }

	    my $context = wantarray;
	    if (defined $context) {
	        return $context ? @diag : $diag[1];
	    }

	    # Void context, auto-diag
	    return unless $diag[0] && $diag[0] != 2012;

	    my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
	    $diag[4] and $msg =~ s/$/ field $diag[4]/;

	    unless ($self && ref $self) {	# auto_diag
	        # called without args in void context
	        warn $msg;
	        return;
	    }

	    if ($self->{diag_verbose} and $self->{_ERROR_INPUT}) {
	        $msg .= "$self->{_ERROR_INPUT}'\n";
	        $msg .= " " x ($diag[2] - 1);
	        $msg .= "^\n";
	    }

	    my $lvl = $self->{auto_diag};
	    if ($lvl < 2) {
	        # Raise the level when the code two frames up runs under
	        # autodie / Fatal.
	        my @c = caller (2);
	        if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
	            my $hints = $c[10];
	            (exists $hints->{autodie} && $hints->{autodie} or
	             exists $hints->{"guard Fatal"} &&
	            !exists $hints->{"no Fatal"}) and
	                $lvl++;
	            # Future releases of autodie will probably set $^H{autodie}
	            # to "autodie @args", like "autodie :all" or "autodie open"
	            # so we can/should check for "open" or "new"
	        }
	    }
	    if ($lvl > 1) {
	        die $msg;
	    }
	    warn $msg;
	    return;
	}
	821
	# Returns the object's record counter (_RECNO).
	sub record_number {
	    my ($self) = @_;
	    return $self->{_RECNO};
	}
	825
	826	################################################################################
	827	# string
	828	################################################################################
	829
	# string () is the public name of _string ().
	*string = \&_string;

	# Returns the string referenced by {_STRING} (set by combine () and
	# parse ()), or undef when there is none.
	sub _string {
	    my ($self) = @_;
	    my $ref = $self->{_STRING};
	    return defined $ref ? ${$ref} : undef;
	}
	834
	835	################################################################################
	836	# fields
	837	################################################################################
	838
	# fields () is the public name of _fields ().
	*fields = \&_fields;

	# Returns the list held in {_FIELDS}, or undef when {_FIELDS} is not a
	# reference.
	sub _fields {
	    my ($self) = @_;
	    my $fields = $self->{_FIELDS};
	    return ref ($fields) ? @{$fields} : undef;
	}
	843
	844	################################################################################
	845	# meta_info
	846	################################################################################
	847
	# Returns the per-field flags held in {_FFLAGS}, or undef when there are
	# none.
	sub meta_info {
	    my ($self) = @_;
	    my $flags = $self->{_FFLAGS};
	    return $flags ? @{$flags} : undef;
	}
	851
	# Returns 1 or 0 depending on whether field $idx carries the IS_QUOTED
	# flag.  Returns nothing when no flags are available or $idx is not a
	# valid field index.
	sub is_quoted {
	    my ($self, $idx) = @_;
	    my $flags = $self->{_FFLAGS};
	    defined $flags or return;
	    return if $idx =~ /\D/ || $idx < 0 || $idx > $#{$flags};

	    return $flags->[$idx] & IS_QUOTED () ? 1 : 0;
	}
	858
	# Returns 1 or 0 depending on whether field $idx carries the IS_BINARY
	# flag.  Returns nothing when no flags are available or $idx is not a
	# valid field index.
	sub is_binary {
	    my ($self, $idx) = @_;
	    my $flags = $self->{_FFLAGS};
	    defined $flags or return;
	    return if $idx =~ /\D/ || $idx < 0 || $idx > $#{$flags};

	    return $flags->[$idx] & IS_BINARY () ? 1 : 0;
	}
	864
	# Tells whether field $idx was missing from the last record.
	# Returns nothing unless keep_meta_info is on, flags are available and
	# $idx is not negative; returns 1 for an index beyond the stored flags,
	# otherwise 1 or 0 according to the IS_MISSING flag.
	sub is_missing {
	    my ($self, $idx) = @_;
	    return unless $self->{keep_meta_info};	# FIXME
	    $idx < 0 || !ref $self->{_FFLAGS} and return;
	    $idx >= @{$self->{_FFLAGS}} and return 1;
	    $self->{_FFLAGS}[$idx] & IS_MISSING () ? 1 : 0;
	}
	872
	873	################################################################################
	874	# combine
	875	################################################################################
	# combine () is the public name of _combine ().
	*combine = \&_combine;

	# Joins @fields into one CSV string via __combine ().  The fields, the
	# result string and the status are stored in the object; an empty field
	# list yields a false status without calling __combine ().
	# Returns the status.
	sub _combine {
	    my $self   = shift;
	    my @fields = @_;
	    my $str    = "";
	    $self->{_FIELDS} = \@fields;
	    $self->{_STATUS} = (@fields > 0) && $self->__combine (\$str, \@fields, 0);
	    $self->{_STRING} = \$str;
	    return $self->{_STATUS};
	}
	885
	886	################################################################################
	887	# parse
	888	################################################################################
	# parse () is the public name of _parse ().
	*parse = \&_parse;

	# Parses $str via __parse ().  On success the fields and their flags are
	# stored and the status is 1; on failure (or for an undefined $str) both
	# are reset to undef and the status is 0.  A reference as $str croaks
	# with error 1500.  Returns the status.
	sub _parse {
	    my ($self, $str) = @_;

	    croak ($self->SetDiag (1500)) if ref $str;

	    my $fields = [];
	    my $fflags = [];
	    $self->{_STRING} = \$str;

	    my $ok = defined $str && $self->__parse ($fields, $fflags, $str, 0);
	    $self->{_FIELDS} = $ok ? $fields : undef;
	    $self->{_FFLAGS} = $ok ? $fflags : undef;
	    $self->{_STATUS} = $ok ? 1 : 0;

	    return $self->{_STATUS};
	}
	910
	# Get/set the column names.
	#   column_names ()        returns the current names (empty list if none)
	#   column_names (undef)   removes the names
	#   column_names (@names)  or
	#   column_names (\@names) stores the names and returns them
	# Undefined names are stored as "\cAUNDEF\cA".  Croaks with 3001 when a
	# reference is passed among the names and with 3003 when the count
	# differs from the number of bound columns.
	sub column_names {
	    my ($self, @columns) = @_;

	    unless (@columns) {
	        return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
	    }
	    if (@columns == 1 && !defined $columns[0]) {
	        return $self->{_COLUMN_NAMES} = undef;
	    }

	    if (@columns == 1 && ref $columns[0] eq "ARRAY") {
	        @columns = @{ $columns[0] };
	    }
	    elsif (join "", map { defined $_ ? ref $_ : "" } @columns) {
	        croak ($self->SetDiag (3001));
	    }

	    my $bound = $self->{_BOUND_COLUMNS};
	    if ($bound && @columns != @{$bound}) {
	        croak ($self->SetDiag (3003));
	    }

	    my @names = map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns;
	    $self->{_COLUMN_NAMES} = \@names;
	    return @{ $self->{_COLUMN_NAMES} };
	}
	931
	# Reads the first line of $fh as a header line.
	#   $csv->header ($fh, [ seps ], { options });
	# Array-ref arguments add candidate separators, a hash ref supplies
	# options:
	#   sep_set             array ref of candidate separators (default , ;)
	#   detect_bom          strip a byte order mark and set the encoding
	#                       layer on $fh accordingly (default 1)
	#   munge_column_names  "lc" (default), "uc", "none", a code ref or a
	#                       hash ref used to translate names; alias: munge
	#   set_column_names    store the names via column_names () (default 1)
	# Returns the header fields in list context, the object otherwise.
	# Croaks with 1014 (no stream), 1010 (empty header), 1011 (more than one
	# candidate separator present), 1012 (empty field), 1013 (duplicate
	# fields) or 1500 (sep_set is not an array ref).
	sub header {
	    my ($self, $fh, @args) = @_;

	    $fh or croak ($self->SetDiag (1014));

	    my (@seps, %args);
	    for (@args) {
	        if (ref $_ eq "ARRAY") {
	            push @seps, @$_;
	            next;
	        }
	        if (ref $_ eq "HASH") {
	            %args = %$_;
	            next;
	        }
	        croak (q{usage: $csv->header ($fh, [ seps ], { options })});
	    }

	    defined $args{munge} && !defined $args{munge_column_names} and
	        $args{munge_column_names} = $args{munge}; # munge as alias
	    defined $args{detect_bom}         or $args{detect_bom}         = 1;
	    defined $args{set_column_names}   or $args{set_column_names}   = 1;
	    defined $args{munge_column_names} or $args{munge_column_names} = "lc";

	    # Reset any previous leftovers
	    $self->{_RECNO}         = 0;
	    $self->{_AHEAD}         = undef;
	    $self->{_COLUMN_NAMES}  = undef if $args{set_column_names};
	    $self->{_BOUND_COLUMNS} = undef if $args{set_column_names};
	    $self->_cache_set($_cache_id{'_has_ahead'}, 0);

	    if (defined $args{sep_set}) {
	        ref $args{sep_set} eq "ARRAY" or
	            croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
	        @seps = @{$args{sep_set}};
	    }

	    $^O eq "MSWin32" and binmode $fh;
	    my $hdr = <$fh>;
	    # check if $hdr can be empty here, I don't think so
	    defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));

	    # Exactly one of the candidate separators may occur in the header.
	    my %sep;
	    @seps or @seps = (",", ";");
	    foreach my $sep (@seps) {
	        index ($hdr, $sep) >= 0 and $sep{$sep}++;
	    }

	    keys %sep >= 2 and croak ($self->SetDiag (1011));

	    $self->sep (keys %sep);
	    my $enc = "";
	    if ($args{detect_bom}) { # UTF-7 is not supported
	           if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be"   }
	        elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le"   }
	        elsif ($hdr =~ s/^\xfe\xff//)         { $enc = "utf-16be"   }
	        elsif ($hdr =~ s/^\xff\xfe//)         { $enc = "utf-16le"   }
	        elsif ($hdr =~ s/^\xef\xbb\xbf//)     { $enc = "utf-8"      }
	        elsif ($hdr =~ s/^\xf7\x64\x4c//)     { $enc = "utf-1"      }
	        elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
	        elsif ($hdr =~ s/^\x0e\xfe\xff//)     { $enc = "scsu"       }
	        elsif ($hdr =~ s/^\xfb\xee\x28//)     { $enc = "bocu-1"     }
	        elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030"   }
	        elsif ($hdr =~ s/^\x{feff}//)         { $enc = ""           }

	        $self->{ENCODING} = uc $enc;

	        $hdr eq "" and croak ($self->SetDiag (1010));

	        if ($enc) {
	            if ($enc =~ m/([13]).le$/) {
	                # Little-endian: pad the header with the 1 or 3 NUL
	                # bytes and read the same number of bytes from $fh.
	                my $l = 0 + $1;
	                my $x;
	                $hdr .= "\0" x $l;
	                read $fh, $x, $l;
	            }
	            if ($enc ne "utf-8") {
	                require Encode;
	                $hdr = Encode::decode ($enc, $hdr);
	            }
	            binmode $fh, ":encoding($enc)";
	        }
	    }

	    # More than one line was read (e.g. CR line endings): keep the first
	    # line as header and remember the rest as read-ahead.
	    my ($ahead, $eol);
	    if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
	        $eol   = $2;
	        $ahead = $3;
	    }

	    $args{munge_column_names} eq "lc" and $hdr = lc $hdr;
	    $args{munge_column_names} eq "uc" and $hdr = uc $hdr;

	    my $hr = \$hdr; # Will cause croak on perl-5.6.x
	    open my $h, "<", $hr or croak ($self->SetDiag (1010));

	    # NOTE(review): a failing getline () croaks without any message here.
	    my $row = $self->getline ($h) or croak;
	    close $h;

	    if ($ahead) { # Must be after getline, which creates the cache
	        $self->_cache_set ($_cache_id{_has_ahead}, 1);
	        $self->{_AHEAD} = $ahead;
	        $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
	    }

	    my @hdr = @$row;
	    ref $args{munge_column_names} eq "CODE" and
	        @hdr = map { $args{munge_column_names}->($_) } @hdr;
	    ref $args{munge_column_names} eq "HASH" and
	        @hdr = map { $args{munge_column_names}->{$_} || $_ } @hdr;
	    my %hdr; $hdr{$_}++ for @hdr;
	    exists $hdr{""} and croak ($self->SetDiag (1012));
	    unless (keys %hdr == @hdr) {
	        croak ($self->_SetDiagInfo (1013, join ", " =>
	            map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
	    }
	    $args{set_column_names} and $self->column_names (@hdr);
	    wantarray ? @hdr : $self;
	}
	1051
	# Get/set the scalars that receive parsed fields.
	#   bind_columns ()        returns the bound refs (undef if none)
	#   bind_columns (undef)   removes the binding
	#   bind_columns (\$a, ..) binds the given scalar refs and returns them
	# Croaks with 3003 when the count differs from the number of column
	# names and with 3004 when an argument is not a reference to a scalar.
	sub bind_columns {
	    my ($self, @refs) = @_;

	    unless (@refs) {
	        return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
	    }
	    if (@refs == 1 && !defined $refs[0]) {
	        return $self->{_BOUND_COLUMNS} = undef;
	    }

	    my $names = $self->{_COLUMN_NAMES};
	    if ($names && @refs != @{$names}) {
	        croak ($self->SetDiag (3003));
	    }

	    foreach my $ref (@refs) {
	        ref $ref eq "SCALAR" or croak ($self->SetDiag (3004));
	    }

	    $self->_set_attr_N ("_is_bound", scalar @refs);
	    $self->{_BOUND_COLUMNS} = [ @refs ];
	    return @refs;
	}
	1070
	# Reads one record with getline (@args) and returns it as a hash ref
	# keyed by the column names; returns nothing when getline () returns
	# false.  Croaks with 3002 when no column names are set.  When field
	# flags are kept, columns without a parsed field are flagged IS_MISSING.
	sub getline_hr {
	    my ($self, @args) = @_;
	    my %hr;
	    $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
	    my $fr = $self->getline (@args) or return;
	    if (ref $self->{_FFLAGS}) { # missing
	        $self->{_FFLAGS}[$_] = IS_MISSING ()
	            for (@$fr ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
	        # A lone undefined or empty field counts as missing as well.
	        @$fr == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
	            $self->{_FFLAGS}[0] ||= IS_MISSING ();
	    }
	    @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
	    \%hr;
	}
	1084
	# Read all remaining records through getline_all () and return a reference
	# to a list of hash references, each keyed on the current column names.
	# Extra arguments are handed to getline_all () unchanged.
	# Croaks with diagnostic 3002 if no column names are set.
	sub getline_hr_all {
	    my ($self, $io, @args) = @_;

	    $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));

	    my @names = @{$self->{_COLUMN_NAMES}};
	    my $rows  = $self->getline_all ($io, @args);

	    my @records = map {
	        my %rec;
	        @rec{@names} = @{$_};
	        \%rec;
	        } @{$rows};
	    return \@records;
	}
	1096
	# Like print (), but makes sure the record is terminated: when the eol
	# attribute is the empty string it is temporarily set to $\ (or $/ when
	# $\ is false) for the duration of the print. The original eol is put
	# back afterwards and the result of print () is returned.
	sub say {
	    my ($self, $io, @f) = @_;

	    my $saved_eol = $self->eol;
	    if ($saved_eol eq "") {
	        $self->eol ($\ || $/);
	    }

	    # say ($fh, undef) must propagate a real undef to print ()
	    my @payload = (@f == 1 && !defined $f[0]) ? (undef) : @f;
	    my $state   = $self->print ($io, @payload);

	    $self->eol ($saved_eol);
	    return $state;
	}
	1106
	# Print the values of the hash reference $hr to $io, ordered by the
	# current column names. Croaks with diagnostic 3009 when no column names
	# are set and with diagnostic 3010 when $hr is not a HASH reference.
	# Returns whatever print () returns.
	sub print_hr {
	    my ($self, $io, $hr) = @_;

	    croak ($self->SetDiag (3009)) unless $self->{_COLUMN_NAMES};
	    croak ($self->SetDiag (3010)) unless ref $hr eq "HASH";

	    my @values = map { $hr->{$_} } $self->column_names;
	    return $self->print ($io, \@values);
	}
	1113
	# Return the part of the CSV stream read from $io that is selected by the
	# fragment specification $spec:
	#
	#   row=4        row=2-5       row=3;7-*
	#   col=2        col=1-3;6-*
	#   cell=2,3     cell=1,1-4,2  cell=1,1-*,1;2,2-3,3
	#
	# Rows and columns count from 1 and "*" stands for "up to the end".
	# Several ranges are separated by ";", with optional whitespace around
	# every element. The result is a reference to a list with one entry per
	# selected record: a list of fields, or, when column names are set, a hash
	# keyed on those names.
	#
	# Croaks with diagnostic 2013 on an invalid specification.
	sub fragment {
	    my ($self, $io, $spec) = @_;

	    my $qd = qr{\s* [0-9]+ \s* }x;              # digit
	    my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x;    # digit or star
	    my $qr = qr{$qd (?: - $qs )?}x;             # range
	    my $qc = qr{$qr (?: ; $qr )*}x;             # list
	    defined $spec && $spec =~ m{^ \s*
	        \x23 ? \s*                              # optional leading #
	        ( row | col | cell ) \s* =
	        ( $qc                                   # for row and col
	        | $qd , $qd (?: - $qs , $qs)?           # for cell (ranges)
	          (?: ; $qd , $qd (?: - $qs , $qs)? )*  # and cell (range) lists
	        ) \s* $}xi or croak ($self->SetDiag (2013));
	    my ($type, $range) = (lc $1, $2);

	    my @h = $self->column_names ();

	    # With column names set, a record is returned as a hash on those names
	    my $as_record = sub {
	        my ($fields) = @_;
	        @h or return $fields;
	        my %rec;
	        @rec{@h} = @{$fields};
	        return \%rec;
	        };

	    my @c;
	    if ($type eq "cell") {
	        my @spec;
	        my $min_row;
	        my $max_row = 0;
	        # The validation above allows optional whitespace around ";"
	        for (split m/\s*;\s*/ => $range) {
	            my ($tlr, $tlc, $brr, $brc) = (m{
	                ^ \s* ([0-9]+     ) \s* , \s* ([0-9]+     ) \s*
	                (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
	                $}x) or croak ($self->SetDiag (2013));
	            defined $brr or ($brr, $brc) = ($tlr, $tlc);
	            $tlr == 0 || $tlc == 0 ||
	                ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
	                ($brc ne "*" && ($brc == 0 || $brc < $tlc))
	                and croak ($self->SetDiag (2013));
	            $tlc--;
	            $brc-- unless $brc eq "*";
	            defined $min_row or $min_row = $tlr;
	            $tlr < $min_row and $min_row = $tlr;
	            # Once any range is open-ended the last row must stay "*",
	            # a later bounded range may not shorten it again
	            $max_row ne "*" && ($brr eq "*" || $brr > $max_row) and
	                $max_row = $brr;
	            push @spec, [ $tlr, $tlc, $brr, $brc ];
	        }
	        my $r = 0;
	        while (my $row = $self->getline ($io)) {
	            ++$r < $min_row and next;
	            my %row;
	            foreach my $s (@spec) {
	                my ($tlr, $tlc, $brr, $brc) = @$s;
	                $r < $tlr || ($brr ne "*" && $r > $brr) and next;
	                my $rr = $brc eq "*" ? $#$row : $brc;
	                $row{$_} = $row->[$_] for $tlc .. $rr;
	            }
	            push @c, $as_record->([ @row{sort { $a <=> $b } keys %row} ]);
	            $max_row ne "*" && $r == $max_row and last;
	        }
	        return \@c;
	    }

	    # row or col
	    my @r;
	    my $eod = 0;
	    for (split m/\s*;\s*/ => $range) {
	        my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
	            or croak ($self->SetDiag (2013));
	        $to ||= $from;
	        $to eq "*" and ($to, $eod) = ($from, 1);
	        # $to cannot be <= 0 due to regex and ||=
	        $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
	        $r[$_] = 1 for $from .. $to;
	    }

	    my $r = 0;
	    $type eq "col" and shift @r;    # columns are indexed from 0 below
	    $_ ||= 0 for @r;
	    while (my $row = $self->getline ($io)) {
	        $r++;
	        if ($type eq "row") {
	            if (($r > $#r && $eod) || $r[$r]) {
	                push @c, $as_record->($row);
	            }
	            next;
	        }
	        push @c, $as_record->(
	            [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0 .. $#$row ]);
	    }

	    return \@c;
	}
	1214
		# Usage message croaked by csv () and _csv_attr () when no input is given.
	1215	my $csv_usage = q{usage: my $aoa = csv (in => $file);};
	1216
		# Digest the arguments of the csv () function.
		#
		# Accepts either a single hash reference or a list of key/value pairs.
		# Opens the input and/or output handle as needed, pulls every option that
		# is handled by csv () itself out of the attribute set, and creates (or
		# reuses, when passed as "csv") the CSV object with what is left.
		#
		# Returns a hash reference with the CSV object ("csv"), the remaining
		# attributes ("attr"), the handle ("fh"), whether that handle was opened
		# here and so has to be closed by the caller ("cls"), and the extracted
		# options.
		#
		# Croaks on missing input, when both "in" and "out" are plain strings,
		# when a file cannot be opened, or when the CSV object cannot be created.
	1217	sub _csv_attr {
	1218	my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;
	1219
		# csv () always runs in binary mode
	1220	$attr{binary} = 1;
	1221
		# "auto" turns on BOM detection; a plain encoding name is wrapped into
		# a PerlIO ":encoding(...)" layer
	1222	my $enc = delete $attr{enc} \|\| delete $attr{encoding} \|\| "";
	1223	$enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
	1224	$enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
	1225
	1226	my $fh;
	1227	my $sink = 0;
	1228	my $cls = 0; # If I open a file, I have to close it
	1229	my $in = delete $attr{in} \|\| delete $attr{file} or croak $csv_usage;
		# An "out" key that is present but false means: parse only, discard
		# the result (represented as a reference to the string "skip")
	1230	my $out = exists $attr{out} && !$attr{out} ? \"skip"
	1231	: delete $attr{out} \|\| delete $attr{file};
	1232
		# Generated data (code or array input) goes to STDOUT by default
	1233	ref $in eq "CODE" \|\| ref $in eq "ARRAY" and $out \|\|= \*STDOUT;
	1234
	1235	$in && $out && !ref $in && !ref $out and croak join "\n" =>
	1236	qq{Cannot use a string for both in and out. Instead use:},
	1237	qq{ csv (in => csv (in => "$in"), out => "$out");\n};
	1238
	1239	if ($out) {
		# Any reference other than a SCALAR ref, or a glob, is used as handle
	1240	if ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
	1241	$fh = $out;
	1242	}
	1243	elsif (ref $out and "SCALAR" eq ref $out and defined $$out and $$out eq "skip") {
	1244	delete $attr{out};
	1245	$sink = 1;
	1246	}
	1247	else {
	1248	open $fh, ">", $out or croak "$out: $!";
	1249	$cls = 1;
	1250	}
	1251	if ($fh) {
	1252	$enc and binmode $fh, $enc;
		# Default eol: "\n" when the handle has a crlf layer, "\r\n" otherwise
	1253	unless (defined $attr{eol}) {
	1254	my @layers = eval { PerlIO::get_layers ($fh) };
	1255	$attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
	1256	}
	1257	}
	1258	}
	1259
	1260	if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
	1261	# All done
	1262	}
	1263	elsif (ref $in eq "SCALAR") {
	1264	# Strings with code points over 0xFF may not be mapped into in-memory file handles
	1265	# "<$enc" does not change that :(
	1266	open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
	1267	$cls = 1;
	1268	}
		# Any other reference, or a glob, is taken to be an open handle
	1269	elsif (ref $in or "GLOB" eq ref \$in) {
	1270	if (!ref $in && $] < 5.008005) {
	1271	$fh = \*$in; # uncoverable statement ancient perl version required
	1272	}
	1273	else {
	1274	$fh = $in;
	1275	}
	1276	}
		# A plain string is a file name, opened with the encoding layer (if any)
	1277	else {
	1278	open $fh, "<$enc", $in or croak "$in: $!";
	1279	$cls = 1;
	1280	}
	1281	$fh \|\| $sink or croak qq{No valid source passed. "in" is required};
	1282
		# Options handled by csv () itself are removed from the attributes
	1283	my $hdrs = delete $attr{headers};
	1284	my $frag = delete $attr{fragment};
	1285	my $key = delete $attr{key};
	1286	my $val = delete $attr{value};
	1287	my $kh = delete $attr{keep_headers} \|\|
	1288	delete $attr{keep_column_names} \|\|
	1289	delete $attr{kh};
	1290
		# Callbacks may be passed inside "callbacks" or as top-level attributes
	1291	my $cbai = delete $attr{callbacks}{after_in} \|\|
	1292	delete $attr{after_in} \|\|
	1293	delete $attr{callbacks}{after_parse} \|\|
	1294	delete $attr{after_parse};
	1295	my $cbbo = delete $attr{callbacks}{before_out} \|\|
	1296	delete $attr{before_out};
	1297	my $cboi = delete $attr{callbacks}{on_in} \|\|
	1298	delete $attr{on_in};
	1299
		# Options that csv () forwards to the header () method
	1300	my $hd_s = delete $attr{sep_set} \|\|
	1301	delete $attr{seps};
	1302	my $hd_b = delete $attr{detect_bom} \|\|
	1303	delete $attr{bom};
	1304	my $hd_m = delete $attr{munge} \|\|
	1305	delete $attr{munge_column_names};
	1306	my $hd_c = delete $attr{set_column_names};
	1307
		# Short attribute names are renamed unless the long name is given too
	1308	for ([ quo => "quote" ],
	1309	[ esc => "escape" ],
	1310	[ escape => "escape_char" ],
	1311	) {
	1312	my ($f, $t) = @$_;
	1313	exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
	1314	}
	1315
		# A filter is normalized to a hash of code refs; a named filter or a
		# single code ref is stored under key 0, anything else is dropped
	1316	my $fltr = delete $attr{filter};
	1317	my %fltr = (
	1318	not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
	1319	not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
	1320	filled => sub { grep { defined && m/\S/ } @{$_[1]} },
	1321	);
	1322	defined $fltr && !ref $fltr && exists $fltr{$fltr} and
	1323	$fltr = { 0 => $fltr{$fltr} };
	1324	ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
	1325	ref $fltr eq "HASH" or $fltr = undef;
	1326
	1327	exists $attr{formula} and
	1328	$attr{formula} = _supported_formula (undef, $attr{formula});
	1329
		# Defaults that differ for csv (): auto_diag on, escape_null off
	1330	defined $attr{auto_diag} or $attr{auto_diag} = 1;
	1331	defined $attr{escape_null} or $attr{escape_null} = 0;
	1332	my $csv = delete $attr{csv} \|\| Text::CSV_PP->new (\%attr)
	1333	or croak $last_new_error;
	1334
	1335	return {
	1336	csv => $csv,
	1337	attr => { %attr },
	1338	fh => $fh,
	1339	cls => $cls,
	1340	in => $in,
	1341	sink => $sink,
	1342	out => $out,
	1343	enc => $enc,
	1344	hdrs => $hdrs,
	1345	key => $key,
	1346	val => $val,
	1347	kh => $kh,
	1348	frag => $frag,
	1349	fltr => $fltr,
	1350	cbai => $cbai,
	1351	cbbo => $cbbo,
	1352	cboi => $cboi,
	1353	hd_s => $hd_s,
	1354	hd_b => $hd_b,
	1355	hd_m => $hd_m,
	1356	hd_c => $hd_c,
	1357	};
	1358	}
	1359
	# High level function that reads or writes a complete CSV resource.
	#
	#   my $aoa = csv (in => "file.csv");                     # list of lists
	#   my $aoh = csv (in => "file.csv", headers => "auto");  # list of hashes
	#   csv (in => $aoa, out => "file.csv");                  # write
	#
	# May also be called as a method on a CSV object. The arguments are
	# digested by _csv_attr ().
	#
	# Writing (in is a CODE or ARRAY reference): every record is printed to
	# the output handle and 1 is returned.
	#
	# Reading: returns a reference to a list of records (lists, or hashes when
	# headers are in effect), or a reference to a hash of records when "key"
	# is given. Returns nothing when a required header line cannot be read or
	# when "out" was passed with a false value. In void context the parsed
	# data is written out again through a second call to csv ().
	#
	# Croaks with diagnostic 1501/1502/1503 on unsupported key/value/
	# keep_headers arguments, 1012/1013 on empty or duplicate header fields
	# and 4001 when a key field is not among the headers.
	sub csv {
	    @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
	    @_ or croak $csv_usage;

	    my $c = _csv_attr (@_);

	    my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
	    my %hdr;
	    if (ref $hdrs eq "HASH") {
	        # A hash of headers is a rename map applied to the real header line
	        %hdr  = %$hdrs;
	        $hdrs = "auto";
	    }

	    if ($c->{out} && !$c->{sink}) {
	        if (ref $in eq "CODE") {
	            my $hdr = 1;
	            while (my $row = $in->($csv)) {
	                if (ref $row eq "ARRAY") {
	                    $csv->print ($fh, $row);
	                    next;
	                }
	                if (ref $row eq "HASH") {
	                    if ($hdr) {
	                        $hdrs ||= [ map { $hdr{$_} || $_ } keys %$row ];
	                        $csv->print ($fh, $hdrs);
	                        $hdr = 0;
	                    }
	                    $csv->print ($fh, [ @{$row}{@$hdrs} ]);
	                }
	            }
	        }
	        elsif (ref $in->[0] eq "ARRAY") { # aoa
	            ref $hdrs and $csv->print ($fh, $hdrs);
	            for (@{$in}) {
	                $c->{cboi} and $c->{cboi}->($csv, $_);
	                $c->{cbbo} and $c->{cbbo}->($csv, $_);
	                $csv->print ($fh, $_);
	            }
	        }
	        else { # aoh
	            my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
	            defined $hdrs or $hdrs = "auto";
	            ref $hdrs || $hdrs eq "auto" and
	                $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
	            for (@{$in}) {
	                # Make the record available to the callbacks as %_
	                local %_;
	                *_ = $_;
	                $c->{cboi} and $c->{cboi}->($csv, $_);
	                $c->{cbbo} and $c->{cbbo}->($csv, $_);
	                $csv->print ($fh, [ @{$_}{@hdrs} ]);
	            }
	        }

	        $c->{cls} and close $fh;
	        return 1;
	    }

	    # Any header () related option makes header () read the first line
	    my @row1;
	    if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
	        my %harg;
	        defined $c->{hd_s} and $harg{sep_set}            = $c->{hd_s};
	        defined $c->{hd_b} and $harg{detect_bom}         = $c->{hd_b};
	        defined $c->{hd_m} and $harg{munge_column_names} = $hdrs ? "none" : $c->{hd_m};
	        defined $c->{hd_c} and $harg{set_column_names}   = $hdrs ? 0      : $c->{hd_c};
	        @row1 = $csv->header ($fh, \%harg);
	        my @hdr = $csv->column_names;
	        @hdr and $hdrs ||= \@hdr;
	    }

	    if ($c->{kh}) {
	        ref $c->{kh} eq "ARRAY" or croak ($csv->SetDiag (1501));
	        $hdrs ||= "auto";
	    }

	    my $key = $c->{key};
	    if ($key) {
	        !ref $key or ref $key eq "ARRAY" && @$key > 1 or croak ($csv->SetDiag (1501));
	        $hdrs ||= "auto";
	    }
	    my $val = $c->{val};
	    if ($val) {
	        $key or croak ($csv->SetDiag (1502));
	        !ref $val or ref $val eq "ARRAY" && @$val > 0 or croak ($csv->SetDiag (1503));
	    }

	    # Filters on field names need the header line
	    $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
	    if (defined $hdrs) {
	        if (!ref $hdrs) {
	            if ($hdrs eq "skip") {
	                $csv->getline ($fh); # discard;
	            }
	            elsif ($hdrs eq "auto") {
	                my $h = $csv->getline ($fh) or return;
	                $hdrs = [ map {      $hdr{$_} || $_  } @$h ];
	            }
	            elsif ($hdrs eq "lc") {
	                my $h = $csv->getline ($fh) or return;
	                $hdrs = [ map { lc ($hdr{$_} || $_) } @$h ];
	            }
	            elsif ($hdrs eq "uc") {
	                my $h = $csv->getline ($fh) or return;
	                $hdrs = [ map { uc ($hdr{$_} || $_) } @$h ];
	            }
	        }
	        elsif (ref $hdrs eq "CODE") {
	            my $h  = $csv->getline ($fh) or return;
	            my $cr = $hdrs;
	            $hdrs  = [ map { $cr->($hdr{$_} || $_) } @$h ];
	        }
	        # Hand the headers back through the caller's keep_headers array
	        $c->{kh} and $hdrs and @{$c->{kh}} = @$hdrs;
	    }

	    if ($c->{fltr}) {
	        my %f = %{$c->{fltr}};
	        # convert headers to index
	        my @hdr;
	        if (ref $hdrs) {
	            @hdr = @{$hdrs};
	            for (0 .. $#hdr) {
	                exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
	            }
	        }
	        $csv->callbacks (after_parse => sub {
	            my ($CSV, $ROW) = @_; # lexical sub-variables in caps
	            foreach my $FLD (sort keys %f) {
	                local $_ = $ROW->[$FLD - 1];
	                local %_;
	                @hdr and @_{@hdr} = @$ROW;
	                $f{$FLD}->($CSV, $ROW) or return \"skip";
	                $ROW->[$FLD - 1] = $_;
	            }
	            });
	    }

	    my $frag = $c->{frag};
	    my $ref = ref $hdrs
	        ? # aoh
	            do {
	                my @h = $csv->column_names ($hdrs);
	                my %h; $h{$_}++ for @h;
	                exists $h{""} and croak ($csv->SetDiag (1012));
	                unless (keys %h == @h) {
	                    croak ($csv->_SetDiagInfo (1013, join ", " =>
	                        map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
	                }
	                $frag ? $csv->fragment ($fh, $frag) :
	                $key  ? do {
	                            my ($k, $j, @f) = ref $key ? (undef, @$key) : ($key);
	                            if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
	                                croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
	                            }
	                            +{ map {
	                                my $r = $_;
	                                my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
	                                ( $K => (
	                                    $val
	                                        ? ref $val
	                                            ? { map { $_ => $r->{$_} } @$val }
	                                            : $r->{$val}
	                                        : $r ));
	                                } @{$csv->getline_hr_all ($fh)} }
	                        }
	                      : $csv->getline_hr_all ($fh);
	            }
	        : # aoa
	            $frag ? $csv->fragment ($fh, $frag)
	                  : $csv->getline_all ($fh);
	    if ($ref) {
	        @row1 && !$c->{hd_c} && !ref $hdrs and unshift @$ref, \@row1;
	    }
	    else {
	        Text::CSV_PP->auto_diag;
	    }
	    $c->{cls} and close $fh;
	    if ($ref and $c->{cbai} || $c->{cboi}) {
	        # Default is ARRAYref, but with key =>, you'll get a hashref
	        foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
	            local %_;
	            ref $r eq "HASH" and *_ = $r;
	            $c->{cbai} and $c->{cbai}->($csv, $r);
	            $c->{cboi} and $c->{cboi}->($csv, $r);
	        }
	    }

	    $c->{sink} and return;

	    defined wantarray or
	        return csv (%{$c->{attr}}, in => $ref, headers => $hdrs, %{$c->{attr}});

	    return $ref;
	}
	1551
	1552	# The end of the common pure perl part.
	1553
	1554	################################################################################
	1555	#
	1556	# The following are methods implemented in XS in Text::CSV_XS or
	1557	# helper methods for Text::CSV_PP only
	1558	#
	1559	################################################################################
	1560
	# Build the working context (a plain hash) used by a single combine or
	# parse operation.
	#
	# The attribute-derived part is computed once and stored in
	# $self->{_CACHE}; later calls start from a copy of that cache. The
	# per-call state (utf8, size, used, bound columns, eol_pos, eolx) is
	# reset on every call. Also clears $last_error.
	#
	# Returns the context hash reference.
	sub _setup_ctx {
	    my $self = shift;

	    $last_error = undef;

	    my $ctx;
	    if ($self->{_CACHE}) {
	        %$ctx = %{$self->{_CACHE}};
	    } else {
	        # sep (multi-byte capable) wins over sep_char, default is ","
	        $ctx->{sep} = ',';
	        if (defined $self->{sep_char}) {
	            $ctx->{sep} = $self->{sep_char};
	        }
	        if (defined $self->{sep} and $self->{sep} ne '') {
	            use bytes;
	            $ctx->{sep} = $self->{sep};
	            my $sep_len = length($ctx->{sep});
	            $ctx->{sep_len} = $sep_len if $sep_len > 1;
	        }

	        # An explicitly undefined or empty quote_char is stored as "\0"
	        $ctx->{quo} = '"';
	        if (exists $self->{quote_char}) {
	            my $quote_char = $self->{quote_char};
	            if (defined $quote_char and length $quote_char) {
	                $ctx->{quo} = $quote_char;
	            } else {
	                $ctx->{quo} = "\0";
	            }
	        }
	        if (defined $self->{quote} and $self->{quote} ne '') {
	            use bytes;
	            $ctx->{quo} = $self->{quote};
	            my $quote_len = length($ctx->{quo});
	            $ctx->{quo_len} = $quote_len if $quote_len > 1;
	        }

	        # Same convention for escape_char
	        $ctx->{escape_char} = '"';
	        if (exists $self->{escape_char}) {
	            my $escape_char = $self->{escape_char};
	            if (defined $escape_char and length $escape_char) {
	                $ctx->{escape_char} = $escape_char;
	            } else {
	                $ctx->{escape_char} = "\0";
	            }
	        }

	        if (defined $self->{eol}) {
	            my $eol     = $self->{eol};
	            my $eol_len = length($eol);
	            $ctx->{eol}     = $eol;
	            $ctx->{eol_len} = $eol_len;
	            if ($eol_len == 1 and $eol eq "\015") {
	                $ctx->{eol_is_cr} = 1;
	            }
	        }

	        $ctx->{undef_flg} = 0;
	        if (defined $self->{undef_str}) {
	            $ctx->{undef_str} = $self->{undef_str};
	            $ctx->{undef_flg} = 3 if utf8::is_utf8($self->{undef_str});
	        } else {
	            $ctx->{undef_str} = undef;
	        }

	        if (defined $self->{_types}) {
	            $ctx->{types}     = $self->{_types};
	            $ctx->{types_len} = length($ctx->{types});
	        }

	        if (defined $self->{_is_bound}) {
	            $ctx->{is_bound} = $self->{_is_bound};
	        }

	        if (defined $self->{callbacks}) {
	            my $cb = $self->{callbacks};
	            $ctx->{has_hooks} = 0;
	            if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
	                $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
	            }
	            if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
	                $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
	            }
	        }

	        # Attributes that default to 0
	        for (qw/
	            binary decode_utf8 always_quote strict quote_empty
	            allow_loose_quotes allow_loose_escapes
	            allow_unquoted_escape allow_whitespace blank_is_undef
	            empty_is_undef verbatim auto_diag diag_verbose
	            keep_meta_info formula
	        /) {
	            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
	        }
	        # Attributes that default to 1
	        for (qw/quote_space escape_null quote_binary/) {
	            $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
	        }
	        if ($ctx->{escape_char} eq "\0") {
	            $ctx->{escape_null} = 0;
	        }

	        # FIXME: readonly
	        %{$self->{_CACHE}} = %$ctx;
	    }

	    $ctx->{utf8} = 0;
	    $ctx->{size} = 0;
	    $ctx->{used} = 0;

	    if ($ctx->{is_bound}) {
	        my $bound = $self->{_BOUND_COLUMNS};
	        if ($bound and ref $bound eq 'ARRAY') {
	            $ctx->{bound} = $bound;
	        } else {
	            $ctx->{is_bound} = 0;
	        }
	    }

	    # eolx is set for a non-standard eol: anything in verbatim mode,
	    # anything of two or more characters, or a single character other
	    # than CR or LF. (No "|" inside the class: it would be taken as a
	    # literal pipe and make an eol of "|" look like a standard one.)
	    $ctx->{eol_pos} = -1;
	    $ctx->{eolx} = $ctx->{eol_len}
	        ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
	            ? 1
	            : $ctx->{eol} =~ /\A[\015\012]/ ? 0 : 1
	        : 0;

	    if ($ctx->{sep_len} and $ctx->{sep_len} > 1 and _is_valid_utf8($ctx->{sep})) {
	        $ctx->{utf8} = 1;
	    }
	    if ($ctx->{quo_len} and $ctx->{quo_len} > 1 and _is_valid_utf8($ctx->{quo})) {
	        $ctx->{utf8} = 1;
	    }

	    $ctx;
	}
	1694
	# Update one entry of the attribute cache, if a cache exists.
	#
	# $idx is a numeric cache index that is translated to an attribute name
	# through %_reverse_cache_id. Returns nothing when there is no cache yet,
	# 1 otherwise. An unknown index only causes a warning.
	sub _cache_set {
	    my ($self, $idx, $value) = @_;
	    exists $self->{_CACHE} or return;
	    my $cache = $self->{_CACHE};

	    # attribute name => cache slot, grouped by the way they are stored
	    my %char_slot = (sep_char => 'sep', quote_char => 'quo');
	    my %str_slot  = (sep      => 'sep', quote      => 'quo');
	    my %flag_slot = (
	        _has_ahead => 'has_ahead',
	        _has_hooks => 'has_hooks',
	        _is_bound  => 'is_bound',
	        );

	    my $name = $_reverse_cache_id{$idx};
	    if (!defined $name) {
	        warn (sprintf "Unknown cache index %d ignored\n", $idx);
	    }
	    elsif (my $slot = $char_slot{$name}) {
	        # Single character: the multi-byte length is reset
	        $cache->{$slot}         = $value;
	        $cache->{"${slot}_len"} = 0;
	    }
	    elsif ($slot = $flag_slot{$name}) {
	        $cache->{$slot} = $value;
	    }
	    elsif ($slot = $str_slot{$name}) {
	        # String: only stored when not empty, length kept when above 1
	        use bytes;
	        my $len = bytes::length($value);
	        $cache->{$slot}         = $value if $len;
	        $cache->{"${slot}_len"} = $len == 1 ? 0 : $len;
	    }
	    elsif ($name eq 'eol') {
	        $cache->{eol}       = $value if defined($value);
	        $cache->{eol_is_cr} = $value eq "\015" ? 1 : 0;
	    }
	    elsif ($name eq 'undef_str') {
	        if (defined $value) {
	            $cache->{undef_str} = $value;
	            $cache->{undef_flg} = 3 if utf8::is_utf8($value);
	        } else {
	            $cache->{undef_str} = undef;
	            $cache->{undef_flg} = 0;
	        }
	    }
	    else {
	        $cache->{$name} = $value;
	    }
	    return 1;
	}
	1750
	# Dump the attribute cache as warnings, for debugging.
	# Warns "CACHE: invalid" and returns when no cache exists yet.
	sub _cache_diag {
	    my $self = shift;
	    if (!exists $self->{_CACHE}) {
	        warn ("CACHE: invalid\n");
	        return;
	    }

	    my $cache = $self->{_CACHE};
	    warn ("CACHE:\n");

	    # label shown => cache slot holding the character
	    foreach my $pair ([ quote_char  => 'quo' ],
	                      [ escape_char => 'escape_char' ],
	                      [ sep_char    => 'sep' ]) {
	        my ($label, $slot) = @{$pair};
	        $self->__cache_show_char ($label => $cache->{$slot});
	    }

	    my @byte_slots = qw(
	        binary decode_utf8 allow_loose_escapes allow_loose_quotes allow_unquoted_escape
	        allow_whitespace always_quote quote_empty quote_space
	        escape_null quote_binary auto_diag diag_verbose formula strict
	        has_error_input blank_is_undef empty_is_undef has_ahead
	        keep_meta_info verbatim has_hooks eol_is_cr eol_len
	        );
	    foreach my $slot (@byte_slots) {
	        $self->__cache_show_byte ($slot => $cache->{$slot});
	    }

	    $self->__cache_show_str (eol => $cache->{eol_len}, $cache->{eol});

	    # Multi-byte sep and quote are only shown when longer than one byte
	    my $sep_len = $cache->{sep_len};
	    $self->__cache_show_byte (sep_len => $sep_len);
	    if ($sep_len and $sep_len > 1) {
	        $self->__cache_show_str (sep => $sep_len, $cache->{sep});
	    }
	    my $quo_len = $cache->{quo_len};
	    $self->__cache_show_byte (quo_len => $quo_len);
	    if ($quo_len and $quo_len > 1) {
	        $self->__cache_show_str (quote => $quo_len, $cache->{quo});
	    }
	}
	1782
	# Warn one cache entry as "name hex:decimal". The hex part is the
	# ordinal of the first character of the value, the decimal part the
	# value itself; an undefined value is shown as 0 for both.
	sub __cache_show_byte {
	    my ($self, $key, $value) = @_;
	    my ($ordinal, $number) = defined $value ? (ord ($value), $value) : (0, 0);
	    warn (sprintf " %-21s %02x:%3d\n", $key, $ordinal, $number);
	}
	1787
	# Warn one single-character cache entry as "name hex:string". Only the
	# first byte of the value is shown; the string part is rendered by
	# __pretty_str ().
	sub __cache_show_char {
	    my ($self, $key, $value) = @_;

	    my $shown = $value;
	    if (defined $value) {
	        # Reduce the value to the character built from its first byte
	        my ($lead) = unpack "U0C*", $value;
	        $shown = pack "U*", $lead;
	    }

	    my $ordinal = defined $shown ? ord ($shown) : 0;
	    warn (sprintf " %-21s %02x:%s\n", $key, $ordinal, $self->__pretty_str ($shown, 1));
	}
	1797
	# Warn one string cache entry as "name length:string", with the string
	# rendered by __pretty_str () and cut off at $len.
	sub __cache_show_str {
	    my ($self, $key, $len, $value) = @_;
	    my $rendered = $self->__pretty_str ($value, $len);
	    warn (sprintf " %-21s %02d:%s\n", $key, $len, $rendered);
	}
	1802
	# Render the first $len characters of $str as a double-quoted string for
	# diagnostics: double quotes are backslash-escaped and every character
	# other than TAB and printable ASCII is shown as \x{hex}.
	# Returns the empty string for an undefined $str.
	sub __pretty_str { # FIXME
	    my ($self, $str, $len) = @_;
	    defined $str or return '';

	    my $shown = substr ($str, 0, $len);
	    $shown =~ s/"/\\"/g;
	    $shown =~ s/([^\x09\x20-\x7e])/sprintf ('\\x{%x}', ord $1)/eg;
	    return '"' . $shown . '"';
	}
	1811
	# Run the callback registered under $name, if any, with the object and
	# the field list as arguments.
	#
	# Returns 0 when there is no usable callback or when the callback asks
	# to skip the record by returning a reference to the string "skip";
	# otherwise the number of values the callback returned.
	sub _hook {
	    my ($self, $name, $fields) = @_;

	    my $callbacks = $self->{callbacks} or return 0;
	    my $code      = $callbacks->{$name};
	    $code && ref $code eq 'CODE' or return 0;

	    my @res = $code->($self, $fields);
	    if (@res && ref $res[0] eq 'SCALAR' && ${$res[0]} eq "skip") {
	        return 0;
	    }
	    return scalar @res;
	}
	1825
	1826	################################################################################
	1827	# methods for combine
	1828	################################################################################
	1829
	# Join the fields in $fields (or the bound columns, when $fields is empty
	# and columns are bound) into one CSV record and store it, including the
	# eol, in the scalar referenced by $dst.
	#
	# Returns 1 on success. Returns 0, with diagnostic 2110 set and the
	# offending field kept in _ERROR_INPUT, when a field holds a character
	# that is not allowed without the binary attribute.
	sub __combine {
	    my ($self, $dst, $fields, $useIO) = @_;

	    my $ctx = $self->_setup_ctx;

	    my ($quot, $sep, $esc) = @{$ctx}{qw/quo sep escape_char/};

	    if (!defined $quot or $quot eq "\0") { $quot = ''; }

	    # Pattern for what needs escaping, cached per quote/escape pair
	    my $re_esc;
	    if ($esc ne '' and $esc ne "\0") {
	        if ($quot ne '') {
	            $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
	        } else {
	            $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
	        }
	    }

	    my $bound = 0;
	    my $n = @$fields - 1;
	    if ($n < 0 and $ctx->{is_bound}) {
	        $n = $ctx->{is_bound} - 1;
	        $bound = 1;
	    }

	    # With keep_meta_info >= 10 the quotation state kept from the last
	    # parse is used to decide on quoting
	    my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;

	    my $must_be_quoted;
	    my @results;
	    for (my $i = 0; $i <= $n; $i++) {
	        my $v_ref;
	        if ($bound) {
	            $v_ref = $self->__bound_field($ctx, $i, 1);
	        } else {
	            if (@$fields > $i) {
	                $v_ref = \($fields->[$i]);
	            }
	        }
	        next unless $v_ref;

	        my $value = $$v_ref;

	        if (!defined $value) {
	            # Test for definedness, so that an undef_str of "0" is honoured
	            if (defined $ctx->{undef_str}) {
	                if ($ctx->{undef_flg}) {
	                    $ctx->{utf8} = 1;
	                    $ctx->{binary} = 1;
	                }
	                push @results, $ctx->{undef_str};
	            } else {
	                push @results, '';
	            }
	            next;
	        }

	        if ( substr($value, 0, 1) eq '=' && $ctx->{formula} ) {
	            $value = $self->_formula($ctx, $value, $i);
	            if (!defined $value) {
	                push @results, '';
	                next;
	            }
	        }

	        $must_be_quoted = $ctx->{always_quote} ? 1 : 0;
	        if ($value eq '') {
	            $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
	        }
	        else {

	            if (utf8::is_utf8($value)) {
	                $ctx->{utf8} = 1;
	                $ctx->{binary} = 1;
	            }

	            $must_be_quoted++ if $check_meta && $self->is_quoted($i);

	            if (!$must_be_quoted and $quot ne '') {
	                use bytes;
	                $must_be_quoted++ if
	                    ($value =~ /\Q$quot\E/) ||
	                    ($sep ne '' and $sep ne "\0" and $value =~ /\Q$sep\E/) ||
	                    ($esc ne '' and $esc ne "\0" and $value =~ /\Q$esc\E/) ||
	                    ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
	                    ($ctx->{quote_space} && $value =~ /[\x09\x20]/);
	            }

	            if (!$ctx->{binary} and $value =~ /[^\x09\x20-\x7E]/) {
	                # an argument contained an invalid character...
	                $self->{_ERROR_INPUT} = $value;
	                $self->SetDiag(2110);
	                return 0;
	            }

	            if ($re_esc) {
	                $value =~ s/($re_esc)/$esc$1/g;
	            }
	            if ($ctx->{escape_null}) {
	                $value =~ s/\0/${esc}0/g;
	            }
	        }

	        if ($must_be_quoted) {
	            $value = $quot . $value . $quot;
	        }
	        push @results, $value;
	    }

	    $$dst = join($sep, @results) . ( defined $ctx->{eol} ? $ctx->{eol} : '' );

	    return 1;
	}
	1941
	# Apply the configured formula action to a field value that starts
	# with "=". The action is the numeric $ctx->{formula}:
	#
	#   false   nothing is returned
	#   1, 2    die with "Formulas are forbidden"
	#   3       warn about the formula and return the value unchanged
	#   4       return the empty string
	#   5       return undef
	#
	# Any other action returns nothing. $i is the field number used in the
	# warning.
	sub _formula {
	    my ($self, $ctx, $value, $i) = @_;

	    my $action = $ctx->{formula} or return;

	    # XS croak behaves like PP's "die"
	    ($action == 1 || $action == 2) and die "Formulas are forbidden\n";

	    $action == 4 and return '';
	    $action == 5 and return undef;
	    $action == 3 or return;

	    # Action 3: diagnose, naming record and column when known
	    my $rec = $ctx->{recno}
	        ? sprintf (" in record %lu", $ctx->{recno} + 1)
	        : '';
	    my $field = '';
	    my $names = $self->{_COLUMN_NAMES};
	    if (ref $names eq 'ARRAY' and @$names >= $i - 1) {
	        my $name = $names->[$i - 1];
	        defined $name and $field = sprintf " (column: '%.100s')", $name;
	    }
	    warn sprintf ("Field %d%s%s contains formula '%s'\n", $i, $field, $rec, $value);
	    return $value;
	}
	1972
	# Combine the fields in the array reference $fields into one CSV record
	# and print it to $io. An undefined $fields prints an empty field list.
	#
	# The before_print callback, if any, is run first. Returns the empty
	# string when the fields cannot be combined; otherwise the true result of
	# the handle's print, or the result of _set_error_diag (2200) when that
	# print fails. Croaks when $fields is defined but not an array reference.
	sub print {
	    my ($self, $io, $fields) = @_;

	    require IO::Handle;

	    defined $fields or $fields = [];
	    ref ($fields) eq 'ARRAY'
	        or Carp::croak ("Expected fields to be an array ref");

	    $self->_hook (before_print => $fields);

	    my $record = "";
	    $self->__combine (\$record, $fields, 1) or return '';

	    # The record already carries its eol: keep $\ out of the output
	    local $\ = '';

	    return $io->print ($record) || $self->_set_error_diag (2200);
	}
	1993
	1994	################################################################################
	1995	# methods for parse
	1996	################################################################################
	1997
	1998
	# Parse one record from $src into $fields (and $fflags) using a fresh
	# context, then run the after_parse callback when the parse succeeded and
	# such a hook is registered.
	#
	# Returns the parse state when that is true, otherwise true only if no
	# error was recorded in $last_error.
	sub __parse { # cx_xsParse
	    my ($self, $fields, $fflags, $src, $useIO) = @_;

	    my $ctx   = $self->_setup_ctx;
	    my $state = $self->___parse ($ctx, $fields, $fflags, $src, $useIO);
	    my $hooks = $ctx->{has_hooks} || 0;

	    $state && ($hooks & HOOK_AFTER_PARSE)
	        and $self->_hook (after_parse => $fields);

	    return $state || !$last_error;
	}
	2010
		# Parse one record, either from the handle or from the string in $src,
		# and do the bookkeeping around the actual parser (____parse): pending
		# read-ahead data, record counter, EOF state, strict field count, cache
		# update, field flags and type coercion.
		#
		# $useIO is false when $src is the string to parse; otherwise it is a
		# bit set in which useIO_EOF marks end of file.
		#
		# Returns the result of ____parse, or nothing when strict mode detects
		# a record with a deviating number of fields (diagnostic 2014).
	2011	sub ___parse { # cx_c_xsParse
	2012	my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;
	2013
		# Use the configured eol as input record separator when it is
		# non-standard (eolx) or a single CR
	2014	local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};
	2015
	2016	if ($ctx->{useIO} = $useIO) {
	2017	require IO::Handle;
	2018
		# Start with whatever an earlier call left in _AHEAD
	2019	$ctx->{tmp} = undef;
	2020	if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
	2021	$ctx->{tmp} = $self->{_AHEAD};
	2022	$ctx->{size} = length $ctx->{tmp};
	2023	$ctx->{used} = 0;
	2024	}
	2025	} else {
		# Parsing a string: the whole source is the buffer
	2026	$ctx->{tmp} = $src;
	2027	$ctx->{size} = length $src;
	2028	$ctx->{used} = 0;
	2029	$ctx->{utf8} = utf8::is_utf8($src);
	2030	}
		# Forget the error input of a previous failure
	2031	if ($ctx->{has_error_input}) {
	2032	$self->{_ERROR_INPUT} = undef;
	2033	$ctx->{has_error_input} = 0;
	2034	}
	2035
	2036	my $result = $self->____parse($ctx, $src, $fields, $fflags);
	2037	$self->{_RECNO} = ++($ctx->{recno});
	2038	$self->{_EOF} = '';
	2039
		# Strict mode: the first field count seen is remembered in strict_n
		# and every record has to match it
	2040	if ($ctx->{strict}) {
	2041	$ctx->{strict_n} \|\|= $ctx->{fld_idx};
	2042	if ($ctx->{strict_n} != $ctx->{fld_idx}) {
	2043	$self->__parse_error($ctx, 2014, $ctx->{used});
	2044	return;
	2045	}
	2046	}
	2047
	2048	if ($ctx->{useIO}) {
		# Keep the unconsumed rest of the buffer for the next call,
		# otherwise drop the read-ahead state and record EOF if flagged
	2049	if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
	2050	$self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
	2051	} else {
	2052	$ctx->{has_ahead} = 0;
	2053	if ($ctx->{useIO} & useIO_EOF) {
	2054	$self->{_EOF} = 1;
	2055	}
	2056	}
		# Save the context, per-call state included, back into the cache
	2057	%{$self->{_CACHE}} = %$ctx;
	2058
		# The field flags are only stored when keep_meta_info is set
	2059	if ($fflags) {
	2060	if ($ctx->{keep_meta_info}) {
	2061	$self->{_FFLAGS} = $fflags;
	2062	} else {
	2063	undef $fflags;
	2064	}
	2065	}
	2066	} else {
	2067	%{$self->{_CACHE}} = %$ctx;
	2068	}
	2069
		# Coerce the fields according to the types string: one character per
		# field, whose ordinal is compared with IV (integer) and NV (number)
	2070	if ($result and $ctx->{types}) {
	2071	my $len = @$fields;
	2072	for(my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
	2073	my $value = $fields->[$i];
	2074	next unless defined $value;
	2075	my $type = ord(substr($ctx->{types}, $i, 1));
	2076	if ($type == IV) {
	2077	$fields->[$i] = int($value);
	2078	} elsif ($type == NV) {
	2079	$fields->[$i] = $value + 0.0;
	2080	}
	2081	}
	2082	}
	2083
	2084	$result;
	2085	}
	2086
	2087	sub ____parse { # cx_Parse
	2088	my ($self, $ctx, $src, $fields, $fflags) = @_;
	2089
	2090	my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
	2091
	2092	utf8::encode($sep) if !$ctx->{utf8} and $ctx->{sep_len};
	2093	utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
	2094	utf8::encode($eol) if !$ctx->{utf8} and $ctx->{eol_len};
	2095
	2096	my $seenSomething = 0;
	2097	my $waitingForField = 1;
	2098	my ($value, $v_ref);
	2099	$ctx->{fld_idx} = my $fnum = 0;
	2100	$ctx->{flag} = 0;
	2101
	2102	my $re_str = join '\|', map({$_ eq "\0" ? '[\\0]' : quotemeta($_)} sort {length $b <=> length $a} grep {defined $_ and $_ ne ''} $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
	2103	$ctx->{_re} = qr/$re_str/;
	2104	my $re = qr/$re_str\|[^\x09\x20-\x7E]\|$/;
	2105
	2106	LOOP:
	2107	while($self->__get_from_src($ctx, $src)) {
	2108	while($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
	2109	my ($hit, $c) = ($1, $2);
	2110	$ctx->{used} = pos($ctx->{tmp});
	2111	if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
	2112	$self->{_AHEAD} = $hit;
	2113	$ctx->{has_ahead} = 1;
	2114	$ctx->{has_leftover} = 1;
	2115	last;
	2116	}
	2117	last if $seenSomething and $hit eq '' and $c eq ''; # EOF
	2118
	2119	# new field
	2120	if (!$v_ref) {
	2121	if ($ctx->{is_bound}) {
	2122	$v_ref = $self->__bound_field($ctx, $fnum, 0);
	2123	} else {
	2124	$value = '';
	2125	$v_ref = \$value;
	2126	}
	2127	$fnum++;
	2128	return unless $v_ref;
	2129	$ctx->{flag} = 0;
	2130	$ctx->{fld_idx}++;
	2131	}
	2132
	2133	$seenSomething = 1;
	2134
	2135	if (defined $hit and $hit ne '') {
	2136	if ($waitingForField) {
	2137	$waitingForField = 0;
	2138	}
	2139	if ($hit =~ /[^\x09\x20-\x7E]/) {
	2140	$ctx->{flag} \|= IS_BINARY;
	2141	}
	2142	$$v_ref .= $hit;
	2143	}
	2144
	2145	RESTART:
	2146	if (defined $c and defined $sep and $c eq $sep) {
	2147	if ($waitingForField) {
	2148	# ,1,"foo, 3",,bar,
	2149	# ^ ^
	2150	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
	2151	$$v_ref = undef;
	2152	} else {
	2153	$$v_ref = "";
	2154	}
	2155	unless ($ctx->{is_bound}) {
	2156	push @$fields, $$v_ref;
	2157	}
	2158	$v_ref = undef;
	2159	if ($ctx->{keep_meta_info} and $fflags) {
	2160	push @$fflags, $ctx->{flag};
	2161	}
	2162	} elsif ($ctx->{flag} & IS_QUOTED) {
	2163	# ,1,"foo, 3",,bar,
	2164	# ^
	2165	$$v_ref .= $c;
	2166	} else {
	2167	# ,1,"foo, 3",,bar,
	2168	# ^ ^ ^
	2169	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2170	$v_ref = undef;
	2171	$waitingForField = 1;
	2172	}
	2173	}
	2174	elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
	2175	if ($waitingForField) {
	2176	# ,1,"foo, 3",,bar,\r\n
	2177	# ^
	2178	$ctx->{flag} \|= IS_QUOTED;
	2179	$waitingForField = 0;
	2180	next;
	2181	}
	2182	if ($ctx->{flag} & IS_QUOTED) {
	2183	# ,1,"foo, 3",,bar,\r\n
	2184	# ^
	2185	my $quoesc = 0;
	2186	my $c2 = $self->__get($ctx);
	2187
	2188	if ($ctx->{allow_whitespace}) {
	2189	# , 1 , "foo, 3" , , bar , \r\n
	2190	# ^
	2191	while($self->__is_whitespace($ctx, $c2)) {
	2192	if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
	2193	$$v_ref .= $c;
	2194	$c = $c2;
	2195	}
	2196	$c2 = $self->__get($ctx);
	2197	}
	2198	}
	2199
	2200	if (!defined $c2) { # EOF
	2201	# ,1,"foo, 3"
	2202	# ^
	2203	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2204	return 1;
	2205	}
	2206
	2207	if (defined $c2 and defined $sep and $c2 eq $sep) {
	2208	# ,1,"foo, 3",,bar,\r\n
	2209	# ^
	2210	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2211	$v_ref = undef;
	2212	$waitingForField = 1;
	2213	next;
	2214	}
	2215	if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
	2216	# ,1,"foo, 3",,"bar"\n
	2217	# ^
	2218	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2219	return 1;
	2220	}
	2221
	2222	if (defined $esc and $c eq $esc) {
	2223	$quoesc = 1;
	2224	if (defined $c2 and $c2 eq '0') {
	2225	# ,1,"foo, 3"056",,bar,\r\n
	2226	# ^
	2227	$$v_ref .= "\0";
	2228	next;
	2229	}
	2230	if (defined $c2 and defined $quot and $c2 eq $quot) {
	2231	# ,1,"foo, 3""56",,bar,\r\n
	2232	# ^
	2233	if ($ctx->{utf8}) {
	2234	$ctx->{flag} \|= IS_BINARY;
	2235	}
	2236	$$v_ref .= $c2;
	2237	next;
	2238	}
	2239	if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
	2240	# ,1,"foo, 3"56",,bar,\r\n
	2241	# ^
	2242	$$v_ref .= $c;
	2243	$c = $c2;
	2244	goto RESTART;
	2245	}
	2246	}
	2247	if (defined $c2 and $c2 eq "\015") {
	2248	if ($ctx->{eol_is_cr}) {
	2249	# ,1,"foo, 3"\r
	2250	# ^
	2251	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2252	return 1;
	2253	}
	2254
	2255	my $c3 = $self->__get($ctx);
	2256	if (defined $c3 and $c3 eq "\012") {
	2257	# ,1,"foo, 3"\r\n
	2258	# ^
	2259	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2260	return 1;
	2261	}
	2262
	2263	if ($ctx->{useIO} and !$ctx->{eol_len} and $c3 !~ /[^\x09\x20-\x7E]/) {
	2264	# ,1,"foo\n 3",,"bar"\r
	2265	# baz,4
	2266	# ^
	2267	$self->__set_eol_is_cr($ctx);
	2268	$ctx->{used}--;
	2269	$ctx->{has_ahead} = 1;
	2270	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2271	return 1;
	2272	}
	2273
	2274	$self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
	2275	return;
	2276	}
	2277
	2278	if ($ctx->{allow_loose_quotes} and !$quoesc) {
	2279	# ,1,"foo, 3"456",,bar,\r\n
	2280	# ^
	2281	$$v_ref .= $c;
	2282	$c = $c2;
	2283	goto RESTART;
	2284	}
	2285	# 1,"foo" ",3
	2286	# ^
	2287	if ($quoesc) {
	2288	$ctx->{used}--;
	2289	$self->__error_inside_quotes($ctx, 2023);
	2290	return;
	2291	}
	2292	$self->__error_inside_quotes($ctx, 2011);
	2293	return;
	2294	}
	2295	# !waitingForField, !InsideQuotes
	2296	if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
	2297	$ctx->{flag} \|= IS_ERROR;
	2298	$$v_ref .= $c;
	2299	} else {
	2300	$self->__error_inside_field($ctx, 2034);
	2301	return;
	2302	}
	2303	}
	2304	elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
	2305	# This means quote_char != escape_char
	2306	if ($waitingForField) {
	2307	$waitingForField = 0;
	2308	if ($ctx->{allow_unquoted_escape}) {
	2309	# The escape character is the first character of an
	2310	# unquoted field
	2311	# ... get and store next character
	2312	my $c2 = $self->__get($ctx);
	2313	$$v_ref = "";
	2314
	2315	if (!defined $c2) { # EOF
	2316	$ctx->{used}--;
	2317	$self->__error_inside_field($ctx, 2035);
	2318	return;
	2319	}
	2320	if ($c2 eq '0') {
	2321	$$v_ref .= "\0";
	2322	}
	2323	elsif (
	2324	(defined $quot and $c2 eq $quot) or
	2325	(defined $sep and $c2 eq $sep) or
	2326	(defined $esc and $c2 eq $esc) or
	2327	$ctx->{allow_loose_escapes}
	2328	) {
	2329	if ($ctx->{utf8}) {
	2330	$ctx->{flag} \|= IS_BINARY;
	2331	}
	2332	$$v_ref .= $c2;
	2333	} else {
	2334	$self->__parse_inside_quotes($ctx, 2025);
	2335	return;
	2336	}
	2337	}
	2338	}
	2339	elsif ($ctx->{flag} & IS_QUOTED) {
	2340	my $c2 = $self->__get($ctx);
	2341	if (!defined $c2) { # EOF
	2342	$ctx->{used}--;
	2343	$self->__error_inside_quotes($ctx, 2024);
	2344	return;
	2345	}
	2346	if ($c2 eq '0') {
	2347	$$v_ref .= "\0";
	2348	}
	2349	elsif (
	2350	(defined $quot and $c2 eq $quot) or
	2351	(defined $sep and $c2 eq $sep) or
	2352	(defined $esc and $c2 eq $esc) or
	2353	$ctx->{allow_loose_escapes}
	2354	) {
	2355	if ($ctx->{utf8}) {
	2356	$ctx->{flag} \|= IS_BINARY;
	2357	}
	2358	$$v_ref .= $c2;
	2359	} else {
	2360	$ctx->{used}--;
	2361	$self->__error_inside_quotes($ctx, 2025);
	2362	return;
	2363	}
	2364	}
	2365	elsif ($v_ref) {
	2366	my $c2 = $self->__get($ctx);
	2367	if (!defined $c2) { # EOF
	2368	$ctx->{used}--;
	2369	$self->__error_inside_field($ctx, 2035);
	2370	return;
	2371	}
	2372	$$v_ref .= $c2;
	2373	}
	2374	else {
	2375	$self->__error_inside_field($ctx, 2036);
	2376	return;
	2377	}
	2378	}
	2379	elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
	2380	EOLX:
	2381	if ($waitingForField) {
	2382	# ,1,"foo, 3",,bar,
	2383	# ^
	2384	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
	2385	$$v_ref = undef;
	2386	} else {
	2387	$$v_ref = "";
	2388	}
	2389	unless ($ctx->{is_bound}) {
	2390	push @$fields, $$v_ref;
	2391	}
	2392	if ($ctx->{keep_meta_info} and $fflags) {
	2393	push @$fflags, $ctx->{flag};
	2394	}
	2395	return 1;
	2396	}
	2397	if ($ctx->{flag} & IS_QUOTED) {
	2398	# ,1,"foo\n 3",,bar,
	2399	# ^
	2400	$ctx->{flag} \|= IS_BINARY;
	2401	unless ($ctx->{binary}) {
	2402	$self->__error_inside_quotes($ctx, 2021);
	2403	return;
	2404	}
	2405	$$v_ref .= $c;
	2406	}
	2407	elsif ($ctx->{verbatim}) {
	2408	# ,1,foo\n 3,,bar,
	2409	# This feature should be deprecated
	2410	$ctx->{flag} \|= IS_BINARY;
	2411	unless ($ctx->{binary}) {
	2412	$self->__error_inside_field($ctx, 2030);
	2413	return;
	2414	}
	2415	$$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
	2416	}
	2417	else {
	2418	# sep=,
	2419	# ^
	2420	if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
	2421	$ctx->{sep} = $1;
	2422	use bytes;
	2423	my $len = length $ctx->{sep};
	2424	if ($len <= 16) {
	2425	$ctx->{sep_len} = $len == 1 ? 0 : $len;
	2426	return $self->____parse($ctx, $src, $fields, $fflags);
	2427	}
	2428	}
	2429
	2430	# ,1,"foo\n 3",,bar
	2431	# ^
	2432	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2433	return 1;
	2434	}
	2435	}
	2436	elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
	2437	if ($waitingForField) {
	2438	$waitingForField = 0;
	2439	if ($ctx->{eol_is_cr}) {
	2440	# ,1,"foo\n 3",,bar,\r
	2441	# ^
	2442	$c = "\012";
	2443	goto RESTART;
	2444	}
	2445
	2446	my $c2 = $self->__get($ctx);
	2447	if (!defined $c2) { # EOF
	2448	# ,1,"foo\n 3",,bar,\r
	2449	# ^
	2450	$c = undef;
	2451	goto RESTART;
	2452	}
	2453	if ($c2 eq "\012") { # \r is not optional before EOLX!
	2454	# ,1,"foo\n 3",,bar,\r\n
	2455	# ^
	2456	$c = $c2;
	2457	goto RESTART;
	2458	}
	2459
	2460	if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
	2461	# ,1,"foo\n 3",,bar,\r
	2462	# baz,4
	2463	# ^
	2464	$self->__set_eol_is_cr($ctx);
	2465	$ctx->{used}--;
	2466	$ctx->{has_ahead} = 1;
	2467	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2468	return 1;
	2469	}
	2470
	2471	# ,1,"foo\n 3",,bar,\r\t
	2472	# ^
	2473	$ctx->{used}--;
	2474	$self->__error_inside_field($ctx, 2031);
	2475	return;
	2476	}
	2477	if ($ctx->{flag} & IS_QUOTED) {
	2478	# ,1,"foo\r 3",,bar,\r\t
	2479	# ^
	2480	$ctx->{flag} \|= IS_BINARY;
	2481	unless ($ctx->{binary}) {
	2482	$self->__error_inside_quotes($ctx, 2022);
	2483	return;
	2484	}
	2485	$$v_ref .= $c;
	2486	}
	2487	else {
	2488	if ($ctx->{eol_is_cr}) {
	2489	# ,1,"foo\n 3",,bar\r
	2490	# ^
	2491	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2492	return 1;
	2493	}
	2494
	2495	my $c2 = $self->__get($ctx);
	2496	if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
	2497	# ,1,"foo\n 3",,bar\r\n
	2498	# ^
	2499	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2500	return 1;
	2501	}
	2502
	2503	if ($ctx->{useIO} and !$ctx->{eol_len} and $c2 !~ /[^\x09\x20-\x7E]/) {
	2504	# ,1,"foo\n 3",,bar\r
	2505	# baz,4
	2506	# ^
	2507	$self->__set_eol_is_cr($ctx);
	2508	$ctx->{used}--;
	2509	$ctx->{has_ahead} = 1;
	2510	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2511	return 1;
	2512	}
	2513
	2514	# ,1,"foo\n 3",,bar\r\t
	2515	# ^
	2516	$self->__error_inside_field($ctx, 2032);
	2517	return;
	2518	}
	2519	}
	2520	else {
	2521	if ($ctx->{eolx} and $c eq $eol) {
	2522	$c = '';
	2523	goto EOLX;
	2524	}
	2525
	2526	if ($waitingForField) {
	2527	if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
	2528	do {
	2529	$c = $self->__get($ctx);
	2530	last if !defined $c;
	2531	} while $self->__is_whitespace($ctx, $c);
	2532	goto RESTART;
	2533	}
	2534	$waitingForField = 0;
	2535	goto RESTART;
	2536	}
	2537	if ($ctx->{flag} & IS_QUOTED) {
	2538	if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
	2539	$ctx->{flag} \|= IS_BINARY;
	2540	unless ($ctx->{binary} or $ctx->{utf8}) {
	2541	$self->__error_inside_quotes($ctx, 2026);
	2542	return;
	2543	}
	2544	}
	2545	$$v_ref .= $c;
	2546	} else {
	2547	if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
	2548	$ctx->{flag} \|= IS_BINARY;
	2549	unless ($ctx->{binary} or $ctx->{utf8}) {
	2550	$self->__error_inside_field($ctx, 2037);
	2551	return;
	2552	}
	2553	}
	2554	$$v_ref .= $c;
	2555	}
	2556	}
	2557	last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
	2558	}
	2559	}
	2560
	2561	if ($waitingForField) {
	2562	if ($seenSomething or !$ctx->{useIO}) {
	2563	# new field
	2564	if (!$v_ref) {
	2565	if ($ctx->{is_bound}) {
	2566	$v_ref = $self->__bound_field($ctx, $fnum, 0);
	2567	} else {
	2568	$value = '';
	2569	$v_ref = \$value;
	2570	}
	2571	$fnum++;
	2572	return unless $v_ref;
	2573	$ctx->{flag} = 0;
	2574	$ctx->{fld_idx}++;
	2575	}
	2576	if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
	2577	$$v_ref = undef;
	2578	} else {
	2579	$$v_ref = "";
	2580	}
	2581	unless ($ctx->{is_bound}) {
	2582	push @$fields, $$v_ref;
	2583	}
	2584	if ($ctx->{keep_meta_info} and $fflags) {
	2585	push @$fflags, $ctx->{flag};
	2586	}
	2587	return 1;
	2588	}
	2589	$self->SetDiag(2012);
	2590	return;
	2591	}
	2592
	2593	if ($ctx->{flag} & IS_QUOTED) {
	2594	$self->__error_inside_quotes($ctx, 2027);
	2595	return;
	2596	}
	2597
	2598	if ($v_ref) {
	2599	$self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
	2600	}
	2601	return 1;
	2602	}
	2603
	2604	sub __get_from_src {
	2605	my ($self, $ctx, $src) = @_;
	2606	return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
	2607	return 1 if $ctx->{used} < $ctx->{size};
	2608	return unless $ctx->{useIO};
	2609	my $res = $src->getline;
	2610	if (defined $res) {
	2611	if ($ctx->{has_ahead}) {
	2612	$ctx->{tmp} = $self->{_AHEAD};
	2613	$ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
	2614	$ctx->{tmp} .= $res;
	2615	$ctx->{has_ahead} = 0;
	2616	} else {
	2617	$ctx->{tmp} = $res;
	2618	}
	2619	if ($ctx->{size} = length $ctx->{tmp}) {
	2620	$ctx->{used} = -1;
	2621	$ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
	2622	pos($ctx->{tmp}) = 0;
	2623	return 1;
	2624	}
	2625	} elsif (delete $ctx->{has_leftover}) {
	2626	$ctx->{tmp} = $self->{_AHEAD};
	2627	$ctx->{has_ahead} = 0;
	2628	$ctx->{useIO} \|= useIO_EOF;
	2629	if ($ctx->{size} = length $ctx->{tmp}) {
	2630	$ctx->{used} = -1;
	2631	$ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
	2632	pos($ctx->{tmp}) = 0;
	2633	return 1;
	2634	}
	2635	}
	2636	$ctx->{tmp} = '' unless defined $ctx->{tmp};
	2637	$ctx->{useIO} \|= useIO_EOF;
	2638	return;
	2639	}
	2640
	2641	sub __set_eol_is_cr {
	2642	my ($self, $ctx) = @_;
	2643	$ctx->{eol} = "\015";
	2644	$ctx->{eol_is_cr} = 1;
	2645	$ctx->{eol_len} = 1;
	2646	%{$self->{_CACHE}} = %$ctx;
	2647
	2648	$self->{eol} = $ctx->{eol};
	2649	}
	2650
	2651	sub __bound_field {
	2652	my ($self, $ctx, $i, $keep) = @_;
	2653	if ($i >= $ctx->{is_bound}) {
	2654	$self->SetDiag(3006);
	2655	return;
	2656	}
	2657	if (ref $ctx->{bound} eq 'ARRAY') {
	2658	my $ref = $ctx->{bound}[$i];
	2659	if (ref $ref) {
	2660	if ($keep) {
	2661	return $ref;
	2662	}
	2663	unless (Scalar::Util::readonly($$ref)) {
	2664	$$ref = "";
	2665	return $ref;
	2666	}
	2667	}
	2668	}
	2669	$self->SetDiag(3008);
	2670	return;
	2671	}
	2672
	2673	sub __get {
	2674	my ($self, $ctx) = @_;
	2675	return unless defined $ctx->{used};
	2676	return if $ctx->{used} >= $ctx->{size};
	2677	my $pos = pos($ctx->{tmp});
	2678	if ($ctx->{tmp} =~ /\G($ctx->{_re}\|.)/gs) {
	2679	my $c = $1;
	2680	if ($c =~ /[^\x09\x20-\x7e]/) {
	2681	$ctx->{flag} \|= IS_BINARY;
	2682	}
	2683	$ctx->{used} = pos($ctx->{tmp});
	2684	return $c;
	2685	} else {
	2686	pos($ctx->{tmp}) = $pos;
	2687	return;
	2688	}
	2689	}
	2690
	2691	sub __error_inside_quotes {
	2692	my ($self, $ctx, $error) = @_;
	2693	$self->__parse_error($ctx, $error, $ctx->{used} - 1);
	2694	}
	2695
	2696	sub __error_inside_field {
	2697	my ($self, $ctx, $error) = @_;
	2698	$self->__parse_error($ctx, $error, $ctx->{used} - 1);
	2699	}
	2700
	2701	sub __parse_error {
	2702	my ($self, $ctx, $error, $pos) = @_;
	2703	$self->{_ERROR_POS} = $pos;
	2704	$self->{_ERROR_FLD} = $ctx->{fld_idx};
	2705	$self->{_ERROR_INPUT} = $ctx->{tmp} if $ctx->{tmp};
	2706	$self->SetDiag($error);
	2707	return;
	2708	}
	2709
	2710	sub __is_whitespace {
	2711	my ($self, $ctx, $c) = @_;
	2712	return unless defined $c;
	2713	return (
	2714	(!defined $ctx->{sep} or $c ne $ctx->{sep}) &&
	2715	(!defined $ctx->{quo} or $c ne $ctx->{quo}) &&
	2716	(!defined $ctx->{escape_char} or $c ne $ctx->{escape_char}) &&
	2717	($c eq " " or $c eq "\t")
	2718	);
	2719	}
	2720
	2721	sub __push_value { # AV_PUSH (part of)
	2722	my ($self, $ctx, $v_ref, $fields, $fflags, $flag, $fnum) = @_;
	2723	utf8::encode($$v_ref) if $ctx->{utf8};
	2724	if ($ctx->{formula} && $$v_ref && substr($$v_ref, 0, 1) eq '=') {
	2725	my $value = $self->_formula($ctx, $$v_ref, $fnum);
	2726	push @$fields, defined $value ? $value : undef;
	2727	return;
	2728	}
	2729	if (
	2730	(!defined $$v_ref or $$v_ref eq '') and
	2731	($ctx->{empty_is_undef} or (!($flag & IS_QUOTED) and $ctx->{blank_is_undef}))
	2732	) {
	2733	$$v_ref = undef;
	2734	} else {
	2735	if ($ctx->{allow_whitespace} && !($flag & IS_QUOTED)) {
	2736	$$v_ref =~ s/[ \t]+$//;
	2737	}
	2738	if ($flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} \|\| _is_valid_utf8($$v_ref))) {
	2739	utf8::decode($$v_ref);
	2740	}
	2741	}
	2742	unless ($ctx->{is_bound}) {
	2743	push @$fields, $$v_ref;
	2744	}
	2745	if ($ctx->{keep_meta_info} and $fflags) {
	2746	push @$fflags, $flag;
	2747	}
	2748	}
	2749
	2750	sub getline {
	2751	my ($self, $io) = @_;
	2752
	2753	my (@fields, @fflags);
	2754	my $res = $self->__parse(\@fields, \@fflags, $io, 1);
	2755	$res ? \@fields : undef;
	2756	}
	2757
	2758	sub getline_all {
	2759	my ( $self, $io, $offset, $len ) = @_;
	2760
	2761	my $ctx = $self->_setup_ctx;
	2762
	2763	my $tail = 0;
	2764	my $n = 0;
	2765	$offset \|\|= 0;
	2766
	2767	if ( $offset < 0 ) {
	2768	$tail = -$offset;
	2769	$offset = -1;
	2770	}
	2771
	2772	my (@row, @list);
	2773	while ($self->___parse($ctx, \@row, undef, $io, 1)) {
	2774	$ctx = $self->_setup_ctx;
	2775
	2776	if ($offset > 0) {
	2777	$offset--;
	2778	@row = ();
	2779	next;
	2780	}
	2781	if ($n++ >= $tail and $tail) {
	2782	shift @list;
	2783	$n--;
	2784	}
	2785	if (($ctx->{has_hooks} \|\| 0) & HOOK_AFTER_PARSE) {
	2786	unless ($self->_hook(after_parse => \@row)) {
	2787	@row = ();
	2788	next;
	2789	}
	2790	}
	2791	push @list, [@row];
	2792	@row = ();
	2793
	2794	last if defined $len && $n >= $len and $offset >= 0; # exceeds limit size
	2795	}
	2796
	2797	if ( defined $len && $n > $len ) {
	2798	@list = splice( @list, 0, $len);
	2799	}
	2800
	2801	return \@list;
	2802	}
	2803
	2804	sub _is_valid_utf8 {
	2805	return ( $_[0] =~ /^(?:
	2806	[\x00-\x7F]
	2807	\|[\xC2-\xDF][\x80-\xBF]
	2808	\|[\xE0][\xA0-\xBF][\x80-\xBF]
	2809	\|[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
	2810	\|[\xED][\x80-\x9F][\x80-\xBF]
	2811	\|[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
	2812	\|[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
	2813	\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
	2814	\|[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
	2815	)+$/x ) ? 1 : 0;
	2816	}
	2817
	2818	################################################################################
	2819	# methods for errors
	2820	################################################################################
	2821
	2822	sub _set_error_diag {
	2823	my ( $self, $error, $pos ) = @_;
	2824
	2825	$self->SetDiag($error);
	2826
	2827	if (defined $pos) {
	2828	$_[0]->{_ERROR_POS} = $pos;
	2829	}
	2830
	2831	return;
	2832	}
	2833
	2834	sub error_input {
	2835	my $self = shift;
	2836	if ($self and ((Scalar::Util::reftype($self) \|\| '') eq 'HASH' or (ref $self) =~ /^Text::CSV/)) {
	2837	return $self->{_ERROR_INPUT};
	2838	}
	2839	return;
	2840	}
	2841
	2842	sub _sv_diag {
	2843	my ($self, $error) = @_;
	2844	bless [$error, $ERRORS->{$error}], 'Text::CSV::ErrorDiag';
	2845	}
	2846
	2847	sub _set_diag {
	2848	my ($self, $ctx, $error) = @_;
	2849
	2850	$last_error = $self->_sv_diag($error);
	2851	$self->{_ERROR_DIAG} = $last_error;
	2852	if ($error == 0) {
	2853	$self->{_ERROR_POS} = 0;
	2854	$self->{_ERROR_FLD} = 0;
	2855	$self->{_ERROR_INPUT} = undef;
	2856	$ctx->{has_error_input} = 0;
	2857	}
	2858	if ($error == 2012) { # EOF
	2859	$self->{_EOF} = 1;
	2860	}
	2861	if ($ctx->{auto_diag}) {
	2862	$self->error_diag;
	2863	}
	2864	return $last_error;
	2865	}
	2866
	2867	sub SetDiag {
	2868	my ($self, $error, $errstr) = @_;
	2869	my $res;
	2870	if (ref $self) {
	2871	my $ctx = $self->_setup_ctx;
	2872	$res = $self->_set_diag($ctx, $error);
	2873
	2874	} else {
	2875	$res = $self->_sv_diag($error);
	2876	}
	2877	if (defined $errstr) {
	2878	$res->[1] = $errstr;
	2879	}
	2880	$res;
	2881	}
	2882
	2883	################################################################################
	2884	package Text::CSV::ErrorDiag;
	2885
	2886	use strict;
	2887	use overload (
	2888	'""' => \&stringify,
	2889	'+' => \&numeric,
	2890	'-' => \&numeric,
	2891	'*' => \&numeric,
	2892	'/' => \&numeric,
	2893	fallback => 1,
	2894	);
	2895
	2896
	2897	sub numeric {
	2898	my ($left, $right) = @_;
	2899	return ref $left ? $left->[0] : $right->[0];
	2900	}
	2901
	2902
	2903	sub stringify {
	2904	$_[0]->[1];
	2905	}
	2906	################################################################################
	2907	1;
	2908	__END__
	2909
	2910	=head1 NAME
	2911
	2912	Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
	2913
	2914
	2915	=head1 SYNOPSIS
	2916
	2917	This section is taken from Text::CSV_XS.
	2918
	2919	# Functional interface
	2920	use Text::CSV_PP qw( csv );
	2921
	2922	# Read whole file in memory
	2923	my $aoa = csv (in => "data.csv"); # as array of array
	2924	my $aoh = csv (in => "data.csv",
	2925	headers => "auto"); # as array of hash
	2926
	2927	# Write array of arrays as csv file
	2928	csv (in => $aoa, out => "file.csv", sep_char=> ";");
	2929
	2930	# Only show lines where "code" is odd
	2931	csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
	2932
	2933	# Object interface
	2934	use Text::CSV_PP;
	2935
	2936	my @rows;
	2937	# Read/parse CSV
	2938	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
	2939	open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
	2940	while (my $row = $csv->getline ($fh)) {
	2941	$row->[2] =~ m/pattern/ or next; # 3rd field should match
	2942	push @rows, $row;
	2943	}
	2944	close $fh;
	2945
	2946	# and write as CSV
	2947	open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
	2948	$csv->say ($fh, $_) for @rows;
	2949	close $fh or die "new.csv: $!";
	2950
	2951	=head1 DESCRIPTION
	2952
	2953	Text::CSV_PP is a pure-perl module that provides facilities for the
	2954	composition and decomposition of comma-separated values. This is
	2955	(almost) compatible with much faster L<Text::CSV_XS>, and mainly
	2956	used as its fallback module when you use L<Text::CSV> module without
	2957	having installed Text::CSV_XS. If you don't have any reason to use
	2958	this module directly, use Text::CSV for speed boost and portability
	2959	(or maybe Text::CSV_XS when you write an one-off script and don't need
	2960	to care about portability).
	2961
	2962	The following caveats are taken from the doc of Text::CSV_XS.
	2963
	2964	=head2 Embedded newlines
	2965
	2966	B<Important Note>: The default behavior is to accept only ASCII characters
	2967	in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
	2968	fields can not contain newlines. If your data contains newlines embedded in
	2969	fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
	2970	set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
	2971	parsing options, you will always want to set binary.
	2972
	2973	But you still have the problem that you have to pass a correct line to the
	2974	L</parse> method, which is more complicated from the usual point of usage:
	2975
	2976	my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
	2977	while (<>) { # WRONG!
	2978	$csv->parse ($_);
	2979	my @fields = $csv->fields ();
	2980	}
	2981
	2982	this will break, as the C<while> might read broken lines: it does not care
	2983	about the quoting. If you need to support embedded newlines, the way to go
	2984	is to B<not> pass L<C<eol>\|/eol> in the parser (it accepts C<\n>, C<\r>,
	2985	B<and> C<\r\n> by default) and then
	2986
	2987	my $csv = Text::CSV_PP->new ({ binary => 1 });
	2988	open my $fh, "<", $file or die "$file: $!";
	2989	while (my $row = $csv->getline ($fh)) {
	2990	my @fields = @$row;
	2991	}
	2992
	2993	The old(er) way of using global file handles is still supported
	2994
	2995	while (my $row = $csv->getline (*ARGV)) { ... }
	2996
	2997	=head2 Unicode
	2998
	2999	Unicode is only tested to work with perl-5.8.2 and up.
	3000
	3001	See also L</BOM>.
	3002
	3003	The simplest way to ensure the correct encoding is used for in- and output
	3004	is by either setting layers on the filehandles, or setting the L</encoding>
	3005	argument for L</csv>.
	3006
	3007	open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
	3008	or
	3009	my $aoa = csv (in => "in.csv", encoding => "UTF-8");
	3010
	3011	open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
	3012	or
	3013	csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
	3014
	3015	On parsing (both for L</getline> and L</parse>), if the source is marked
	3016	being UTF8, then all fields that are marked binary will also be marked UTF8.
	3017
	3018	On combining (L</print> and L</combine>): if any of the combining fields
	3019	was marked UTF8, the resulting string will be marked as UTF8. Note however
	3020	that all fields I<before> the first field marked UTF8 that contain 8-bit
	3021	characters and were not upgraded to UTF8 will be C<bytes> in the
	3022	resulting string too, possibly causing unexpected errors. If you pass data
	3023	of different encoding, or you don't know if there is different encoding,
	3024	force it to be upgraded before you pass them on:
	3025
	3026	$csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
	3027
	3028	For complete control over encoding, please use L<Text::CSV::Encoded>:
	3029
	3030	use Text::CSV::Encoded;
	3031	my $csv = Text::CSV::Encoded->new ({
	3032	encoding_in => "iso-8859-1", # the encoding comes into Perl
	3033	encoding_out => "cp1252", # the encoding comes out of Perl
	3034	});
	3035
	3036	$csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
	3037	# combine () and print () accept literally utf8 encoded data
	3038	# parse () and getline () return literally utf8 encoded data
	3039
	3040	$csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
	3041	# combine () and print () accept UTF8 marked data
	3042	# parse () and getline () return UTF8 marked data
	3043
	3044	=head2 BOM
	3045
	3046	BOM (or Byte Order Mark) handling is available only inside the L</header>
	3047	method. This method supports the following encodings: C<utf-8>, C<utf-1>,
	3048	C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
	3049	C<bocu-1>, and C<gb-18030>. See L<Wikipedia\|https://en.wikipedia.org/wiki/Byte_order_mark>.
	3050
	3051	If a file has a BOM, the easiest way to deal with that is
	3052
	3053	my $aoh = csv (in => $file, detect_bom => 1);
	3054
	3055	All records will be encoded based on the detected BOM.
	3056
	3057	This implies a call to the L</header> method, which defaults to also set
	3058	the L</column_names>. So this is B<not> the same as
	3059
	3060	my $aoh = csv (in => $file, headers => "auto");
	3061
	3062	which only reads the first record to set L</column_names> but ignores any
	3063	meaning of possible present BOM.
	3064
	3065	=head1 METHODS
	3066
	3067	This section is also taken from Text::CSV_XS.
	3068
	3069	=head2 version
	3070
	3071	(Class method) Returns the current module version.
	3072
	3073	=head2 new
	3074
	3075	(Class method) Returns a new instance of class Text::CSV_PP. The attributes
	3076	are described by the (optional) hash ref C<\%attr>.
	3077
	3078	my $csv = Text::CSV_PP->new ({ attributes ... });
	3079
	3080	The following attributes are available:
	3081
	3082	=head3 eol
	3083
	3084	my $csv = Text::CSV_PP->new ({ eol => $/ });
	3085	$csv->eol (undef);
	3086	my $eol = $csv->eol;
	3087
	3088	The end-of-line string to add to rows for L</print> or the record separator
	3089	for L</getline>.
	3090
	3091	When not passed in a B<parser> instance, the default behavior is to accept
	3092	C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
	3093	all. Passing C<undef> or the empty string behave the same.
	3094
	3095	When not passed in a B<generating> instance, records are not terminated at
	3096	all, so it is probably wise to pass something you expect. A safe choice for
	3097	C<eol> on output is either C<$/> or C<\r\n>.
	3098
	3099	Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
	3100	(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
	3101	Return). The L<C<eol>\|/eol> attribute cannot exceed 7 (ASCII) characters.
	3102
	3103	If both C<$/> and L<C<eol>\|/eol> equal C<"\015">, parsing lines that end on
	3104	only a Carriage Return without Line Feed, will be L</parse>d correctly.
	3105
	3106	=head3 sep_char
	3107
	3108	my $csv = Text::CSV_PP->new ({ sep_char => ";" });
	3109	$csv->sep_char (";");
	3110	my $c = $csv->sep_char;
	3111
	3112	The char used to separate fields, by default a comma. (C<,>). Limited to a
	3113	single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
	3114	(tilde). When longer sequences are required, use L<C<sep>\|/sep>.
	3115
	3116	The separation character can not be equal to the quote character or to the
	3117	escape character.
	3118
	3119	=head3 sep
	3120
	3121	my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
	3122	$csv->sep (";");
	3123	my $sep = $csv->sep;
	3124
	3125	The chars used to separate fields, by default undefined. Limited to 8 bytes.
	3126
	3127	When set, overrules L<C<sep_char>\|/sep_char>. If its length is one byte it
	3128	acts as an alias to L<C<sep_char>\|/sep_char>.
	3129
	3130	=head3 quote_char
	3131
	3132	my $csv = Text::CSV_PP->new ({ quote_char => "'" });
	3133	$csv->quote_char (undef);
	3134	my $c = $csv->quote_char;
	3135
	3136	The character to quote fields containing blanks or binary data, by default
	3137	the double quote character (C<">). A value of undef suppresses quote chars
	3138	(for simple cases only). Limited to a single-byte character, usually in the
	3139	range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
	3140	required, use L<C<quote>\|/quote>.
	3141
	3142	C<quote_char> can not be equal to L<C<sep_char>\|/sep_char>.
	3143
	3144	=head3 quote
	3145
	3146	my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
	3147	$csv->quote ("'");
	3148	my $quote = $csv->quote;
	3149
	3150	The chars used to quote fields, by default undefined. Limited to 8 bytes.
	3151
	3152	When set, overrules L<C<quote_char>\|/quote_char>. If its length is one byte
	3153	it acts as an alias to L<C<quote_char>\|/quote_char>.
	3154
	3155	=head3 escape_char
	3156
	3157	my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
	3158	$csv->escape_char (":");
	3159	my $c = $csv->escape_char;
	3160
	3161	The character to escape certain characters inside quoted fields. This is
	3162	limited to a single-byte character, usually in the range from C<0x20>
	3163	(space) to C<0x7E> (tilde).
	3164
	3165	The C<escape_char> defaults to being the double-quote mark (C<">). In other
	3166	words the same as the default L<C<quote_char>\|/quote_char>. This means that
	3167	doubling the quote mark in a field escapes it:
	3168
	3169	"foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
	3170
	3171	If you change the L<C<quote_char>\|/quote_char> without changing the
	3172	C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
	3173	If instead you want to escape the L<C<quote_char>\|/quote_char> by doubling
	3174	it you will need to also change the C<escape_char> to be the same as what
	3175	you have changed the L<C<quote_char>\|/quote_char> to.
	3176
	3177	Setting C<escape_char> to C<undef> or C<""> will disable escaping completely
	3178	and is greatly discouraged. This will also disable C<escape_null>.
	3179
	3180	The escape character can not be equal to the separation character.
	3181
	3182	=head3 binary
	3183
	3184	my $csv = Text::CSV_PP->new ({ binary => 1 });
	3185	$csv->binary (0);
	3186	my $f = $csv->binary;
	3187
	3188	If this attribute is C<1>, you may use binary characters in quoted fields,
	3189	including line feeds, carriage returns and C<NULL> bytes. (The latter could
	3190	be escaped as C<"0>.) By default this feature is off.
	3191
	3192	If a string is marked UTF8, C<binary> will be turned on automatically when
	3193	binary characters other than C<CR> and C<NL> are encountered. Note that a
	3194	simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
	3195	so setting C<< { binary => 1 } >> is still a wise option.
	3196
	3197	=head3 strict
	3198
	3199	my $csv = Text::CSV_PP->new ({ strict => 1 });
	3200	$csv->strict (0);
	3201	my $f = $csv->strict;
	3202
	3203	If this attribute is set to C<1>, any row that parses to a different number
	3204	of fields than the previous row will cause the parser to throw error 2014.
	3205
	3206	=head3 formula_handling
	3207
	3208	=head3 formula
	3209
	3210	my $csv = Text::CSV_PP->new ({ formula => "none" });
	3211	$csv->formula ("none");
	3212	my $f = $csv->formula;
	3213
	3214	This defines the behavior of fields containing I<formulas>. As formulas are
	3215	considered dangerous in spreadsheets, this attribute can define an optional
	3216	action to be taken if a field starts with an equal sign (C<=>).
	3217
	3218	For purpose of code-readability, this can also be written as
	3219
	3220	my $csv = Text::CSV_PP->new ({ formula_handling => "none" });
	3221	$csv->formula_handling ("none");
	3222	my $f = $csv->formula_handling;
	3223
	3224	Possible values for this attribute are
	3225
	3226	=over 2
	3227
	3228	=item none
	3229
	3230	Take no specific action. This is the default.
	3231
	3232	$csv->formula ("none");
	3233
	3234	=item die
	3235
	3236	Cause the process to C<die> whenever a leading C<=> is encountered.
	3237
	3238	$csv->formula ("die");
	3239
	3240	=item croak
	3241
	3242	Cause the process to C<croak> whenever a leading C<=> is encountered. (See
	3243	L<Carp>)
	3244
	3245	$csv->formula ("croak");
	3246
	3247	=item diag
	3248
	3249	Report position and content of the field whenever a leading C<=> is found.
	3250	The value of the field is unchanged.
	3251
	3252	$csv->formula ("diag");
	3253
	3254	=item empty
	3255
	3256	Replace the content of fields that start with a C<=> with the empty string.
	3257
	3258	$csv->formula ("empty");
	3259	$csv->formula ("");
	3260
	3261	=item undef
	3262
	3263	Replace the content of fields that start with a C<=> with C<undef>.
	3264
	3265	$csv->formula ("undef");
	3266	$csv->formula (undef);
	3267
	3268	=back
	3269
	3270	All other values will give a warning and then fall back to C<diag>.
	3271
	3272	=head3 decode_utf8
	3273
	3274	my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
	3275	$csv->decode_utf8 (0);
	3276	my $f = $csv->decode_utf8;
	3277
	3278	This attribute defaults to TRUE.
	3279
	3280	While I<parsing>, fields that are valid UTF-8, are automatically set to be
	3281	UTF-8, so that
	3282
	3283	$csv->parse ("\xC4\xA8\n");
	3284
	3285	results in
	3286
	3287	PV("\304\250"\0) [UTF8 "\x{128}"]
	3288
	3289	Sometimes it might not be a desired action. To prevent those upgrades, set
	3290	this attribute to false, and the result will be
	3291
	3292	PV("\304\250"\0)
	3293
	3294	=head3 auto_diag
	3295
	3296	my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
	3297	$csv->auto_diag (2);
	3298	my $l = $csv->auto_diag;
	3299
	3300	Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
	3301	to be automatically called in void context upon errors.
	3302
	3303	In case of error C<2012 - EOF>, this call will be void.
	3304
	3305	If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
	3306	on errors instead of C<warn>. If set to anything unrecognized, it will be
	3307	silently ignored.
	3308
	3309	Future extensions to this feature will include more reliable auto-detection
	3310	of C<autodie> being active in the scope of which the error occurred which
	3311	will increment the value of C<auto_diag> with C<1> the moment the error is
	3312	detected.
	3313
	3314	=head3 diag_verbose
	3315
	3316	my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
	3317	$csv->diag_verbose (2);
	3318	my $l = $csv->diag_verbose;
	3319
	3320	Set the verbosity of the output triggered by C<auto_diag>. Currently only
	3321	adds the current input-record-number (if known) to the diagnostic output
	3322	with an indication of the position of the error.
	3323
	3324	=head3 blank_is_undef
	3325
	3326	my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
	3327	$csv->blank_is_undef (0);
	3328	my $f = $csv->blank_is_undef;
	3329
	3330	Under normal circumstances, C<CSV> data makes no distinction between quoted-
	3331	and unquoted empty fields. These both end up in an empty string field once
	3332	read, thus
	3333
	3334	1,"",," ",2
	3335
	3336	is read as
	3337
	3338	("1", "", "", " ", "2")
	3339
	3340	When I<writing> C<CSV> files with either L<C<always_quote>\|/always_quote>
	3341	or L<C<quote_empty>\|/quote_empty> set, the unquoted I<empty> field is the
	3342	result of an undefined value. To enable this distinction when I<reading>
	3343	C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
	3344	fields to be set to C<undef>, causing the above to be parsed as
	3345
	3346	("1", "", undef, " ", "2")
	3347
	3348	Note that this is specifically important when loading C<CSV> fields into a
	3349	database that allows C<NULL> values, as the perl equivalent for C<NULL> is
	3350	C<undef> in L<DBI> land.
	3351
	3352	=head3 empty_is_undef
	3353
	3354	my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
	3355	$csv->empty_is_undef (0);
	3356	my $f = $csv->empty_is_undef;
	3357
	3358	Going one step further than L<C<blank_is_undef>\|/blank_is_undef>, this
	3359	attribute converts all empty fields to C<undef>, so
	3360
	3361	1,"",," ",2
	3362
	3363	is read as
	3364
	3365	(1, undef, undef, " ", 2)
	3366
	3367	Note that this affects only fields that are originally empty, not fields
	3368	that are empty after stripping allowed whitespace. YMMV.
	3369
	3370	=head3 allow_whitespace
	3371
	3372	my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
	3373	$csv->allow_whitespace (0);
	3374	my $f = $csv->allow_whitespace;
	3375
	3376	When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
	3377	surrounding the separation character is removed when parsing. If either
	3378	C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>\|/sep_char>,
	3379	L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> it will not
	3380	be considered whitespace.
	3381
	3382	Now lines like:
	3383
	3384	1 , "foo" , bar , 3 , zapp
	3385
	3386	are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
	3387
	3388	Note that B<all> whitespace is stripped from both start and end of each
	3389	field. That would make it I<more> than a I<feature> to enable parsing bad
	3390	C<CSV> lines, as
	3391
	3392	1, 2.0, 3, ape , monkey
	3393
	3394	will now be parsed as
	3395
	3396	("1", "2.0", "3", "ape", "monkey")
	3397
	3398	even if the original line was perfectly acceptable C<CSV>.
	3399
	3400	=head3 allow_loose_quotes
	3401
	3402	my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
	3403	$csv->allow_loose_quotes (0);
	3404	my $f = $csv->allow_loose_quotes;
	3405
	3406	By default, parsing unquoted fields containing L<C<quote_char>\|/quote_char>
	3407	characters like
	3408
	3409	1,foo "bar" baz,42
	3410
	3411	would result in parse error 2034. Though it is still bad practice to allow
	3412	this format, we cannot help the fact that some vendors make their
	3413	applications spit out lines styled this way.
	3414
	3415	If there is B<really> bad C<CSV> data, like
	3416
	3417	1,"foo "bar" baz",42
	3418
	3419	or
	3420
	3421	1,""foo bar baz"",42
	3422
	3423	there is a way to get this data-line parsed and leave the quotes inside the
	3424	quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
	3425	B<AND> making sure that the L<C<escape_char>\|/escape_char> is I<not> equal
	3426	to L<C<quote_char>\|/quote_char>.
	3427
	3428	=head3 allow_loose_escapes
	3429
	3430	my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
	3431	$csv->allow_loose_escapes (0);
	3432	my $f = $csv->allow_loose_escapes;
	3433
	3434	Parsing fields that have L<C<escape_char>\|/escape_char> characters that
	3435	escape characters that do not need to be escaped, like:
	3436
	3437	my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
	3438	$csv->parse (qq{1,"my bar\'s",baz,42});
	3439
	3440	would result in parse error 2025. Though it is bad practice to allow this
	3441	format, this attribute enables you to treat all escape character sequences
	3442	equal.
	3443
	3444	=head3 allow_unquoted_escape
	3445
	3446	my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
	3447	$csv->allow_unquoted_escape (0);
	3448	my $f = $csv->allow_unquoted_escape;
	3449
	3450	A backward compatibility issue where L<C<escape_char>\|/escape_char> differs
	3451	from L<C<quote_char>\|/quote_char> prevents L<C<escape_char>\|/escape_char>
	3452	from being in the first position of a field. If L<C<quote_char>\|/quote_char> is
	3453	equal to the default C<"> and L<C<escape_char>\|/escape_char> is set to C<\>,
	3454	this would be illegal:
	3455
	3456	1,\0,2
	3457
	3458	Setting this attribute to C<1> might help to overcome issues with backward
	3459	compatibility and allow this style.
	3460
	3461	=head3 always_quote
	3462
	3463	my $csv = Text::CSV_PP->new ({ always_quote => 1 });
	3464	$csv->always_quote (0);
	3465	my $f = $csv->always_quote;
	3466
	3467	By default the generated fields are quoted only if they I<need> to be. For
	3468	example, if they contain the separator character. If you set this attribute
	3469	to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
	3470	quoted, see L</blank_is_undef>). This makes it quite often easier to handle
	3471	exported data in external applications.
	3472
	3473	=head3 quote_space
	3474
	3475	my $csv = Text::CSV_PP->new ({ quote_space => 1 });
	3476	$csv->quote_space (0);
	3477	my $f = $csv->quote_space;
	3478
	3479	By default, a space in a field would trigger quotation. As no rule exists
	3480	for this to be forced in C<CSV>, nor any for the opposite, the default is true
	3481	for safety. You can exclude the space from this trigger by setting this
	3482	attribute to 0.
	3483
	3484	=head3 quote_empty
	3485
	3486	my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
	3487	$csv->quote_empty (0);
	3488	my $f = $csv->quote_empty;
	3489
	3490	By default the generated fields are quoted only if they I<need> to be. An
	3491	empty (defined) field does not need quotation. If you set this attribute to
	3492	C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
	3493	quoted, see L</blank_is_undef>). See also L<C<always_quote>\|/always_quote>.
	3494
	3495	=head3 quote_binary
	3496
	3497	my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
	3498	$csv->quote_binary (0);
	3499	my $f = $csv->quote_binary;
	3500
	3501	By default, all "unsafe" bytes inside a string cause the combined field to
	3502	be quoted. By setting this attribute to C<0>, you can disable that trigger
	3503	for bytes >= C<0x7F>.
	3504
	3505	=head3 escape_null
	3506
	3507	my $csv = Text::CSV_PP->new ({ escape_null => 1 });
	3508	$csv->escape_null (0);
	3509	my $f = $csv->escape_null;
	3510
	3511	By default, a C<NULL> byte in a field would be escaped. This option enables
	3512	you to treat the C<NULL> byte as a simple binary character in binary mode
	3513	(the C<< { binary => 1 } >> is set). The default is true. You can prevent
	3514	C<NULL> escapes by setting this attribute to C<0>.
	3515
	3516	When the C<escape_char> attribute is set to undefined, this attribute will
	3517	be set to false.
	3518
	3519	The default setting will encode "=\x00=" as
	3520
	3521	"="0="
	3522
	3523	With C<escape_null> set to C<0>, this will result in
	3524
	3525	"=\x00="
	3526
	3527	The default when using the C<csv> function is C<false>.
	3528
	3529	For backward compatibility reasons, the deprecated old name C<quote_null>
	3530	is still recognized.
	3531
	3532	=head3 keep_meta_info
	3533
	3534	my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
	3535	$csv->keep_meta_info (0);
	3536	my $f = $csv->keep_meta_info;
	3537
	3538	By default, the parsing of input records is as simple and fast as possible.
	3539	However, some parsing information - like quotation of the original field -
	3540	is lost in that process. Setting this flag to true enables retrieving that
	3541	information after parsing with the methods L</meta_info>, L</is_quoted>,
	3542	and L</is_binary> described below. Default is false for performance.
	3543
	3544	If you set this attribute to a value greater than 9, then you can control
	3545	output quotation style like it was used in the input of the last parsed
	3546	record (unless quotation was added because of other reasons).
	3547
	3548	my $csv = Text::CSV_PP->new ({
	3549	binary => 1,
	3550	keep_meta_info => 1,
	3551	quote_space => 0,
	3552	});
	3553
	3554	my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
	3555
	3556	$csv->print (*STDOUT, \@row);
	3557	# 1,,, , ,f,g,"h""h",help,help
	3558	$csv->keep_meta_info (11);
	3559	$csv->print (*STDOUT, \@row);
	3560	# 1,,"", ," ",f,"g","h""h",help,"help"
	3561
	3562	=head3 undef_str
	3563
	3564	my $csv = Text::CSV_PP->new ({ undef_str => "\\N" });
	3565	$csv->undef_str (undef);
	3566	my $s = $csv->undef_str;
	3567
	3568	This attribute optionally defines the output of undefined fields. The value
	3569	passed is not changed at all, so if it needs quotation, the quotation needs
	3570	to be included in the value of the attribute. Use with caution, as passing
	3571	a value like C<",",,,,"""> will for sure mess up your output. The default
	3572	for this attribute is C<undef>, meaning no special treatment.
	3573
	3574	This attribute is useful when exporting CSV data to be imported in custom
	3575	loaders, like for MySQL, that recognize special sequences for C<NULL> data.
	3576
	3577	This attribute has no meaning when parsing CSV data.
	3578
	3579	=head3 verbatim
	3580
	3581	my $csv = Text::CSV_PP->new ({ verbatim => 1 });
	3582	$csv->verbatim (0);
	3583	my $f = $csv->verbatim;
	3584
	3585	This is a quite controversial attribute to set, but makes some hard things
	3586	possible.
	3587
	3588	The rationale behind this attribute is to tell the parser that the normally
	3589	special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
	3590	special when this flag is set, and be dealt with as being ordinary binary
	3591	characters. This will ease working with data with embedded newlines.
	3592
	3593	When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
	3594	every line.
	3595
	3596	Imagine a file format like
	3597
	3598	M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
	3599
	3600	where the line ending is a very specific C<"#\r\n">, and the sep_char is a
	3601	C<^> (caret). None of the fields is quoted, but embedded binary data is
	3602	likely to be present. With the specific line ending, this should not be too
	3603	hard to detect.
	3604
	3605	By default, Text::CSV_PP's parse function is instructed to only know about
	3606	C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
	3607	embedded newline as a real C<end-of-line>, so it can scan the next line if
	3608	binary is true, and the newline is inside a quoted field. With this option,
	3609	we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
	3610	a binary character.
	3611
	3612	For L</parse> this means that the parser has no more idea about line ending
	3613	and L</getline> C<chomp>s line endings on reading.
	3614
	3615	=head3 types
	3616
	3617	A set of column types; the attribute is immediately passed to the L</types>
	3618	method.
	3619
	3620	=head3 callbacks
	3621
	3622	See the L</Callbacks> section below.
	3623
	3624	=head3 accessors
	3625
	3626	To sum it up,
	3627
	3628	$csv = Text::CSV_PP->new ();
	3629
	3630	is equivalent to
	3631
	3632	$csv = Text::CSV_PP->new ({
	3633	eol => undef, # \r, \n, or \r\n
	3634	sep_char => ',',
	3635	sep => undef,
	3636	quote_char => '"',
	3637	quote => undef,
	3638	escape_char => '"',
	3639	binary => 0,
	3640	decode_utf8 => 1,
	3641	auto_diag => 0,
	3642	diag_verbose => 0,
	3643	blank_is_undef => 0,
	3644	empty_is_undef => 0,
	3645	allow_whitespace => 0,
	3646	allow_loose_quotes => 0,
	3647	allow_loose_escapes => 0,
	3648	allow_unquoted_escape => 0,
	3649	always_quote => 0,
	3650	quote_empty => 0,
	3651	quote_space => 1,
	3652	escape_null => 1,
	3653	quote_binary => 1,
	3654	keep_meta_info => 0,
	3655	strict => 0,
	3656	formula => 0,
	3657	verbatim => 0,
	3658	undef_str => undef,
	3659	types => undef,
	3660	callbacks => undef,
	3661	});
	3662
	3663	For all of the above mentioned flags, an accessor method is available where
	3664	you can inquire the current value, or change the value
	3665
	3666	my $quote = $csv->quote_char;
	3667	$csv->binary (1);
	3668
	3669	It is not wise to change these settings halfway through writing C<CSV> data
	3670	to a stream. If however you want to create a new stream using the available
	3671	C<CSV> object, there is no harm in changing them.
	3672
	3673	If the L</new> constructor call fails, it returns C<undef>, and makes the
	3674	fail reason available through the L</error_diag> method.
	3675
	3676	$csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
	3677	die "".Text::CSV_PP->error_diag ();
	3678
	3679	L</error_diag> will return a string like
	3680
	3681	"INI - Unknown attribute 'ecs_char'"
	3682
	3683	=head2 known_attributes
	3684
	3685	@attr = Text::CSV_PP->known_attributes;
	3686	@attr = Text::CSV_PP::known_attributes;
	3687	@attr = $csv->known_attributes;
	3688
	3689	This method will return an ordered list of all the supported attributes as
	3690	described above. This can be useful for knowing what attributes are valid
	3691	in classes that use or extend Text::CSV_PP.
	3692
	3693	=head2 print
	3694
	3695	$status = $csv->print ($fh, $colref);
	3696
	3697	Similar to L</combine> + L</string> + L</print>, but much more efficient.
	3698	It expects an array ref as input (not an array!) and the resulting string
	3699	is not really created, but immediately written to the C<$fh> object,
	3700	typically an IO handle or any other object that offers a L</print> method.
	3701
	3702	For performance reasons C<print> does not create a result string, so all
	3703	L</string>, L</status>, L</fields>, and L</error_input> methods will return
	3704	undefined information after executing this method.
	3705
	3706	If C<$colref> is C<undef> (explicit, not through a variable argument) and
	3707	L</bind_columns> was used to specify fields to be printed, it is possible
	3708	to make performance improvements, as otherwise data would have to be copied
	3709	as arguments to the method call:
	3710
	3711	$csv->bind_columns (\($foo, $bar));
	3712	$status = $csv->print ($fh, undef);
	3713
	3714	A short benchmark
	3715
	3716	my @data = ("aa" .. "zz");
	3717	$csv->bind_columns (\(@data));
	3718
	3719	$csv->print ($fh, [ @data ]); # 11800 recs/sec
	3720	$csv->print ($fh, \@data ); # 57600 recs/sec
	3721	$csv->print ($fh, undef ); # 48500 recs/sec
	3722
	3723	=head2 say
	3724
	3725	$status = $csv->say ($fh, $colref);
	3726
	3727	Like L<C<print>\|/print>, but L<C<eol>\|/eol> defaults to C<$\>.
	3728
	3729	=head2 print_hr
	3730
	3731	$csv->print_hr ($fh, $ref);
	3732
	3733	Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
	3734	provided the column names are set with L</column_names>.
	3735
	3736	It is just a wrapper method with basic parameter checks over
	3737
	3738	$csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
	3739
	3740	=head2 combine
	3741
	3742	$status = $csv->combine (@fields);
	3743
	3744	This method constructs a C<CSV> record from C<@fields>, returning success
	3745	or failure. Failure can result from lack of arguments or an argument that
	3746	contains an invalid character. Upon success, L</string> can be called to
	3747	retrieve the resultant C<CSV> string. Upon failure, the value returned by
	3748	L</string> is undefined and L</error_input> could be called to retrieve the
	3749	invalid argument.
	3750
	3751	=head2 string
	3752
	3753	$line = $csv->string ();
	3754
	3755	This method returns the input to L</parse> or the resultant C<CSV> string
	3756	of L</combine>, whichever was called more recently.
	3757
	3758	=head2 getline
	3759
	3760	$colref = $csv->getline ($fh);
	3761
	3762	This is the counterpart to L</print>, as L</parse> is the counterpart to
	3763	L</combine>: it parses a row from the C<$fh> handle using the L</getline>
	3764	method associated with C<$fh> and parses this row into an array ref. This
	3765	array ref is returned by the function or C<undef> for failure. When C<$fh>
	3766	does not support C<getline>, you are likely to hit errors.
	3767
	3768	When fields are bound with L</bind_columns> the return value is a reference
	3769	to an empty list.
	3770
	3771	The L</string>, L</fields>, and L</status> methods are meaningless again.
	3772
	3773	=head2 getline_all
	3774
	3775	$arrayref = $csv->getline_all ($fh);
	3776	$arrayref = $csv->getline_all ($fh, $offset);
	3777	$arrayref = $csv->getline_all ($fh, $offset, $length);
	3778
	3779	This will return a reference to a list of L<getline ($fh)\|/getline> results.
	3780	In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
	3781	with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
	3782	into consideration.
	3783
	3784	Given a CSV file with 10 lines:
	3785
	3786	lines call
	3787	----- ---------------------------------------------------------
	3788	0..9 $csv->getline_all ($fh) # all
	3789	0..9 $csv->getline_all ($fh, 0) # all
	3790	8..9 $csv->getline_all ($fh, 8) # start at 8
	3791	- $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
	3792	0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
	3793	4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
	3794	8..9 $csv->getline_all ($fh, -2) # last 2 rows
	3795	6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
	3796
	3797	=head2 getline_hr
	3798
	3799	The L</getline_hr> and L</column_names> methods work together to allow you
	3800	to have rows returned as hashrefs. You must call L</column_names> first to
	3801	declare your column names.
	3802
	3803	$csv->column_names (qw( code name price description ));
	3804	$hr = $csv->getline_hr ($fh);
	3805	print "Price for $hr->{name} is $hr->{price} EUR\n";
	3806
	3807	L</getline_hr> will croak if called before L</column_names>.
	3808
	3809	Note that L</getline_hr> creates a hashref for every row and will be much
	3810	slower than the combined use of L</bind_columns> and L</getline> but still
	3811	offering the same ease of use hashref inside the loop:
	3812
	3813	my @cols = @{$csv->getline ($fh)};
	3814	$csv->column_names (@cols);
	3815	while (my $row = $csv->getline_hr ($fh)) {
	3816	print $row->{price};
	3817	}
	3818
	3819	Could easily be rewritten to the much faster:
	3820
	3821	my @cols = @{$csv->getline ($fh)};
	3822	my $row = {};
	3823	$csv->bind_columns (\@{$row}{@cols});
	3824	while ($csv->getline ($fh)) {
	3825	print $row->{price};
	3826	}
	3827
	3828	Your mileage may vary for the size of the data and the number of rows. With
	3829	perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
	3830
	3831	Rate hashrefs getlines
	3832	hashrefs 1.00/s -- -76%
	3833	getlines 4.15/s 313% --
	3834
	3835	=head2 getline_hr_all
	3836
	3837	$arrayref = $csv->getline_hr_all ($fh);
	3838	$arrayref = $csv->getline_hr_all ($fh, $offset);
	3839	$arrayref = $csv->getline_hr_all ($fh, $offset, $length);
	3840
	3841	This will return a reference to a list of L<getline_hr ($fh)\|/getline_hr>
	3842	results. In this call, L<C<keep_meta_info>\|/keep_meta_info> is disabled.
	3843
	3844	=head2 parse
	3845
	3846	$status = $csv->parse ($line);
	3847
	3848	This method decomposes a C<CSV> string into fields, returning success or
	3849	failure. Failure can result from a lack of argument or the given C<CSV>
	3850	string being improperly formatted. Upon success, L</fields> can be called to
	3851	retrieve the decomposed fields. Upon failure calling L</fields> will return
	3852	undefined data and L</error_input> can be called to retrieve the invalid
	3853	argument.
	3854
	3855	You may use the L</types> method for setting column types. See L</types>'
	3856	description below.
	3857
	3858	The C<$line> argument is supposed to be a simple scalar. Everything else is
	3859	supposed to croak and set error 1500.
	3860
	3861	=head2 fragment
	3862
	3863	This function tries to implement RFC7111 (URI Fragment Identifiers for the
	3864	text/csv Media Type) - http://tools.ietf.org/html/rfc7111
	3865
	3866	my $AoA = $csv->fragment ($fh, $spec);
	3867
	3868	In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
	3869	to indicate a range. All indices are C<1>-based: the first row or column
	3870	has index C<1>. Selections can be combined with the semi-colon (C<;>).
	3871
	3872	When using this method in combination with L</column_names>, the returned
	3873	reference will point to a list of hashes instead of a list of lists. A
	3874	disjointed cell-based combined selection might return rows with different
	3875	number of columns making the use of hashes unpredictable.
	3876
	3877	$csv->column_names ("Name", "Age");
	3878	my $AoH = $csv->fragment ($fh, "col=3;8");
	3879
	3880	If the L</after_parse> callback is active, it is also called on every line
	3881	parsed and skipped before the fragment.
	3882
	3883	=over 2
	3884
	3885	=item row
	3886
	3887	row=4
	3888	row=5-7
	3889	row=6-*
	3890	row=1-2;4;6-*
	3891
	3892	=item col
	3893
	3894	col=2
	3895	col=1-3
	3896	col=4-*
	3897	col=1-2;4;7-*
	3898
	3899	=item cell
	3900
	3901	In cell-based selection, the comma (C<,>) is used to pair row and column
	3902
	3903	cell=4,1
	3904
	3905	The range operator (C<->) using C<cell>s can be used to define top-left and
	3906	bottom-right C<cell> location
	3907
	3908	cell=3,1-4,6
	3909
	3910	The C<*> is only allowed in the second part of a pair
	3911
	3912	cell=3,2-*,2 # row 3 till end, only column 2
	3913	cell=3,2-3,* # column 2 till end, only row 3
	3914	cell=3,2-, # strip row 1 and 2, and column 1
	3915
	3916	Cells and cell ranges may be combined with C<;>, possibly resulting in rows
	3917	with different number of columns
	3918
	3919	cell=1,1-2,2;3,3-4,4;1,4;4,1
	3920
	3921	Disjointed selections will only return selected cells. The cells that are
	3922	not specified will not be included in the returned set, not even as
	3923	C<undef>. As an example given a C<CSV> like
	3924
	3925	11,12,13,...19
	3926	21,22,...28,29
	3927	: :
	3928	91,...97,98,99
	3929
	3930	with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
	3931
	3932	11,12,14
	3933	21,22
	3934	33,34
	3935	41,43,44
	3936
	3937	Overlapping cell-specs will return those cells only once, so
	3938	C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
	3939
	3940	11,12,13
	3941	21,22,23,24
	3942	31,32,33,34
	3943	42,43,44
	3944
	3945	=back
	3946
	3947	L<RFC7111\|http://tools.ietf.org/html/rfc7111> does B<not> allow different
	3948	types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
	3949	Passing an invalid fragment specification will croak and set error 2013.
	3950
	3951	=head2 column_names
	3952
	3953	Set the "keys" that will be used in the L</getline_hr> calls. If no keys
	3954	(column names) are passed, it will return the current setting as a list.
	3955
	3956	L</column_names> accepts a list of scalars (the column names) or a single
	3957	array_ref, so you can pass the return value from L</getline> too:
	3958
	3959	$csv->column_names ($csv->getline ($fh));
	3960
	3961	L</column_names> does B<no> checking on duplicates at all, which might lead
	3962	to unexpected results. Undefined entries will be replaced with the string
	3963	C<"\cAUNDEF\cA">, so
	3964
	3965	$csv->column_names (undef, "", "name", "name");
	3966	$hr = $csv->getline_hr ($fh);
	3967
	3968	Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
	3969	the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
	3970	field.
	3971
	3972	L</column_names> croaks on invalid arguments.
	3973
	3974	=head2 header
	3975
	3976	This method does NOT work in perl-5.6.x
	3977
	3978	Parse the CSV header and set L<C<sep>\|/sep>, column_names and encoding.
	3979
	3980	my @hdr = $csv->header ($fh);
	3981	$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
	3982	$csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
	3983
	3984	The first argument should be a file handle.
	3985
	3986	This method resets some object properties, as it is supposed to be invoked
	3987	only once per file or stream. It will leave attributes C<column_names> and
	3988	C<bound_columns> alone if setting column names is disabled. Reading headers
	3989	on previously processed objects might fail on perl-5.8.0 and older.
	3990
	3991	Assuming that the file opened for parsing has a header, and the header does
	3992	not contain problematic characters like embedded newlines, read the first
	3993	line from the open handle then auto-detect whether the header separates the
	3994	column names with a character from the allowed separator list.
	3995
	3996	If any of the allowed separators matches, and none of the I<other> allowed
	3997	separators match, set L<C<sep>\|/sep> to that separator for the current
	3998	CSV_PP instance and use it to parse the first line, map those to lowercase,
	3999	and use that to set the instance L</column_names>:
	4000
	4001	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
	4002	open my $fh, "<", "file.csv";
	4003	binmode $fh; # for Windows
	4004	$csv->header ($fh);
	4005	while (my $row = $csv->getline_hr ($fh)) {
	4006	...
	4007	}
	4008
	4009	If the header is empty, contains more than one unique separator out of the
	4010	allowed set, contains empty fields, or contains identical fields (after
	4011	folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
	4012
	4013	If the header contains embedded newlines or is not valid CSV in any other
	4014	way, this method will croak and leave the parse error untouched.
	4015
	4016	A successful call to C<header> will always set the L<C<sep>\|/sep> of the
	4017	C<$csv> object. This behavior can not be disabled.
	4018
	4019	=head3 return value
	4020
	4021	On error this method will croak.
	4022
	4023	In list context, the headers will be returned whether they are used to set
	4024	L</column_names> or not.
	4025
	4026	In scalar context, the instance itself is returned. B<Note>: the values as
	4027	found in the header will effectively be B<lost> if C<set_column_names> is
	4028	false.
	4029
	4030	=head3 Options
	4031
	4032	=over 2
	4033
	4034	=item sep_set
	4035
	4036	$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
	4037
	4038	The list of legal separators defaults to C<[ ";", "," ]> and can be changed
	4039	by this option. As this is probably the most often used option, it can be
	4040	passed on its own as an unnamed argument:
	4041
	4042	$csv->header ($fh, [ ";", ",", "\|", "\t", "::", "\x{2063}" ]);
	4043
	4044	Multi-byte sequences are allowed, both multi-character and Unicode. See
	4045	L<C<sep>\|/sep>.
	4046
	4047	=item detect_bom
	4048
	4049	$csv->header ($fh, { detect_bom => 1 });
	4050
	4051	The default behavior is to detect if the header line starts with a BOM. If
	4052	the header has a BOM, use that to set the encoding of C<$fh>. This default
	4053	behavior can be disabled by passing a false value to C<detect_bom>.
	4054
	4055	Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
	4056	UTF-32LE. BOM's also support UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
	4057	but L<Encode> does not (yet). UTF-7 is not supported.
	4058
	4059	If a supported BOM was detected as start of the stream, it is stored in the
	4060	object attribute C<ENCODING>.
	4061
	4062	my $enc = $csv->{ENCODING};
	4063
	4064	The encoding is used with C<binmode> on C<$fh>.
	4065
	4066	If the handle was opened in a (correct) encoding, this method will B<not>
	4067	alter the encoding, as it checks the leading B<bytes> of the first line. In
	4068	case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
	4069	C<""> (empty) instead of the default C<undef>.
	4070
	4071	=item munge_column_names
	4072
	4073	This option offers the means to modify the column names into something that
	4074	is most useful to the application. The default is to map all column names
	4075	to lower case.
	4076
	4077	$csv->header ($fh, { munge_column_names => "lc" });
	4078
	4079	The following values are available:
	4080
	4081	lc - lower case
	4082	uc - upper case
	4083	none - do not change
	4084	\%hash - supply a mapping
	4085	\&cb - supply a callback
	4086
	4087	Literal:
	4088
	4089	$csv->header ($fh, { munge_column_names => "none" });
	4090
	4091	Hash:
	4092
	4093	$csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
	4094
	4095	If a value does not exist, the original value is used unchanged.
	4096
	4097	Callback:
	4098
	4099	$csv->header ($fh, { munge_column_names => sub { fc } });
	4100	$csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
	4101	$csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
	4102
	4103	As this callback is called in a C<map>, you can use C<$_> directly.
	4104
	4105	=item set_column_names
	4106
	4107	$csv->header ($fh, { set_column_names => 1 });
	4108
	4109	The default is to set the instance's column names using L</column_names> if
	4110	the method is successful, so subsequent calls to L</getline_hr> can return
	4111	a hash. Disabling setting the header can be forced by using a false value for
	4112	this option.
	4113
	4114	As described in L</return value> above, content is lost in scalar context.
	4115
	4116	=back
	4117
	4118	=head3 Validation
	4119
	4120	When receiving CSV files from external sources, this method can be used to
	4121	protect against changes in the layout by restricting to known headers (and
	4122	typos in the header fields).
	4123
	4124	my %known = (
	4125	"record key" => "c_rec",
	4126	"rec id" => "c_rec",
	4127	"id_rec" => "c_rec",
	4128	"kode" => "code",
	4129	"code" => "code",
	4130	"vaule" => "value",
	4131	"value" => "value",
	4132	);
	4133	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
	4134	open my $fh, "<", $source or die "$source: $!";
	4135	$csv->header ($fh, { munge_column_names => sub {
	4136	s/\s+$//;
	4137	s/^\s+//;
	4138	$known{lc $_} or die "Unknown column '$_' in $source";
	4139	}});
	4140	while (my $row = $csv->getline_hr ($fh)) {
	4141	say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
	4142	}
	4143
	4144	=head2 bind_columns
	4145
	4146	Takes a list of scalar references to be used for output with L</print> or
	4147	to store in the fields fetched by L</getline>. When you do not pass enough
	4148	references to store the fetched fields in, L</getline> will fail with error
	4149	C<3006>. If you pass more than there are fields to return, the content of
	4150	the remaining references is left untouched.
	4151
	4152	$csv->bind_columns (\$code, \$name, \$price, \$description);
	4153	while ($csv->getline ($fh)) {
	4154	print "The price of a $name is \x{20ac} $price\n";
	4155	}
	4156
	4157	To reset or clear all column binding, call L</bind_columns> with the single
	4158	argument C<undef>. This will also clear column names.
	4159
	4160	$csv->bind_columns (undef);
	4161
	4162	If no arguments are passed at all, L</bind_columns> will return the list of
	4163	current bindings or C<undef> if no binds are active.
	4164
	4165	Note that in parsing with C<bind_columns>, the fields are set on the fly.
	4166	That implies that if the third field of a row causes an error (or this row
	4167	has just two fields where the previous row had more), the first two fields
	4168	already have been assigned the values of the current row, while the rest of
	4169	the fields will still hold the values of the previous row. If you want the
	4170	parser to fail in these cases, use the L<C<strict>\|/strict> attribute.
	4171
	4172	=head2 eof
	4173
	4174	$eof = $csv->eof ();
	4175
	4176	If L</parse> or L</getline> was used with an IO stream, this method will
	4177	return true (1) if the last call hit end of file, otherwise it will return
	4178	false (''). This is useful to see the difference between a failure and end
	4179	of file.
	4180
	4181	Note that if the parsing of the last line caused an error, C<eof> is still
	4182	true. That means that if you are I<not> using L</auto_diag>, an idiom like
	4183
	4184	while (my $row = $csv->getline ($fh)) {
	4185	# ...
	4186	}
	4187	$csv->eof or $csv->error_diag;
	4188
	4189	will I<not> report the error. You would have to change that to
	4190
	4191	while (my $row = $csv->getline ($fh)) {
	4192	# ...
	4193	}
	4194	+$csv->error_diag and $csv->error_diag;
	4195
	4196	=head2 types
	4197
	4198	$csv->types (\@tref);
	4199
	4200	This method is used to force that (all) columns are of a given type. For
	4201	example, if you have an integer column, two columns with doubles and a
	4202	string column, then you might do a
	4203
	4204	$csv->types ([Text::CSV_PP::IV (),
	4205	Text::CSV_PP::NV (),
	4206	Text::CSV_PP::NV (),
	4207	Text::CSV_PP::PV ()]);
	4208
	4209	Column types are used only for I<decoding> columns while parsing, in other
	4210	words by the L</parse> and L</getline> methods.
	4211
	4212	You can unset column types by doing a
	4213
	4214	$csv->types (undef);
	4215
	4216	or fetch the current type settings with
	4217
	4218	$types = $csv->types ();
	4219
	4220	=over 4
	4221
	4222	=item IV
	4223
	4224	Set field type to integer.
	4225
	4226	=item NV
	4227
	4228	Set field type to numeric/float.
	4229
	4230	=item PV
	4231
	4232	Set field type to string.
	4233
	4234	=back
	4235
	4236	=head2 fields
	4237
	4238	@columns = $csv->fields ();
	4239
	4240	This method returns the input to L</combine> or the resultant decomposed
	4241	fields of a successful L</parse>, whichever was called more recently.
	4242
	4243	Note that the return value is undefined after using L</getline>, which does
	4244	not fill the data structures returned by L</parse>.
	4245
	4246	=head2 meta_info
	4247
	4248	@flags = $csv->meta_info ();
	4249
	4250	This method returns the "flags" of the input to L</combine> or the flags of
	4251	the resultant decomposed fields of L</parse>, whichever was called more
	4252	recently.
	4253
	4254	For each field, a meta_info field will hold flags that inform something
	4255	about the field returned by the L</fields> method or passed to the
	4256	L</combine> method. The flags are bit-wise-C<or>'d like:
	4257
	4258	=over 2
	4259
	4260	=item C< >0x0001
	4261
	4262	The field was quoted.
	4263
	4264	=item C< >0x0002
	4265
	4266	The field was binary.
	4267
	4268	=back
	4269
	4270	See the C<is_***> methods below.
	4271
	4272	=head2 is_quoted
	4273
	4274	my $quoted = $csv->is_quoted ($column_idx);
	4275
	4276	Where C<$column_idx> is the (zero-based) index of the column in the last
	4277	result of L</parse>.
	4278
	4279	This returns a true value if the data in the indicated column was enclosed
	4280	in L<C<quote_char>\|/quote_char> quotes. This might be important for fields
	4281	where content C<,20070108,> is to be treated as a numeric value, and where
	4282	C<,"20070108",> is explicitly marked as character string data.
	4283
	4284	This method is only valid when L</keep_meta_info> is set to a true value.
	4285
	4286	=head2 is_binary
	4287
	4288	my $binary = $csv->is_binary ($column_idx);
	4289
	4290	Where C<$column_idx> is the (zero-based) index of the column in the last
	4291	result of L</parse>.
	4292
	4293	This returns a true value if the data in the indicated column contained any
	4294	byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
	4295
	4296	This method is only valid when L</keep_meta_info> is set to a true value.
	4297
	4298	=head2 is_missing
	4299
	4300	my $missing = $csv->is_missing ($column_idx);
	4301
	4302	Where C<$column_idx> is the (zero-based) index of the column in the last
	4303	result of L</getline_hr>.
	4304
	4305	$csv->keep_meta_info (1);
	4306	while (my $hr = $csv->getline_hr ($fh)) {
	4307	$csv->is_missing (0) and next; # This was an empty line
	4308	}
	4309
	4310	When using L</getline_hr>, it is impossible to tell if the parsed fields
	4311	are C<undef> because they were not filled in the C<CSV> stream or because
	4312	they were not read at all, as B<all> the fields defined by L</column_names>
	4313	are set in the hash-ref. If you still need to know if all fields in each
	4314	row are provided, you should enable L<C<keep_meta_info>\|/keep_meta_info> so
	4315	you can check the flags.
	4316
	4317	If L<C<keep_meta_info>\|/keep_meta_info> is C<false>, C<is_missing> will
	4318	always return C<undef>, regardless of C<$column_idx> being valid or not. If
	4319	this attribute is C<true> it will return either C<0> (the field is present)
	4320	or C<1> (the field is missing).
	4321
	4322	A special case is the empty line. If the line is completely empty - after
	4323	dealing with the flags - this is still a valid CSV line: it is a record of
	4324	just one single empty field. However, if C<keep_meta_info> is set, invoking
	4325	C<is_missing> with index C<0> will now return true.
	4326
	4327	=head2 status
	4328
	4329	$status = $csv->status ();
	4330
	4331	This method returns the status of the last invoked L</combine> or L</parse>
	4332	call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
	4333
	4334	=head2 error_input
	4335
	4336	$bad_argument = $csv->error_input ();
	4337
	4338	This method returns the erroneous argument (if it exists) of L</combine> or
	4339	L</parse>, whichever was called more recently. If the last invocation was
	4340	successful, C<error_input> will return C<undef>.
	4341
	4342	=head2 error_diag
	4343
	4344	Text::CSV_PP->error_diag ();
	4345	$csv->error_diag ();
	4346	$error_code = 0 + $csv->error_diag ();
	4347	$error_str = "" . $csv->error_diag ();
	4348	($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
	4349
	4350	If (and only if) an error occurred, this function returns the diagnostics
	4351	of that error.
	4352
	4353	If called in void context, this will print the internal error code and the
	4354	associated error message to STDERR.
	4355
	4356	If called in list context, this will return the error code and the error
	4357	message in that order. If the last error was from parsing, the rest of the
	4358	values returned are a best guess at the location within the line that was
	4359	being parsed. Their values are 1-based. The position currently is index of
	4360	the byte at which the parsing failed in the current record. It might change
	4361	to be the index of the current character in a later release. The record number is
	4362	the index of the record parsed by the csv instance. The field number is the
	4363	index of the field the parser thinks it is currently trying to parse. See
	4364	F<examples/csv-check> for how this can be used.
	4365
	4366	If called in scalar context, it will return the diagnostics in a single
	4367	scalar, a-la C<$!>. It will contain the error code in numeric context, and
	4368	the diagnostics message in string context.
	4369
	4370	When called as a class method or a direct function call, the diagnostics
	4371	are that of the last L</new> call.
	4372
	4373	=head2 record_number
	4374
	4375	$recno = $csv->record_number ();
	4376
	4377	Returns the number of records parsed by this csv instance. This value should be more
	4378	accurate than C<$.> when embedded newlines come in play. Records written by
	4379	this instance are not counted.
	4380
	4381	=head2 SetDiag
	4382
	4383	$csv->SetDiag (0);
	4384
	4385	Use to reset the diagnostics if you are dealing with errors.
	4386
	4387	=head1 FUNCTIONS
	4388
	4389	This section is also taken from Text::CSV_XS.
	4390
	4391	=head2 csv
	4392
	4393	This function is not exported by default and should be explicitly requested:
	4394
	4395	use Text::CSV_PP qw( csv );
	4396
	4397	This is a high-level function that aims at simple (user) interfaces. This
	4398	can be used to read/parse a C<CSV> file or stream (the default behavior) or
	4399	to produce a file or write to a stream (define the C<out> attribute). It
	4400	returns an array- or hash-reference on parsing (or C<undef> on fail) or the
	4401	numeric value of L</error_diag> on writing. When this function fails you
	4402	can get to the error using the class call to L</error_diag>
	4403
	4404	my $aoa = csv (in => "test.csv") or
	4405	die Text::CSV_PP->error_diag;
	4406
	4407	This function takes the arguments as key-value pairs. This can be passed as
	4408	a list or as an anonymous hash:
	4409
	4410	my $aoa = csv ( in => "test.csv", sep_char => ";");
	4411	my $aoh = csv ({ in => $fh, headers => "auto" });
	4412
	4413	The arguments passed consist of two parts: the arguments to L</csv> itself
	4414	and the optional attributes to the C<CSV> object used inside the function
	4415	as enumerated and explained in L</new>.
	4416
	4417	If not overridden, the default options used for CSV are
	4418
	4419	auto_diag => 1
	4420	escape_null => 0
	4421
	4422	The option that is always set and cannot be altered is
	4423
	4424	binary => 1
	4425
	4426	As this function will likely be used in one-liners, it allows C<quote> to
	4427	be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
	4428	or C<escape>.
	4429
	4430	Alternative invocations:
	4431
	4432	my $aoa = Text::CSV_PP::csv (in => "file.csv");
	4433
	4434	my $csv = Text::CSV_PP->new ();
	4435	my $aoa = $csv->csv (in => "file.csv");
	4436
	4437	In the latter case, the object attributes are used from the existing object
	4438	and the attribute arguments in the function call are ignored:
	4439
	4440	my $csv = Text::CSV_PP->new ({ sep_char => ";" });
	4441	my $aoh = $csv->csv (in => "file.csv", bom => 1);
	4442
	4443	will parse using C<;> as C<sep_char>, not C<,>.
	4444
	4445	=head3 in
	4446
	4447	Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
	4448	which will be opened for reading and closed when finished, a file handle
	4449	(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
	4450	itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
	4451
	4452	When used with L</out>, C<in> should be a reference to a CSV structure (AoA
	4453	or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
	4454	The code-ref will be invoked with no arguments.
	4455
	4456	my $aoa = csv (in => "file.csv");
	4457
	4458	open my $fh, "<", "file.csv";
	4459	my $aoa = csv (in => $fh);
	4460
	4461	my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
	4462	my $err = csv (in => $csv, out => "file.csv");
	4463
	4464	If called in void context without the L</out> attribute, the resulting ref
	4465	will be used as input to a subsequent call to csv:
	4466
	4467	csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
	4468
	4469	will be a shortcut to
	4470
	4471	csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
	4472
	4473	where, in the absence of the C<out> attribute, this is a shortcut to
	4474
	4475	csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
	4476	out => *STDOUT)
	4477
	4478	=head3 out
	4479
	4480	csv (in => $aoa, out => "file.csv");
	4481	csv (in => $aoa, out => $fh);
	4482	csv (in => $aoa, out => STDOUT);
	4483	csv (in => $aoa, out => *STDOUT);
	4484	csv (in => $aoa, out => \*STDOUT);
	4485	csv (in => $aoa, out => \my $data);
	4486	csv (in => $aoa, out => undef);
	4487	csv (in => $aoa, out => \"skip");
	4488
	4489	In output mode, the default CSV options when producing CSV are
	4490
	4491	eol => "\r\n"
	4492
	4493	The L</fragment> attribute is ignored in output mode.
	4494
	4495	C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
	4496	writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
	4497	reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
	4498	or a reference to a scalar (e.g. C<\my $data>).
	4499
	4500	csv (in => sub { $sth->fetch }, out => "dump.csv");
	4501	csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
	4502	headers => $sth->{NAME_lc});
	4503
	4504	When a code-ref is used for C<in>, the output is generated per invocation,
	4505	so no buffering is involved. This implies that there is no size restriction
	4506	on the number of records. The C<csv> function ends when the coderef returns
	4507	a false value.
	4508
	4509	If C<out> is set to a reference of the literal string C<"skip">, the output
	4510	will be suppressed completely, which might be useful in combination with a
	4511	filter for side effects only.
	4512
	4513	my %cache;
	4514	csv (in => "dump.csv",
	4515	out => \"skip",
	4516	on_in => sub { $cache{$_[1][1]}++ });
	4517
	4518	Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
	4519	equivalent to C<\"skip">.
	4520
	4521	=head3 encoding
	4522
	4523	If passed, it should be an encoding accepted by the C<:encoding()> option
	4524	to C<open>. There is no default value. This attribute does not work in perl
	4525	5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
	4526	line invocations.
	4527
	4528	If C<encoding> is set to the literal value C<"auto">, the method L</header>
	4529	will be invoked on the opened stream to check if there is a BOM and set the
	4530	encoding accordingly. This is equal to passing a true value in the option
	4531	L<C<detect_bom>\|/detect_bom>.
	4532
	4533	=head3 detect_bom
	4534
	4535	If C<detect_bom> is given, the method L</header> will be invoked on the
	4536	opened stream to check if there is a BOM and set the encoding accordingly.
	4537
	4538	C<detect_bom> can be abbreviated to C<bom>.
	4539
	4540	This is the same as setting L<C<encoding>\|/encoding> to C<"auto">.
	4541
	4542	Note that as the method L</header> is invoked, its default is to also set
	4543	the headers.
	4544
	4545	=head3 headers
	4546
	4547	If this attribute is not given, the default behavior is to produce an array
	4548	of arrays.
	4549
	4550	If C<headers> is supplied, it should be an anonymous list of column names,
	4551	an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
	4552	or C<skip>.
	4553
	4554	=over 2
	4555
	4556	=item skip
	4557
	4558	When C<skip> is used, the header will not be included in the output.
	4559
	4560	my $aoa = csv (in => $fh, headers => "skip");
	4561
	4562	=item auto
	4563
	4564	If C<auto> is used, the first line of the C<CSV> source will be read as the
	4565	list of field headers and used to produce an array of hashes.
	4566
	4567	my $aoh = csv (in => $fh, headers => "auto");
	4568
	4569	=item lc
	4570
	4571	If C<lc> is used, the first line of the C<CSV> source will be read as the
	4572	list of field headers mapped to lower case and used to produce an array of
	4573	hashes. This is a variation of C<auto>.
	4574
	4575	my $aoh = csv (in => $fh, headers => "lc");
	4576
	4577	=item uc
	4578
	4579	If C<uc> is used, the first line of the C<CSV> source will be read as the
	4580	list of field headers mapped to upper case and used to produce an array of
	4581	hashes. This is a variation of C<auto>.
	4582
	4583	my $aoh = csv (in => $fh, headers => "uc");
	4584
	4585	=item CODE
	4586
	4587	If a coderef is used, the first line of the C<CSV> source will be read as
	4588	the list of mangled field headers in which each field is passed as the only
	4589	argument to the coderef. This list is used to produce an array of hashes.
	4590
	4591	my $aoh = csv (in => $fh,
	4592	headers => sub { lc ($_[0]) =~ s/kode/code/gr });
	4593
	4594	this example is a variation of using C<lc> where all occurrences of C<kode>
	4595	are replaced with C<code>.
	4596
	4597	=item ARRAY
	4598
	4599	If C<headers> is an anonymous list, the entries in the list will be used
	4600	as field names. The first line is considered data instead of headers.
	4601
	4602	my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
	4603	csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
	4604
	4605	=item HASH
	4606
	4607	If C<headers> is a hash reference, this implies C<auto>, but header fields
	4608	that exist as key in the hashref will be replaced by the value for that
	4609	key. Given a CSV file like
	4610
	4611	post-kode,city,name,id number,fubble
	4612	1234AA,Duckstad,Donald,13,"X313DF"
	4613
	4614	using
	4615
	4616	csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
	4617
	4618	will return an entry like
	4619
	4620	{ pc => "1234AA",
	4621	city => "Duckstad",
	4622	name => "Donald",
	4623	ID => "13",
	4624	fubble => "X313DF",
	4625	}
	4626
	4627	=back
	4628
	4629	See also L<C<munge_column_names>\|/munge_column_names> and
	4630	L<C<set_column_names>\|/set_column_names>.
	4631
	4632	=head3 munge_column_names
	4633
	4634	If C<munge_column_names> is set, the method L</header> is invoked on the
	4635	opened stream with all matching arguments to detect and set the headers.
	4636
	4637	C<munge_column_names> can be abbreviated to C<munge>.
	4638
	4639	=head3 key
	4640
	4641	If passed, will default L<C<headers>\|/headers> to C<"auto"> and return a
	4642	hashref instead of an array of hashes. Allowed values are simple scalars or
	4643	array-references where the first element is the joiner and the rest are the
	4644	fields to join to combine the key.
	4645
	4646	my $ref = csv (in => "test.csv", key => "code");
	4647	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
	4648
	4649	with test.csv like
	4650
	4651	code,product,price,color
	4652	1,pc,850,gray
	4653	2,keyboard,12,white
	4654	3,mouse,5,black
	4655
	4656	the first example will return
	4657
	4658	{ 1 => {
	4659	code => 1,
	4660	color => 'gray',
	4661	price => 850,
	4662	product => 'pc'
	4663	},
	4664	2 => {
	4665	code => 2,
	4666	color => 'white',
	4667	price => 12,
	4668	product => 'keyboard'
	4669	},
	4670	3 => {
	4671	code => 3,
	4672	color => 'black',
	4673	price => 5,
	4674	product => 'mouse'
	4675	}
	4676	}
	4677
	4678	the second example will return
	4679
	4680	{ "1:gray" => {
	4681	code => 1,
	4682	color => 'gray',
	4683	price => 850,
	4684	product => 'pc'
	4685	},
	4686	"2:white" => {
	4687	code => 2,
	4688	color => 'white',
	4689	price => 12,
	4690	product => 'keyboard'
	4691	},
	4692	"3:black" => {
	4693	code => 3,
	4694	color => 'black',
	4695	price => 5,
	4696	product => 'mouse'
	4697	}
	4698	}
	4699
	4700	The C<key> attribute can be combined with L<C<headers>\|/headers> for C<CSV>
	4701	data that has no header line, like
	4702
	4703	my $ref = csv (
	4704	in => "foo.csv",
	4705	headers => [qw( c_foo foo bar description stock )],
	4706	key => "c_foo",
	4707	);
	4708
	4709	=head3 value
	4710
	4711	Used to create key-value hashes.
	4712
	4713	Only allowed when C<key> is valid. A C<value> can be either a single column
	4714	label or an anonymous list of column labels. In the first case, the value
	4715	will be a simple scalar value, in the latter case, it will be a hashref.
	4716
	4717	my $ref = csv (in => "test.csv", key => "code",
	4718	value => "price");
	4719	my $ref = csv (in => "test.csv", key => "code",
	4720	value => [ "product", "price" ]);
	4721	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
	4722	value => "price");
	4723	my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
	4724	value => [ "product", "price" ]);
	4725
	4726	with test.csv like
	4727
	4728	code,product,price,color
	4729	1,pc,850,gray
	4730	2,keyboard,12,white
	4731	3,mouse,5,black
	4732
	4733	the first example will return
	4734
	4735	{ 1 => 850,
	4736	2 => 12,
	4737	3 => 5,
	4738	}
	4739
	4740	the second example will return
	4741
	4742	{ 1 => {
	4743	price => 850,
	4744	product => 'pc'
	4745	},
	4746	2 => {
	4747	price => 12,
	4748	product => 'keyboard'
	4749	},
	4750	3 => {
	4751	price => 5,
	4752	product => 'mouse'
	4753	}
	4754	}
	4755
	4756	the third example will return
	4757
	4758	{ "1:gray" => 850,
	4759	"2:white" => 12,
	4760	"3:black" => 5,
	4761	}
	4762
	4763	the fourth example will return
	4764
	4765	{ "1:gray" => {
	4766	price => 850,
	4767	product => 'pc'
	4768	},
	4769	"2:white" => {
	4770	price => 12,
	4771	product => 'keyboard'
	4772	},
	4773	"3:black" => {
	4774	price => 5,
	4775	product => 'mouse'
	4776	}
	4777	}
	4778
	4779	=head3 keep_headers
	4780
	4781	When using hashes, keep the column names in the arrayref passed, so all
	4782	headers are available after the call in the original order.
	4783
	4784	my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
	4785
	4786	This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
	4787
	4788	This attribute implies a default of C<auto> for the C<headers> attribute.
	4789
	4790	=head3 fragment
	4791
	4792	Only output the fragment as defined in the L</fragment> method. This option
	4793	is ignored when I<generating> C<CSV>. See L</out>.
	4794
	4795	Combining all of them could give something like
	4796
	4797	use Text::CSV_PP qw( csv );
	4798	my $aoh = csv (
	4799	in => "test.txt",
	4800	encoding => "utf-8",
	4801	headers => "auto",
	4802	sep_char => "\|",
	4803	fragment => "row=3;6-9;15-*",
	4804	);
	4805	say $aoh->[15]{Foo};
	4806
	4807	=head3 sep_set
	4808
	4809	If C<sep_set> is set, the method L</header> is invoked on the opened stream
	4810	to detect and set L<C<sep_char>\|/sep_char> with the given set.
	4811
	4812	C<sep_set> can be abbreviated to C<seps>.
	4813
	4814	Note that as the L</header> method is invoked, its default is to also set
	4815	the headers.
	4816
	4817	=head3 set_column_names
	4818
	4819	If C<set_column_names> is passed, the method L</header> is invoked on the
	4820	opened stream with all arguments meant for L</header>.
	4821
	4822	If C<set_column_names> is passed as a false value, the content of the first
	4823	row is only preserved if the output is AoA:
	4824
	4825	With an input-file like
	4826
	4827	bAr,foo
	4828	1,2
	4829	3,4,5
	4830
	4831	This call
	4832
	4833	my $aoa = csv (in => $file, set_column_names => 0);
	4834
	4835	will result in
	4836
	4837	[[ "bar", "foo" ],
	4838	[ "1", "2" ],
	4839	[ "3", "4", "5" ]]
	4840
	4841	and
	4842
	4843	my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
	4844
	4845	will result in
	4846
	4847	[[ "bAr", "foo" ],
	4848	[ "1", "2" ],
	4849	[ "3", "4", "5" ]]
	4850
	4851	=head2 Callbacks
	4852
	4853	Callbacks enable actions triggered from the I<inside> of Text::CSV_PP.
	4854
	4855	While most of what this enables can easily be done in an unrolled loop as
	4856	described in the L</SYNOPSIS>, callbacks can be used to meet special demands
	4857	or enhance the L</csv> function.
	4858
	4859	=over 2
	4860
	4861	=item error
	4862
	4863	$csv->callbacks (error => sub { $csv->SetDiag (0) });
	4864
	4865	the C<error> callback is invoked when an error occurs, but I<only> when
	4866	L</auto_diag> is set to a true value. A callback is invoked with the values
	4867	returned by L</error_diag>:
	4868
	4869	my ($c, $s);
	4870
	4871	sub ignore3006
	4872	{
	4873	my ($err, $msg, $pos, $recno, $fldno) = @_;
	4874	if ($err == 3006) {
	4875	# ignore this error
	4876	($c, $s) = (undef, undef);
	4877	Text::CSV_PP->SetDiag (0);
	4878	}
	4879	# Any other error
	4880	return;
	4881	} # ignore3006
	4882
	4883	$csv->callbacks (error => \&ignore3006);
	4884	$csv->bind_columns (\$c, \$s);
	4885	while ($csv->getline ($fh)) {
	4886	# Error 3006 will not stop the loop
	4887	}
	4888
	4889	=item after_parse
	4890
	4891	$csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
	4892	while (my $row = $csv->getline ($fh)) {
	4893	$row->[-1] eq "NEW";
	4894	}
	4895
	4896	This callback is invoked after parsing with L</getline> only if no error
	4897	occurred. The callback is invoked with two arguments: the current C<CSV>
	4898	parser object and an array reference to the fields parsed.
	4899
	4900	The return code of the callback is ignored unless it is a reference to the
	4901	string "skip", in which case the record will be skipped in L</getline_all>.
	4902
	4903	sub add_from_db
	4904	{
	4905	my ($csv, $row) = @_;
	4906	$sth->execute ($row->[4]);
	4907	push @$row, $sth->fetchrow_array;
	4908	} # add_from_db
	4909
	4910	my $aoa = csv (in => "file.csv", callbacks => {
	4911	after_parse => \&add_from_db });
	4912
	4913	This hook can be used for validation:
	4914
	4915	=over 2
	4916
	4917	=item FAIL
	4918
	4919	Die if any of the records does not validate a rule:
	4920
	4921	after_parse => sub {
	4922	$_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
	4923	die "5th field does not have a valid Dutch zipcode";
	4924	}
	4925
	4926	=item DEFAULT
	4927
	4928	Replace invalid fields with a default value:
	4929
	4930	after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
	4931
	4932	=item SKIP
	4933
	4934	Skip records that have invalid fields (only applies to L</getline_all>):
	4935
	4936	after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
	4937
	4938	=back
	4939
	4940	=item before_print
	4941
	4942	my $idx = 1;
	4943	$csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
	4944	$csv->print (*STDOUT, [ 0, $_ ]) for @members;
	4945
	4946	This callback is invoked before printing with L</print> only if no error
	4947	occurred. The callback is invoked with two arguments: the current C<CSV>
	4948	parser object and an array reference to the fields passed.
	4949
	4950	The return code of the callback is ignored.
	4951
	4952	sub max_4_fields
	4953	{
	4954	my ($csv, $row) = @_;
	4955	@$row > 4 and splice @$row, 4;
	4956	} # max_4_fields
	4957
	4958	csv (in => csv (in => "file.csv"), out => *STDOUT,
	4959	callbacks => { before_print => \&max_4_fields });
	4960
	4961	This callback is not active for L</combine>.
	4962
	4963	=back
	4964
	4965	=head3 Callbacks for csv ()
	4966
	4967	The L</csv> function allows for some callbacks that do not integrate in XS internals
	4968	but only feature the L</csv> function.
	4969
	4970	csv (in => "file.csv",
	4971	callbacks => {
	4972	filter => { 6 => sub { $_ > 15 } }, # first
	4973	after_parse => sub { say "AFTER PARSE"; }, # first
	4974	after_in => sub { say "AFTER IN"; }, # second
	4975	on_in => sub { say "ON IN"; }, # third
	4976	},
	4977	);
	4978
	4979	csv (in => $aoh,
	4980	out => "file.csv",
	4981	callbacks => {
	4982	on_in => sub { say "ON IN"; }, # first
	4983	before_out => sub { say "BEFORE OUT"; }, # second
	4984	before_print => sub { say "BEFORE PRINT"; }, # third
	4985	},
	4986	);
	4987
	4988	=over 2
	4989
	4990	=item filter
	4991
	4992	This callback can be used to filter records. It is called just after a new
	4993	record has been scanned. The callback accepts a:
	4994
	4995	=over 2
	4996
	4997	=item hashref
	4998
	4999	The keys are the index to the row (the field name or field number, 1-based)
	5000	and the values are subs to return a true or false value.
	5001
	5002	csv (in => "file.csv", filter => {
	5003	3 => sub { m/a/ }, # third field should contain an "a"
	5004	5 => sub { length > 4 }, # length of the 5th field minimal 5
	5005	});
	5006
	5007	csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
	5008
	5009	If the keys to the filter hash contain any character that is not a digit it
	5010	will also implicitly set L</headers> to C<"auto"> unless L</headers> was
	5011	already passed as argument. When headers are active, returning an array of
	5012	hashes, the filter is not applicable to the header itself.
	5013
	5014	All sub results should match, as in AND.
	5015
	5016	The context of the callback sets C<$_> localized to the field indicated by
	5017	the filter. The two arguments are as with all other callbacks, so the other
	5018	fields in the current row can be seen:
	5019
	5020	filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
	5021
	5022	If the context is set to return a list of hashes (L</headers> is defined),
	5023	the current record will also be available in the localized C<%_>:
	5024
	5025	filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
	5026
	5027	If the filter is used to I<alter> the content by changing C<$_>, make sure
	5028	that the sub returns true in order not to have that record skipped:
	5029
	5030	filter => { 2 => sub { $_ = uc }}
	5031
	5032	will upper-case the second field, and then skip it if the resulting content
	5033	evaluates to false. To always accept, end with truth:
	5034
	5035	filter => { 2 => sub { $_ = uc; 1 }}
	5036
	5037	=item coderef
	5038
	5039	csv (in => "file.csv", filter => sub { $n++; 0; });
	5040
	5041	If the argument to C<filter> is a coderef, it is an alias or shortcut to a
	5042	filter on column 0:
	5043
	5044	csv (filter => sub { $n++; 0 });
	5045
	5046	is equal to
	5047
	5048	csv (filter => { 0 => sub { $n++; 0 } });
	5049
	5050	=item filter-name
	5051
	5052	csv (in => "file.csv", filter => "not_blank");
	5053	csv (in => "file.csv", filter => "not_empty");
	5054	csv (in => "file.csv", filter => "filled");
	5055
	5056	These are predefined filters
	5057
	5058	Given a file like (line numbers prefixed for doc purpose only):
	5059
	5060	1:1,2,3
	5061	2:
	5062	3:,
	5063	4:""
	5064	5:,,
	5065	6:, ,
	5066	7:"",
	5067	8:" "
	5068	9:4,5,6
	5069
	5070	=over 2
	5071
	5072	=item not_blank
	5073
	5074	Filter out the blank lines
	5075
	5076	This filter is a shortcut for
	5077
	5078	filter => { 0 => sub { @{$_[1]} > 1 or
	5079	defined $_[1][0] && $_[1][0] ne "" } }
	5080
	5081	Due to the implementation, it is currently impossible to also filter lines
	5082	that consist only of a quoted empty field. These lines are also considered
	5083	blank lines.
	5084
	5085	With the given example, lines 2 and 4 will be skipped.
	5086
	5087	=item not_empty
	5088
	5089	Filter out lines where all the fields are empty.
	5090
	5091	This filter is a shortcut for
	5092
	5093	filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
	5094
	5095	A space is not regarded as being empty, so given the example data, lines 2, 3,
	5096	4, 5, and 7 are skipped.
	5097
	5098	=item filled
	5099
	5100	Filter out lines that have no visible data
	5101
	5102	This filter is a shortcut for
	5103
	5104	filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
	5105
	5106	This filter rejects all lines that do I<not> have at least one field that
	5107	contains a non-whitespace character.
	5108
	5109	With the given example data, this filter would skip lines 2 through 8.
	5110
	5111	=back
	5112
	5113	=back
	5114
	5115	=item after_in
	5116
	5117	This callback is invoked for each record after all records have been parsed
	5118	but before returning the reference to the caller. The hook is invoked with
	5119	two arguments: the current C<CSV> parser object and a reference to the
	5120	record. The reference can be a reference to a HASH or a reference to an
	5121	ARRAY as determined by the arguments.
	5122
	5123	This callback can also be passed as an attribute without the C<callbacks>
	5124	wrapper.
	5125
	5126	=item before_out
	5127
	5128	This callback is invoked for each record before the record is printed. The
	5129	hook is invoked with two arguments: the current C<CSV> parser object and a
	5130	reference to the record. The reference can be a reference to a HASH or a
	5131	reference to an ARRAY as determined by the arguments.
	5132
	5133	This callback can also be passed as an attribute without the C<callbacks>
	5134	wrapper.
	5135
	5136	This callback makes the row available in C<%_> if the row is a hashref. In
	5137	this case C<%_> is writable and will change the original row.
	5138
	5139	=item on_in
	5140
	5141	This callback acts exactly as the L</after_in> or the L</before_out> hooks.
	5142
	5143	This callback can also be passed as an attribute without the C<callbacks>
	5144	wrapper.
	5145
	5146	This callback makes the row available in C<%_> if the row is a hashref. In
	5147	this case C<%_> is writable and will change the original row. So e.g. with
	5148
	5149	my $aoh = csv (
	5150	in => \"foo\n1\n2\n",
	5151	headers => "auto",
	5152	on_in => sub { $_{bar} = 2; },
	5153	);
	5154
	5155	C<$aoh> will be:
	5156
	5157	[ { foo => 1,
	5158	bar => 2,
	5159	},
	5160	{ foo => 2,
	5161	bar => 2,
	5162	}
	5163	]
	5164
	5165	=item csv
	5166
	5167	The I<function> L</csv> can also be called as a method or with an existing
	5168	Text::CSV_PP object. This could help if the function is to be invoked a lot
	5169	of times and the overhead of creating the object internally over and over
	5170	again would be prevented by passing an existing instance.
	5171
	5172	my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
	5173
	5174	my $aoa = $csv->csv (in => $fh);
	5175	my $aoa = csv (in => $fh, csv => $csv);
	5176
	5177	both act the same. Running this 20000 times on a 20-line CSV file showed
	5178	a 53% speedup.
	5179
	5180	=back
	5181
	5182	=head1 DIAGNOSTICS
	5183
	5184	This section is also taken from Text::CSV_XS.
	5185
	5186	Still under construction ...
	5187
	5188	If an error occurs, C<< $csv->error_diag >> can be used to get information
	5189	on the cause of the failure. Note that for speed reasons the internal value
	5190	is never cleared on success, so using the value returned by L</error_diag>
	5191	in normal cases - when no error occurred - may cause unexpected results.
	5192
	5193	If the constructor failed, the cause can be found using L</error_diag> as a
	5194	class method, like C<< Text::CSV_PP->error_diag >>.
	5195
	5196	The C<< $csv->error_diag >> method is automatically invoked upon error when
	5197	the constructor was called with L<C<auto_diag>\|/auto_diag> set to C<1> or
	5198	C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
	5199	C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
	5200	EOF> is excluded from L<C<auto_diag>\|/auto_diag> reports.
	5201
	5202	Errors can be (individually) caught using the L</error> callback.
	5203
	5204	The errors as described below are available. I have tried to make the error
	5205	itself explanatory enough, but more descriptions will be added. For most of
	5206	these errors, the first three capitals describe the error category:
	5207
	5208	=over 2
	5209
	5210	=item *
	5211	INI
	5212
	5213	Initialization error or option conflict.
	5214
	5215	=item *
	5216	ECR
	5217
	5218	Carriage-Return related parse error.
	5219
	5220	=item *
	5221	EOF
	5222
	5223	End-Of-File related parse error.
	5224
	5225	=item *
	5226	EIQ
	5227
	5228	Parse error inside quotation.
	5229
	5230	=item *
	5231	EIF
	5232
	5233	Parse error inside field.
	5234
	5235	=item *
	5236	ECB
	5237
	5238	Combine error.
	5239
	5240	=item *
	5241	EHR
	5242
	5243	HashRef parse related error.
	5244
	5245	=back
	5246
	5247	And below should be the complete list of error codes that can be returned:
	5248
	5249	=over 2
	5250
	5251	=item *
	5252	1001 "INI - sep_char is equal to quote_char or escape_char"
	5253
	5254	The L<separation character\|/sep_char> cannot be equal to L<the quotation
	5255	character\|/quote_char> or to L<the escape character\|/escape_char>, as this
	5256	would invalidate all parsing rules.
	5257
	5258	=item *
	5259	1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
	5260
	5261	Using the L<C<allow_whitespace>\|/allow_whitespace> attribute when either
	5262	L<C<quote_char>\|/quote_char> or L<C<escape_char>\|/escape_char> is equal to
	5263	C<SPACE> or C<TAB> is too ambiguous to allow.
	5264
	5265	=item *
	5266	1003 "INI - \r or \n in main attr not allowed"
	5267
	5268	Using default L<C<eol>\|/eol> characters in either L<C<sep_char>\|/sep_char>,
	5269	L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> is not
	5270	allowed.
	5271
	5272	=item *
	5273	1004 "INI - callbacks should be undef or a hashref"
	5274
	5275	The L<C<callbacks>\|/Callbacks> attribute only accepts C<undef> or
	5276	a hash reference.
	5277
	5278	=item *
	5279	1005 "INI - EOL too long"
	5280
	5281	The value passed for EOL is exceeding its maximum length (16).
	5282
	5283	=item *
	5284	1006 "INI - SEP too long"
	5285
	5286	The value passed for SEP is exceeding its maximum length (16).
	5287
	5288	=item *
	5289	1007 "INI - QUOTE too long"
	5290
	5291	The value passed for QUOTE is exceeding its maximum length (16).
	5292
	5293	=item *
	5294	1008 "INI - SEP undefined"
	5295
	5296	The value passed for SEP should be defined and not empty.
	5297
	5298	=item *
	5299	1010 "INI - the header is empty"
	5300
	5301	The header line parsed in the L</header> is empty.
	5302
	5303	=item *
	5304	1011 "INI - the header contains more than one valid separator"
	5305
	5306	The header line parsed in the L</header> contains more than one (unique)
	5307	separator character out of the allowed set of separators.
	5308
	5309	=item *
	5310	1012 "INI - the header contains an empty field"
	5311
	5312	The header line parsed in the L</header> contains an empty field.
	5313
	5314	=item *
	5315	1013 "INI - the header contains nun-unique fields"
	5316
	5317	The header line parsed in the L</header> contains at least two identical
	5318	fields.
	5319
	5320	=item *
	5321	1014 "INI - header called on undefined stream"
	5322
	5323	The header line cannot be parsed from an undefined source.
	5324
	5325	=item *
	5326	1500 "PRM - Invalid/unsupported argument(s)"
	5327
	5328	Function or method called with invalid argument(s) or parameter(s).
	5329
	5330	=item *
	5331	1501 "PRM - The key attribute is passed as an unsupported type"
	5332
	5333	The C<key> attribute is of an unsupported type.
	5334
	5335	=item *
	5336	1502 "PRM - The value attribute is passed without the key attribute"
	5337
	5338	The C<value> attribute is only allowed when a valid key is given.
	5339
	5340	=item *
	5341	1503 "PRM - The value attribute is passed as an unsupported type"
	5342
	5343	The C<value> attribute is of an unsupported type.
	5344
	5345	=item *
	5346	2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
	5347
	5348	When L<C<eol>\|/eol> has been set to anything but the default, like
	5349	C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
	5350	L<C<quote_char>\|/quote_char>, where the characters following the C<"\r"> do
	5351	not make up the L<C<eol>\|/eol> sequence, this is an error.
	5352
	5353	=item *
	5354	2011 "ECR - Characters after end of quoted field"
	5355
	5356	Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
	5357	field and after the closing double-quote, there should be either a new-line
	5358	sequence or a separation character.
	5359
	5360	=item *
	5361	2012 "EOF - End of data in parsing input stream"
	5362
	5363	Self-explanatory. End-of-file reached while parsing a stream. Can happen only
	5364	when reading from streams with L</getline>, as using L</parse> is done on
	5365	strings that are not required to have a trailing L<C<eol>\|/eol>.
	5366
	5367	=item *
	5368	2013 "INI - Specification error for fragments RFC7111"
	5369
	5370	Invalid specification for a URI L</fragment>.
	5371
	5372	=item *
	5373	2014 "ENF - Inconsistent number of fields"
	5374
	5375	Inconsistent number of fields under strict parsing.
	5376
	5377	=item *
	5378	2021 "EIQ - NL char inside quotes, binary off"
	5379
	5380	Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
	5381	has been selected with the constructor.
	5382
	5383	=item *
	5384	2022 "EIQ - CR char inside quotes, binary off"
	5385
	5386	Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
	5387	has been selected with the constructor.
	5388
	5389	=item *
	5390	2023 "EIQ - QUO character not allowed"
	5391
	5392	Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
	5393	will cause this error.
	5394
	5395	=item *
	5396	2024 "EIQ - EOF cannot be escaped, not even inside quotes"
	5397
	5398	The escape character is not allowed as last character in an input stream.
	5399
	5400	=item *
	5401	2025 "EIQ - Loose unescaped escape"
	5402
	5403	An escape character should escape only characters that need escaping.
	5404
	5405	Allowing the escape for other characters is possible with the attribute
	5406	L</allow_loose_escape>.
	5407
	5408	=item *
	5409	2026 "EIQ - Binary character inside quoted field, binary off"
	5410
	5411	Binary characters are not allowed by default. Exceptions are fields that
	5412	contain valid UTF-8, that will automatically be upgraded if the content is
	5413	valid UTF-8. Set L<C<binary>\|/binary> to C<1> to accept binary data.
	5414
	5415	=item *
	5416	2027 "EIQ - Quoted field not terminated"
	5417
	5418	When parsing a field that started with a quotation character, the field is
	5419	expected to be closed with a quotation character. When the parsed line is
	5420	exhausted before the quote is found, that field is not terminated.
	5421
	5422	=item *
	5423	2030 "EIF - NL char inside unquoted verbatim, binary off"
	5424
	5425	=item *
	5426	2031 "EIF - CR char is first char of field, not part of EOL"
	5427
	5428	=item *
	5429	2032 "EIF - CR char inside unquoted, not part of EOL"
	5430
	5431	=item *
	5432	2034 "EIF - Loose unescaped quote"
	5433
	5434	=item *
	5435	2035 "EIF - Escaped EOF in unquoted field"
	5436
	5437	=item *
	5438	2036 "EIF - ESC error"
	5439
	5440	=item *
	5441	2037 "EIF - Binary character in unquoted field, binary off"
	5442
	5443	=item *
	5444	2110 "ECB - Binary character in Combine, binary off"
	5445
	5446	=item *
	5447	2200 "EIO - print to IO failed. See errno"
	5448
	5449	=item *
	5450	3001 "EHR - Unsupported syntax for column_names ()"
	5451
	5452	=item *
	5453	3002 "EHR - getline_hr () called before column_names ()"
	5454
	5455	=item *
	5456	3003 "EHR - bind_columns () and column_names () fields count mismatch"
	5457
	5458	=item *
	5459	3004 "EHR - bind_columns () only accepts refs to scalars"
	5460
	5461	=item *
	5462	3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
	5463
	5464	=item *
	5465	3007 "EHR - bind_columns needs refs to writable scalars"
	5466
	5467	=item *
	5468	3008 "EHR - unexpected error in bound fields"
	5469
	5470	=item *
	5471	3009 "EHR - print_hr () called before column_names ()"
	5472
	5473	=item *
	5474	3010 "EHR - print_hr () called with invalid arguments"
	5475
	5476	=back
	5477
	5478	=head1 SEE ALSO
	5479
	5480	L<Text::CSV_XS>, L<Text::CSV>
	5481
	5482	Older versions took many regexp from L<http://www.din.or.jp/~ohzaki/perl.htm>
	5483
	5484	=head1 AUTHOR
	5485
	5486	Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
	5487	Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
	5488
	5489	Text::CSV_XS was written by E<lt>joe[at]ispsoft.deE<gt>
	5490	and maintained by E<lt>h.m.brand[at]xs4all.nlE<gt>.
	5491
	5492	Text::CSV was written by E<lt>alan[at]mfgrtl.comE<gt>.
	5493
	5494	=head1 COPYRIGHT AND LICENSE
	5495
	5496	Copyright 2017- by Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
	5497	Copyright 2005-2015 by Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
	5498
	5499	Most of the code and doc is directly taken from the pure perl part of
	5500	Text::CSV_XS.
	5501
	5502	Copyright (C) 2007-2016 H.Merijn Brand. All rights reserved.
	5503	Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
	5504	Copyright (C) 1997 Alan Citterman. All rights reserved.
	5505
	5506	This library is free software; you can redistribute it and/or modify
	5507	it under the same terms as Perl itself.
	5508
	5509	=cut

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/perllib/cpan/Text/CSV_PP.pm@ 33235

Download in other formats: