1 | package open;
|
---|
2 | use warnings;
|
---|
3 | use Carp;
|
---|
4 | $open::hint_bits = 0x20000; # HINT_LOCALIZE_HH
|
---|
5 |
|
---|
6 | our $VERSION = '1.05';
|
---|
7 |
|
---|
8 | require 5.008001; # for PerlIO::get_layers()
|
---|
9 |
|
---|
10 | my $locale_encoding;
|
---|
11 |
|
---|
12 | sub _get_encname {
|
---|
13 | return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/;
|
---|
14 | return;
|
---|
15 | }
|
---|
16 |
|
---|
17 | sub _drop_oldenc {
|
---|
18 | # If by the time we arrive here there already is at the top of the
|
---|
19 | # perlio layer stack an encoding identical to what we would like
|
---|
20 | # to push via this open pragma, we will pop away the old encoding
|
---|
21 | # (+utf8) so that we can push ourselves in place (this is easier
|
---|
22 | # than ignoring pushing ourselves because of the way how ${^OPEN}
|
---|
23 | # works). So we are looking for something like
|
---|
24 | #
|
---|
25 | # stdio encoding(xxx) utf8
|
---|
26 | #
|
---|
27 | # in the existing layer stack, and in the new stack chunk for
|
---|
28 | #
|
---|
29 | # :encoding(xxx)
|
---|
30 | #
|
---|
31 | # If we find a match, we pop the old stack (once, since
|
---|
32 | # the utf8 is just a flag on the encoding layer)
|
---|
33 | my ($h, @new) = @_;
|
---|
34 | return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/;
|
---|
35 | my @old = PerlIO::get_layers($h);
|
---|
36 | return unless @old >= 3 &&
|
---|
37 | $old[-1] eq 'utf8' &&
|
---|
38 | $old[-2] =~ /^encoding\(.+\)$/;
|
---|
39 | require Encode;
|
---|
40 | my ($loname, $lcname) = _get_encname($old[-2]);
|
---|
41 | unless (defined $lcname) { # Should we trust get_layers()?
|
---|
42 | require Carp;
|
---|
43 | Carp::croak("open: Unknown encoding '$loname'");
|
---|
44 | }
|
---|
45 | my ($voname, $vcname) = _get_encname($new[-1]);
|
---|
46 | unless (defined $vcname) {
|
---|
47 | require Carp;
|
---|
48 | Carp::croak("open: Unknown encoding '$voname'");
|
---|
49 | }
|
---|
50 | if ($lcname eq $vcname) {
|
---|
51 | binmode($h, ":pop"); # utf8 is part of the encoding layer
|
---|
52 | }
|
---|
53 | }
|
---|
54 |
|
---|
55 | sub import {
|
---|
56 | my ($class,@args) = @_;
|
---|
57 | croak("open: needs explicit list of PerlIO layers") unless @args;
|
---|
58 | my $std;
|
---|
59 | $^H |= $open::hint_bits;
|
---|
60 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
|
---|
61 | while (@args) {
|
---|
62 | my $type = shift(@args);
|
---|
63 | my $dscp;
|
---|
64 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
|
---|
65 | $type = 'IO';
|
---|
66 | $dscp = ":$1";
|
---|
67 | } elsif ($type eq ':std') {
|
---|
68 | $std = 1;
|
---|
69 | next;
|
---|
70 | } else {
|
---|
71 | $dscp = shift(@args) || '';
|
---|
72 | }
|
---|
73 | my @val;
|
---|
74 | foreach my $layer (split(/\s+/,$dscp)) {
|
---|
75 | $layer =~ s/^://;
|
---|
76 | if ($layer eq 'locale') {
|
---|
77 | require Encode;
|
---|
78 | require encoding;
|
---|
79 | $locale_encoding = encoding::_get_locale_encoding()
|
---|
80 | unless defined $locale_encoding;
|
---|
81 | (warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
|
---|
82 | unless defined $locale_encoding;
|
---|
83 | if ($locale_encoding =~ /^utf-?8$/i) {
|
---|
84 | $layer = "utf8";
|
---|
85 | } else {
|
---|
86 | $layer = "encoding($locale_encoding)";
|
---|
87 | }
|
---|
88 | $std = 1;
|
---|
89 | } else {
|
---|
90 | my $target = $layer; # the layer name itself
|
---|
91 | $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
|
---|
92 |
|
---|
93 | unless(PerlIO::Layer::->find($target,1)) {
|
---|
94 | warnings::warnif("layer", "Unknown PerlIO layer '$target'");
|
---|
95 | }
|
---|
96 | }
|
---|
97 | push(@val,":$layer");
|
---|
98 | if ($layer =~ /^(crlf|raw)$/) {
|
---|
99 | $^H{"open_$type"} = $layer;
|
---|
100 | }
|
---|
101 | }
|
---|
102 | if ($type eq 'IN') {
|
---|
103 | _drop_oldenc(*STDIN, @val);
|
---|
104 | $in = join(' ', @val);
|
---|
105 | }
|
---|
106 | elsif ($type eq 'OUT') {
|
---|
107 | _drop_oldenc(*STDOUT, @val);
|
---|
108 | $out = join(' ', @val);
|
---|
109 | }
|
---|
110 | elsif ($type eq 'IO') {
|
---|
111 | _drop_oldenc(*STDIN, @val);
|
---|
112 | _drop_oldenc(*STDOUT, @val);
|
---|
113 | $in = $out = join(' ', @val);
|
---|
114 | }
|
---|
115 | else {
|
---|
116 | croak "Unknown PerlIO layer class '$type'";
|
---|
117 | }
|
---|
118 | }
|
---|
119 | ${^OPEN} = join("\0", $in, $out);
|
---|
120 | if ($std) {
|
---|
121 | if ($in) {
|
---|
122 | if ($in =~ /:utf8\b/) {
|
---|
123 | binmode(STDIN, ":utf8");
|
---|
124 | } elsif ($in =~ /(\w+\(.+\))/) {
|
---|
125 | binmode(STDIN, ":$1");
|
---|
126 | }
|
---|
127 | }
|
---|
128 | if ($out) {
|
---|
129 | if ($out =~ /:utf8\b/) {
|
---|
130 | binmode(STDOUT, ":utf8");
|
---|
131 | binmode(STDERR, ":utf8");
|
---|
132 | } elsif ($out =~ /(\w+\(.+\))/) {
|
---|
133 | binmode(STDOUT, ":$1");
|
---|
134 | binmode(STDERR, ":$1");
|
---|
135 | }
|
---|
136 | }
|
---|
137 | }
|
---|
138 | }
|
---|
139 |
|
---|
140 | 1;
|
---|
141 | __END__
|
---|
142 |
|
---|
143 | =head1 NAME
|
---|
144 |
|
---|
145 | open - perl pragma to set default PerlIO layers for input and output
|
---|
146 |
|
---|
147 | =head1 SYNOPSIS
|
---|
148 |
|
---|
149 | use open IN => ":crlf", OUT => ":bytes";
|
---|
150 | use open OUT => ':utf8';
|
---|
151 | use open IO => ":encoding(iso-8859-7)";
|
---|
152 |
|
---|
153 | use open IO => ':locale';
|
---|
154 |
|
---|
155 | use open ':utf8';
|
---|
156 | use open ':locale';
|
---|
157 | use open ':encoding(iso-8859-7)';
|
---|
158 |
|
---|
159 | use open ':std';
|
---|
160 |
|
---|
161 | =head1 DESCRIPTION
|
---|
162 |
|
---|
163 | Full-fledged support for I/O layers is now implemented provided
|
---|
164 | Perl is configured to use PerlIO as its IO system (which is now the
|
---|
165 | default).
|
---|
166 |
|
---|
167 | The C<open> pragma serves as one of the interfaces to declare default
|
---|
168 | "layers" (also known as "disciplines") for all I/O. Any two-argument
|
---|
169 | open(), readpipe() (aka qx//) and similar operators found within the
|
---|
170 | lexical scope of this pragma will use the declared defaults.
|
---|
171 | Even three-argument opens may be affected by this pragma
|
---|
172 | when they don't specify IO layers in MODE.
|
---|
173 |
|
---|
174 | With the C<IN> subpragma you can declare the default layers
|
---|
175 | of input streams, and with the C<OUT> subpragma you can declare
|
---|
176 | the default layers of output streams. With the C<IO> subpragma
|
---|
177 | you can control both input and output streams simultaneously.
|
---|
178 |
|
---|
179 | If you have a legacy encoding, you can use the C<:encoding(...)> tag.
|
---|
180 |
|
---|
181 | If you want to set your encoding layers based on your
|
---|
182 | locale environment variables, you can use the C<:locale> tag.
|
---|
183 | For example:
|
---|
184 |
|
---|
185 | $ENV{LANG} = 'ru_RU.KOI8-R';
|
---|
186 | # the :locale will probe the locale environment variables like LANG
|
---|
187 | use open OUT => ':locale';
|
---|
188 | open(O, ">koi8");
|
---|
189 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
|
---|
190 | close O;
|
---|
191 | open(I, "<koi8");
|
---|
192 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
|
---|
193 | close I;
|
---|
194 |
|
---|
195 | These are equivalent
|
---|
196 |
|
---|
197 | use open ':utf8';
|
---|
198 | use open IO => ':utf8';
|
---|
199 |
|
---|
200 | as are these
|
---|
201 |
|
---|
202 | use open ':locale';
|
---|
203 | use open IO => ':locale';
|
---|
204 |
|
---|
205 | and these
|
---|
206 |
|
---|
207 | use open ':encoding(iso-8859-7)';
|
---|
208 | use open IO => ':encoding(iso-8859-7)';
|
---|
209 |
|
---|
210 | The matching of encoding names is loose: case does not matter, and
|
---|
211 | many encodings have several aliases. See L<Encode::Supported> for
|
---|
212 | details and the list of supported locales.
|
---|
213 |
|
---|
214 | Note that C<:utf8> PerlIO layer must always be specified exactly like
|
---|
215 | that, it is not subject to the loose matching of encoding names.
|
---|
216 |
|
---|
217 | When open() is given an explicit list of layers (with the three-arg
|
---|
218 | syntax), they override the list declared using this pragma.
|
---|
219 |
|
---|
220 | The C<:std> subpragma on its own has no effect, but if combined with
|
---|
221 | the C<:utf8> or C<:encoding> subpragmas, it converts the standard
|
---|
222 | filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
|
---|
223 | for input/output handles. For example, if both input and out are
|
---|
224 | chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
|
---|
225 | STDERR are also in C<:utf8>. On the other hand, if only output is
|
---|
226 | chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
|
---|
227 | STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
|
---|
228 | implicitly turns on C<:std>.
|
---|
229 |
|
---|
230 | The logic of C<:locale> is described in full in L<encoding>,
|
---|
231 | but in short it is first trying nl_langinfo(CODESET) and then
|
---|
232 | guessing from the LC_ALL and LANG locale environment variables.
|
---|
233 |
|
---|
234 | Directory handles may also support PerlIO layers in the future.
|
---|
235 |
|
---|
236 | =head1 NONPERLIO FUNCTIONALITY
|
---|
237 |
|
---|
238 | If Perl is not built to use PerlIO as its IO system then only the two
|
---|
239 | pseudo-layers C<:bytes> and C<:crlf> are available.
|
---|
240 |
|
---|
241 | The C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
|
---|
242 | layer corresponds to "text mode" on platforms that distinguish
|
---|
243 | between the two modes when opening files (which is many DOS-like
|
---|
244 | platforms, including Windows). These two layers are no-ops on
|
---|
245 | platforms where binmode() is a no-op, but perform their functions
|
---|
246 | everywhere if PerlIO is enabled.
|
---|
247 |
|
---|
248 | =head1 IMPLEMENTATION DETAILS
|
---|
249 |
|
---|
250 | There is a class method in C<PerlIO::Layer> C<find> which is
|
---|
251 | implemented as XS code. It is called by C<import> to validate the
|
---|
252 | layers:
|
---|
253 |
|
---|
254 | PerlIO::Layer::->find("perlio")
|
---|
255 |
|
---|
256 | The return value (if defined) is a Perl object, of class
|
---|
257 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
|
---|
258 | yet there is nothing useful you can do with the object at the perl
|
---|
259 | level.
|
---|
260 |
|
---|
261 | =head1 SEE ALSO
|
---|
262 |
|
---|
263 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
|
---|
264 | L<encoding>
|
---|
265 |
|
---|
266 | =cut
|
---|