1 | @rem = '--*-Perl-*--
|
---|
2 | @echo off
|
---|
3 | if "%OS%" == "Windows_NT" goto WinNT
|
---|
4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
|
---|
5 | goto endofperl
|
---|
6 | :WinNT
|
---|
7 | perl -x -S %0 %*
|
---|
8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
|
---|
9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH.
|
---|
10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
|
---|
11 | goto endofperl
|
---|
12 | @rem ';
|
---|
13 | #!perl
|
---|
14 | #line 15
|
---|
15 | eval 'exec c:\shaoqunWu\perl\bin\perl.exe -S $0 ${1+"$@"}'
|
---|
16 | if $running_under_some_shell;
|
---|
17 | #!./perl
|
---|
18 | # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
|
---|
19 | #
|
---|
20 | use 5.8.0;
|
---|
21 | use strict;
|
---|
22 | use Encode ;
|
---|
23 | use Encode::Alias;
|
---|
24 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
|
---|
25 |
|
---|
26 | use File::Basename;
|
---|
27 | my $name = basename($0);
|
---|
28 |
|
---|
29 | use Getopt::Long qw(:config no_ignore_case);
|
---|
30 |
|
---|
31 | my %Opt;
|
---|
32 |
|
---|
33 | help()
|
---|
34 | unless
|
---|
35 | GetOptions(\%Opt,
|
---|
36 | 'from|f=s',
|
---|
37 | 'to|t=s',
|
---|
38 | 'list|l',
|
---|
39 | 'string|s=s',
|
---|
40 | 'check|C=i',
|
---|
41 | 'c',
|
---|
42 | 'perlqq|p',
|
---|
43 | 'debug|D',
|
---|
44 | 'scheme|S=s',
|
---|
45 | 'resolve|r=s',
|
---|
46 | 'help',
|
---|
47 | );
|
---|
48 |
|
---|
49 | $Opt{help} and help();
|
---|
50 | $Opt{list} and list_encodings();
|
---|
51 | my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
|
---|
52 | defined $Opt{resolve} and resolve_encoding($Opt{resolve});
|
---|
53 | $Opt{from} || $Opt{to} || help();
|
---|
54 | my $from = $Opt{from} || $locale or help("from_encoding unspecified");
|
---|
55 | my $to = $Opt{to} || $locale or help("to_encoding unspecified");
|
---|
56 | $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
|
---|
57 | my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
|
---|
58 | $Opt{check} ||= $Opt{c};
|
---|
59 | $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
|
---|
60 |
|
---|
61 | if ($Opt{debug}){
|
---|
62 | my $cfrom = Encode->getEncoding($from)->name;
|
---|
63 | my $cto = Encode->getEncoding($to)->name;
|
---|
64 | print <<"EOT";
|
---|
65 | Scheme: $scheme
|
---|
66 | From: $from => $cfrom
|
---|
67 | To: $to => $cto
|
---|
68 | EOT
|
---|
69 | }
|
---|
70 |
|
---|
71 | # we do not use <> (or ARGV) for the sake of binmode()
|
---|
72 | @ARGV or push @ARGV, \*STDIN;
|
---|
73 |
|
---|
74 | unless ($scheme eq 'perlio'){
|
---|
75 | binmode STDOUT;
|
---|
76 | for my $argv (@ARGV){
|
---|
77 | my $ifh = ref $argv ? $argv : undef;
|
---|
78 | $ifh or open $ifh, "<", $argv or next;
|
---|
79 | binmode $ifh;
|
---|
80 | if ($scheme eq 'from_to'){ # default
|
---|
81 | while(<$ifh>){
|
---|
82 | Encode::from_to($_, $from, $to, $Opt{check});
|
---|
83 | print;
|
---|
84 | }
|
---|
85 | }elsif ($scheme eq 'decode_encode'){ # step-by-step
|
---|
86 | while(<$ifh>){
|
---|
87 | my $decoded = decode($from, $_, $Opt{check});
|
---|
88 | my $encoded = encode($to, $decoded);
|
---|
89 | print $encoded;
|
---|
90 | }
|
---|
91 | } else { # won't reach
|
---|
92 | die "$name: unknown scheme: $scheme";
|
---|
93 | }
|
---|
94 | }
|
---|
95 | }else{
|
---|
96 | # NI-S favorite
|
---|
97 | binmode STDOUT => "raw:encoding($to)";
|
---|
98 | for my $argv (@ARGV){
|
---|
99 | my $ifh = ref $argv ? $argv : undef;
|
---|
100 | $ifh or open $ifh, "<", $argv or next;
|
---|
101 | binmode $ifh => "raw:encoding($from)";
|
---|
102 | print while(<$ifh>);
|
---|
103 | }
|
---|
104 | }
|
---|
105 |
|
---|
106 | sub list_encodings{
|
---|
107 | print join("\n", Encode->encodings(":all")), "\n";
|
---|
108 | exit 0;
|
---|
109 | }
|
---|
110 |
|
---|
111 | sub resolve_encoding {
|
---|
112 | if (my $alias = Encode::resolve_alias($_[0])) {
|
---|
113 | print $alias, "\n";
|
---|
114 | exit 0;
|
---|
115 | } else {
|
---|
116 | warn "$name: $_[0] is not known to Encode\n";
|
---|
117 | exit 1;
|
---|
118 | }
|
---|
119 | }
|
---|
120 |
|
---|
121 | sub help{
|
---|
122 | my $message = shift;
|
---|
123 | $message and print STDERR "$name error: $message\n";
|
---|
124 | print STDERR <<"EOT";
|
---|
125 | $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
|
---|
126 | $name -l
|
---|
127 | $name -r encoding_alias
|
---|
128 | -l,--list
|
---|
129 | lists all available encodings
|
---|
130 | -r,--resolve encoding_alias
|
---|
131 | resolve encoding to its (Encode) canonical name
|
---|
132 | -f,--from from_encoding
|
---|
133 | when omitted, the current locale will be used
|
---|
134 | -t,--to to_encoding
|
---|
135 | when omitted, the current locale will be used
|
---|
136 | -s,--string string
|
---|
137 | "string" will be the input instead of STDIN or files
|
---|
138 | The following are mainly of interest to Encode hackers:
|
---|
139 | -D,--debug show debug information
|
---|
140 | -C N | -c | -p check the validity of the input
|
---|
141 | -S,--scheme scheme use the scheme for conversion
|
---|
142 | EOT
|
---|
143 | exit;
|
---|
144 | }
|
---|
145 |
|
---|
146 | __END__
|
---|
147 |
|
---|
148 | =head1 NAME
|
---|
149 |
|
---|
150 | piconv -- iconv(1), reinvented in perl
|
---|
151 |
|
---|
152 | =head1 SYNOPSIS
|
---|
153 |
|
---|
154 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
|
---|
155 | piconv -l
|
---|
156 | piconv [-C N|-c|-p]
|
---|
157 | piconv -S scheme ...
|
---|
158 | piconv -r encoding
|
---|
159 | piconv -D ...
|
---|
160 | piconv -h
|
---|
161 |
|
---|
162 | =head1 DESCRIPTION
|
---|
163 |
|
---|
164 | B<piconv> is perl version of B<iconv>, a character encoding converter
|
---|
165 | widely available for various Unixen today. This script was primarily
|
---|
166 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the
|
---|
167 | place of iconv for virtually any case.
|
---|
168 |
|
---|
169 | piconv converts the character encoding of either STDIN or files
|
---|
170 | specified in the argument and prints out to STDOUT.
|
---|
171 |
|
---|
172 | Here is the list of options. Each option can be in short format (-f)
|
---|
173 | or long (--from).
|
---|
174 |
|
---|
175 | =over 4
|
---|
176 |
|
---|
177 | =item -f,--from from_encoding
|
---|
178 |
|
---|
179 | Specifies the encoding you are converting from. Unlike B<iconv>,
|
---|
180 | this option can be omitted. In such cases, the current locale is used.
|
---|
181 |
|
---|
182 | =item -t,--to to_encoding
|
---|
183 |
|
---|
184 | Specifies the encoding you are converting to. Unlike B<iconv>,
|
---|
185 | this option can be omitted. In such cases, the current locale is used.
|
---|
186 |
|
---|
187 | Therefore, when both -f and -t are omitted, B<piconv> just acts
|
---|
188 | like B<cat>.
|
---|
189 |
|
---|
190 | =item -s,--string I<string>
|
---|
191 |
|
---|
192 | uses I<string> instead of file for the source of text.
|
---|
193 |
|
---|
194 | =item -l,--list
|
---|
195 |
|
---|
196 | Lists all available encodings, one per line, in case-insensitive
|
---|
197 | order. Note that only the canonical names are listed; many aliases
|
---|
198 | exist. For example, the names are case-insensitive, and many standard
|
---|
199 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
|
---|
200 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
|
---|
201 | for a full discussion.
|
---|
202 |
|
---|
203 | =item -C,--check I<N>
|
---|
204 |
|
---|
205 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something
|
---|
206 | interesting happens when it encounters an invalid character.
|
---|
207 |
|
---|
208 | =item -c
|
---|
209 |
|
---|
210 | Same as C<-C 1>.
|
---|
211 |
|
---|
212 | =item -p,--perlqq
|
---|
213 |
|
---|
214 | Same as C<-C -1>.
|
---|
215 |
|
---|
216 | =item -h,--help
|
---|
217 |
|
---|
218 | Show usage.
|
---|
219 |
|
---|
220 | =item -D,--debug
|
---|
221 |
|
---|
222 | Invokes debugging mode. Primarily for Encode hackers.
|
---|
223 |
|
---|
224 | =item -S,--scheme scheme
|
---|
225 |
|
---|
226 | Selects which scheme is to be used for conversion. Available schemes
|
---|
227 | are as follows:
|
---|
228 |
|
---|
229 | =over 4
|
---|
230 |
|
---|
231 | =item from_to
|
---|
232 |
|
---|
233 | Uses Encode::from_to for conversion. This is the default.
|
---|
234 |
|
---|
235 | =item decode_encode
|
---|
236 |
|
---|
237 | Input strings are decode()d then encode()d. A straight two-step
|
---|
238 | implementation.
|
---|
239 |
|
---|
240 | =item perlio
|
---|
241 |
|
---|
242 | The new perlIO layer is used. NI-S' favorite.
|
---|
243 |
|
---|
244 | =back
|
---|
245 |
|
---|
246 | Like the I<-D> option, this is also for Encode hackers.
|
---|
247 |
|
---|
248 | =back
|
---|
249 |
|
---|
250 | =head1 SEE ALSO
|
---|
251 |
|
---|
252 | L<iconv/1>
|
---|
253 | L<locale/3>
|
---|
254 | L<Encode>
|
---|
255 | L<Encode::Supported>
|
---|
256 | L<Encode::Alias>
|
---|
257 | L<PerlIO>
|
---|
258 |
|
---|
259 | =cut
|
---|
260 |
|
---|
261 | __END__
|
---|
262 | :endofperl
|
---|