source: other-projects/trunk/realistic-books/bin/windows/perl/bin/enc2xs.bat@ 19631

Last change on this file since 19631 was 19631, checked in by davidb, 15 years ago

addition of bin directory

  • Property svn:executable set to *
File size: 37.6 KB
Line 
1@rem = '--*-Perl-*--
2@echo off
3if "%OS%" == "Windows_NT" goto WinNT
4perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
5goto endofperl
6:WinNT
7perl -x -S %0 %*
8if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
9if %errorlevel% == 9009 echo You do not have Perl in your PATH.
10if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
11goto endofperl
12@rem ';
13#!perl
14#line 15
15 eval 'exec c:\shaoqunWu\perl\bin\perl.exe -S $0 ${1+"$@"}'
16 if $running_under_some_shell;
17#!./perl
18BEGIN {
19 # @INC poking no longer needed w/ new MakeMaker and Makefile.PL's
20 # with $ENV{PERL_CORE} set
21 # In case we need it in future...
22 require Config; import Config;
23}
24use strict;
25use warnings;
26use Getopt::Std;
27my @orig_ARGV = @ARGV;
28our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
29
30# These may get re-ordered.
31# RAW is a do_now as inserted by &enter
32# AGG is an aggreagated do_now, as built up by &process
33
34use constant {
35 RAW_NEXT => 0,
36 RAW_IN_LEN => 1,
37 RAW_OUT_BYTES => 2,
38 RAW_FALLBACK => 3,
39
40 AGG_MIN_IN => 0,
41 AGG_MAX_IN => 1,
42 AGG_OUT_BYTES => 2,
43 AGG_NEXT => 3,
44 AGG_IN_LEN => 4,
45 AGG_OUT_LEN => 5,
46 AGG_FALLBACK => 6,
47};
48
49# (See the algorithm in encengine.c - we're building structures for it)
50
51# There are two sorts of structures.
52# "do_now" (an array, two variants of what needs storing) is whatever we need
53# to do now we've read an input byte.
54# It's housed in a "do_next" (which is how we got to it), and in turn points
55# to a "do_next" which contains all the "do_now"s for the next input byte.
56
57# There will be a "do_next" which is the start state.
58# For a single byte encoding it's the only "do_next" - each "do_now" points
59# back to it, and each "do_now" will cause bytes. There is no state.
60
61# For a multi-byte encoding where all characters in the input are the same
62# length, then there will be a tree of "do_now"->"do_next"->"do_now"
63# branching out from the start state, one step for each input byte.
64# The leaf "do_now"s will all be at the same distance from the start state,
65# only the leaf "do_now"s cause output bytes, and they in turn point back to
66# the start state.
67
68# For an encoding where there are varaible length input byte sequences, you
69# will encounter a leaf "do_now" sooner for the shorter input sequences, but
70# as before the leaves will point back to the start state.
71
72# The system will cope with escape encodings (imagine them as a mostly
73# self-contained tree for each escape state, and cross links between trees
74# at the state-switching characters) but so far no input format defines these.
75
76# The system will also cope with having output "leaves" in the middle of
77# the bifurcating branches, not just at the extremities, but again no
78# input format does this yet.
79
80# There are two variants of the "do_now" structure. The first, smaller variant
81# is generated by &enter as the input file is read. There is one structure
82# for each input byte. Say we are mapping a single byte encoding to a
83# single byte encoding, with "ABCD" going "abcd". There will be
84# 4 "do_now"s, {"A" => [...,"a",...], "B" => [...,"b",...], "C"=>..., "D"=>...}
85
86# &process then walks the tree, building aggregate "do_now" structres for
87# adjacent bytes where possible. The aggregate is for a contiguous range of
88# bytes which each produce the same length of output, each move to the
89# same next state, and each have the same fallback flag.
90# So our 4 RAW "do_now"s above become replaced by a single structure
91# containing:
92# ["A", "D", "abcd", 1, ...]
93# ie, for an input byte $_ in "A".."D", output 1 byte, found as
94# substr ("abcd", (ord $_ - ord "A") * 1, 1)
95# which maps very nicely into pointer arithmetic in C for encengine.c
96
97sub encode_U
98{
99 # UTF-8 encode long hand - only covers part of perl's range
100 ## my $uv = shift;
101 # chr() works in native space so convert value from table
102 # into that space before using chr().
103 my $ch = chr(utf8::unicode_to_native($_[0]));
104 # Now get core perl to encode that the way it likes.
105 utf8::encode($ch);
106 return $ch;
107}
108
109sub encode_S
110{
111 # encode single byte
112 ## my ($ch,$page) = @_; return chr($ch);
113 return chr $_[0];
114}
115
116sub encode_D
117{
118 # encode double byte MS byte first
119 ## my ($ch,$page) = @_; return chr($page).chr($ch);
120 return chr ($_[1]) . chr $_[0];
121}
122
123sub encode_M
124{
125 # encode Multi-byte - single for 0..255 otherwise double
126 ## my ($ch,$page) = @_;
127 ## return &encode_D if $page;
128 ## return &encode_S;
129 return chr ($_[1]) . chr $_[0] if $_[1];
130 return chr $_[0];
131}
132
133my %encode_types = (U => \&encode_U,
134 S => \&encode_S,
135 D => \&encode_D,
136 M => \&encode_M,
137 );
138
139# Win32 does not expand globs on command line
140eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32');
141
142my %opt;
143# I think these are:
144# -Q to disable the duplicate codepoint test
145# -S make mapping errors fatal
146# -q to remove comments written to output files
147# -O to enable the (brute force) substring optimiser
148# -o <output> to specify the output file name (else it's the first arg)
149# -f <inlist> to give a file with a list of input files (else use the args)
150# -n <name> to name the encoding (else use the basename of the input file.
151getopts('CM:SQqOo:f:n:',\%opt);
152
153$opt{M} and make_makefile_pl($opt{M}, @ARGV);
154$opt{C} and make_configlocal_pm($opt{C}, @ARGV);
155
156# This really should go first, else the die here causes empty (non-erroneous)
157# output files to be written.
158my @encfiles;
159if (exists $opt{'f'}) {
160 # -F is followed by name of file containing list of filenames
161 my $flist = $opt{'f'};
162 open(FLIST,$flist) || die "Cannot open $flist:$!";
163 chomp(@encfiles = <FLIST>);
164 close(FLIST);
165} else {
166 @encfiles = @ARGV;
167}
168
169my $cname = (exists $opt{'o'}) ? $opt{'o'} : shift(@ARGV);
170chmod(0666,$cname) if -f $cname && !-w $cname;
171open(C,">$cname") || die "Cannot open $cname:$!";
172
173my $dname = $cname;
174my $hname = $cname;
175
176my ($doC,$doEnc,$doUcm,$doPet);
177
178if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARSE_STYLE defined
179 {
180 $doC = 1;
181 $dname =~ s/(\.[^\.]*)?$/.exh/;
182 chmod(0666,$dname) if -f $cname && !-w $dname;
183 open(D,">$dname") || die "Cannot open $dname:$!";
184 $hname =~ s/(\.[^\.]*)?$/.h/;
185 chmod(0666,$hname) if -f $cname && !-w $hname;
186 open(H,">$hname") || die "Cannot open $hname:$!";
187
188 foreach my $fh (\*C,\*D,\*H)
189 {
190 print $fh <<"END" unless $opt{'q'};
191/*
192 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
193 This file was autogenerated by:
194 $^X $0 @orig_ARGV
195*/
196END
197 }
198
199 if ($cname =~ /(\w+)\.xs$/)
200 {
201 print C "#include <EXTERN.h>\n";
202 print C "#include <perl.h>\n";
203 print C "#include <XSUB.h>\n";
204 print C "#define U8 U8\n";
205 }
206 print C "#include \"encode.h\"\n\n";
207
208 }
209elsif ($cname =~ /\.enc$/)
210 {
211 $doEnc = 1;
212 }
213elsif ($cname =~ /\.ucm$/)
214 {
215 $doUcm = 1;
216 }
217elsif ($cname =~ /\.pet$/)
218 {
219 $doPet = 1;
220 }
221
222my %encoding;
223my %strings;
224my $string_acc;
225my %strings_in_acc;
226
227my $saved = 0;
228my $subsave = 0;
229my $strings = 0;
230
231sub cmp_name
232{
233 if ($a =~ /^.*-(\d+)/)
234 {
235 my $an = $1;
236 if ($b =~ /^.*-(\d+)/)
237 {
238 my $r = $an <=> $1;
239 return $r if $r;
240 }
241 }
242 return $a cmp $b;
243}
244
245
246foreach my $enc (sort cmp_name @encfiles)
247 {
248 my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
249 $name = $opt{'n'} if exists $opt{'n'};
250 if (open(E,$enc))
251 {
252 if ($sfx eq 'enc')
253 {
254 compile_enc(\*E,lc($name));
255 }
256 else
257 {
258 compile_ucm(\*E,lc($name));
259 }
260 }
261 else
262 {
263 warn "Cannot open $enc for $name:$!";
264 }
265 }
266
267if ($doC)
268 {
269 print STDERR "Writing compiled form\n";
270 foreach my $name (sort cmp_name keys %encoding)
271 {
272 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
273 process($name.'_utf8',$e2u);
274 addstrings(\*C,$e2u);
275
276 process('utf8_'.$name,$u2e);
277 addstrings(\*C,$u2e);
278 }
279 outbigstring(\*C,"enctable");
280 foreach my $name (sort cmp_name keys %encoding)
281 {
282 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
283 outtable(\*C,$e2u, "enctable");
284 outtable(\*C,$u2e, "enctable");
285
286 # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
287 }
288 foreach my $enc (sort cmp_name keys %encoding)
289 {
290 # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
291 my ($e2u,$u2e,$rep,$min_el,$max_el) = @{$encoding{$enc}};
292 #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
293 my $replen = 0;
294 $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g);
295 my @info = ($e2u->{Cname},$u2e->{Cname},qq((U8 *)"$rep"),$replen,$min_el,$max_el);
296 my $sym = "${enc}_encoding";
297 $sym =~ s/\W+/_/g;
298 print C "encode_t $sym = \n";
299 # This is to make null encoding work -- dankogai
300 for (my $i = (scalar @info) - 1; $i >= 0; --$i){
301 $info[$i] ||= 1;
302 }
303 # end of null tweak -- dankogai
304 print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n";
305 }
306
307 foreach my $enc (sort cmp_name keys %encoding)
308 {
309 my $sym = "${enc}_encoding";
310 $sym =~ s/\W+/_/g;
311 print H "extern encode_t $sym;\n";
312 print D " Encode_XSEncoding(aTHX_ &$sym);\n";
313 }
314
315 if ($cname =~ /(\w+)\.xs$/)
316 {
317 my $mod = $1;
318 print C <<'END';
319
320static void
321Encode_XSEncoding(pTHX_ encode_t *enc)
322{
323 dSP;
324 HV *stash = gv_stashpv("Encode::XS", TRUE);
325 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
326 int i = 0;
327 PUSHMARK(sp);
328 XPUSHs(sv);
329 while (enc->name[i])
330 {
331 const char *name = enc->name[i++];
332 XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
333 }
334 PUTBACK;
335 call_pv("Encode::define_encoding",G_DISCARD);
336 SvREFCNT_dec(sv);
337}
338
339END
340
341 print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n";
342 print C "BOOT:\n{\n";
343 print C "#include \"$dname\"\n";
344 print C "}\n";
345 }
346 # Close in void context is bad, m'kay
347 close(D) or warn "Error closing '$dname': $!";
348 close(H) or warn "Error closing '$hname': $!";
349
350 my $perc_saved = $saved/($strings + $saved) * 100;
351 my $perc_subsaved = $subsave/($strings + $subsave) * 100;
352 printf STDERR "%d bytes in string tables\n",$strings;
353 printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n",
354 $saved, $perc_saved if $saved;
355 printf STDERR "%d bytes (%.3g%%) saved using substrings\n",
356 $subsave, $perc_subsaved if $subsave;
357 }
358elsif ($doEnc)
359 {
360 foreach my $name (sort cmp_name keys %encoding)
361 {
362 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
363 output_enc(\*C,$name,$e2u);
364 }
365 }
366elsif ($doUcm)
367 {
368 foreach my $name (sort cmp_name keys %encoding)
369 {
370 my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
371 output_ucm(\*C,$name,$u2e,$erep,$min_el,$max_el);
372 }
373 }
374
375# writing half meg files and then not checking to see if you just filled the
376# disk is bad, m'kay
377close(C) or die "Error closing '$cname': $!";
378
379# End of the main program.
380
381sub compile_ucm
382{
383 my ($fh,$name) = @_;
384 my $e2u = {};
385 my $u2e = {};
386 my $cs;
387 my %attr;
388 while (<$fh>)
389 {
390 s/#.*$//;
391 last if /^\s*CHARMAP\s*$/i;
392 if (/^\s*<(\w+)>\s+"?([^"]*)"?\s*$/i) # " # Grrr
393 {
394 $attr{$1} = $2;
395 }
396 }
397 if (!defined($cs = $attr{'code_set_name'}))
398 {
399 warn "No <code_set_name> in $name\n";
400 }
401 else
402 {
403 $name = $cs unless exists $opt{'n'};
404 }
405 my $erep;
406 my $urep;
407 my $max_el;
408 my $min_el;
409 if (exists $attr{'subchar'})
410 {
411 #my @byte;
412 #$attr{'subchar'} =~ /^\s*/cg;
413 #push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
414 #$erep = join('',map(chr(hex($_)),@byte));
415 $erep = $attr{'subchar'};
416 $erep =~ s/^\s+//; $erep =~ s/\s+$//;
417 }
418 print "Reading $name ($cs)\n";
419 my $nfb = 0;
420 my $hfb = 0;
421 while (<$fh>)
422 {
423 s/#.*$//;
424 last if /^\s*END\s+CHARMAP\s*$/i;
425 next if /^\s*$/;
426 my (@uni, @byte) = ();
427 my ($uni, $byte, $fb) = m/^(\S+)\s+(\S+)\s+(\S+)\s+/o
428 or die "Bad line: $_";
429 while ($uni =~ m/\G<([U0-9a-fA-F\+]+)>/g){
430 push @uni, map { substr($_, 1) } split(/\+/, $1);
431 }
432 while ($byte =~ m/\G\\x([0-9a-fA-F]+)/g){
433 push @byte, $1;
434 }
435 if (@uni)
436 {
437 my $uch = join('', map { encode_U(hex($_)) } @uni );
438 my $ech = join('',map(chr(hex($_)),@byte));
439 my $el = length($ech);
440 $max_el = $el if (!defined($max_el) || $el > $max_el);
441 $min_el = $el if (!defined($min_el) || $el < $min_el);
442 if (length($fb))
443 {
444 $fb = substr($fb,1);
445 $hfb++;
446 }
447 else
448 {
449 $nfb++;
450 $fb = '0';
451 }
452 # $fb is fallback flag
453 # 0 - round trip safe
454 # 1 - fallback for unicode -> enc
455 # 2 - skip sub-char mapping
456 # 3 - fallback enc -> unicode
457 enter($u2e,$uch,$ech,$u2e,$fb+0) if ($fb =~ /[01]/);
458 enter($e2u,$ech,$uch,$e2u,$fb+0) if ($fb =~ /[03]/);
459 }
460 else
461 {
462 warn $_;
463 }
464 }
465 if ($nfb && $hfb)
466 {
467 die "$nfb entries without fallback, $hfb entries with\n";
468 }
469 $encoding{$name} = [$e2u,$u2e,$erep,$min_el,$max_el];
470}
471
472
473
474sub compile_enc
475{
476 my ($fh,$name) = @_;
477 my $e2u = {};
478 my $u2e = {};
479
480 my $type;
481 while ($type = <$fh>)
482 {
483 last if $type !~ /^\s*#/;
484 }
485 chomp($type);
486 return if $type eq 'E';
487 # Do the hash lookup once, rather than once per function call. 4% speedup.
488 my $type_func = $encode_types{$type};
489 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
490 warn "$type encoded $name\n";
491 my $rep = '';
492 # Save a defined test by setting these to defined values.
493 my $min_el = ~0; # A very big integer
494 my $max_el = 0; # Anything must be longer than 0
495 {
496 my $v = hex($def);
497 $rep = &$type_func($v & 0xFF, ($v >> 8) & 0xffe);
498 }
499 my $errors;
500 my $seen;
501 # use -Q to silence the seen test. Makefile.PL uses this by default.
502 $seen = {} unless $opt{Q};
503 do
504 {
505 my $line = <$fh>;
506 chomp($line);
507 my $page = hex($line);
508 my $ch = 0;
509 my $i = 16;
510 do
511 {
512 # So why is it 1% faster to leave the my here?
513 my $line = <$fh>;
514 $line =~ s/\r\n$/\n/;
515 die "$.:${line}Line should be exactly 65 characters long including
516 newline (".length($line).")" unless length ($line) == 65;
517 # Split line into groups of 4 hex digits, convert groups to ints
518 # This takes 65.35
519 # map {hex $_} $line =~ /(....)/g
520 # This takes 63.75 (2.5% less time)
521 # unpack "n*", pack "H*", $line
522 # There's an implicit loop in map. Loops are bad, m'kay. Ops are bad, m'kay
523 # Doing it as while ($line =~ /(....)/g) took 74.63
524 foreach my $val (unpack "n*", pack "H*", $line)
525 {
526 next if $val == 0xFFFD;
527 my $ech = &$type_func($ch,$page);
528 if ($val || (!$ch && !$page))
529 {
530 my $el = length($ech);
531 $max_el = $el if $el > $max_el;
532 $min_el = $el if $el < $min_el;
533 my $uch = encode_U($val);
534 if ($seen) {
535 # We're doing the test.
536 # We don't need to read this quickly, so storing it as a scalar,
537 # rather than 3 (anon array, plus the 2 scalars it holds) saves
538 # RAM and may make us faster on low RAM systems. [see __END__]
539 if (exists $seen->{$uch})
540 {
541 warn sprintf("U%04X is %02X%02X and %04X\n",
542 $val,$page,$ch,$seen->{$uch});
543 $errors++;
544 }
545 else
546 {
547 $seen->{$uch} = $page << 8 | $ch;
548 }
549 }
550 # Passing 2 extra args each time is 3.6% slower!
551 # Even with having to add $fallback ||= 0 later
552 enter_fb0($e2u,$ech,$uch);
553 enter_fb0($u2e,$uch,$ech);
554 }
555 else
556 {
557 # No character at this position
558 # enter($e2u,$ech,undef,$e2u);
559 }
560 $ch++;
561 }
562 } while --$i;
563 } while --$pages;
564 die "\$min_el=$min_el, \$max_el=$max_el - seems we read no lines"
565 if $min_el > $max_el;
566 die "$errors mapping conflicts\n" if ($errors && $opt{'S'});
567 $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el];
568}
569
570# my ($a,$s,$d,$t,$fb) = @_;
571sub enter {
572 my ($current,$inbytes,$outbytes,$next,$fallback) = @_;
573 # state we shift to after this (multibyte) input character defaults to same
574 # as current state.
575 $next ||= $current;
576 # Making sure it is defined seems to be faster than {no warnings;} in
577 # &process, or passing it in as 0 explicity.
578 # XXX $fallback ||= 0;
579
580 # Start at the beginning and work forwards through the string to zero.
581 # effectively we are removing 1 character from the front each time
582 # but we don't actually edit the string. [this alone seems to be 14% speedup]
583 # Hence -$pos is the length of the remaining string.
584 my $pos = -length $inbytes;
585 while (1) {
586 my $byte = substr $inbytes, $pos, 1;
587 # RAW_NEXT => 0,
588 # RAW_IN_LEN => 1,
589 # RAW_OUT_BYTES => 2,
590 # RAW_FALLBACK => 3,
591 # to unicode an array would seem to be better, because the pages are dense.
592 # from unicode can be very sparse, favouring a hash.
593 # hash using the bytes (all length 1) as keys rather than ord value,
594 # as it's easier to sort these in &process.
595
596 # It's faster to always add $fallback even if it's undef, rather than
597 # choosing between 3 and 4 element array. (hence why we set it defined
598 # above)
599 my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,'',$fallback];
600 # When $pos was -1 we were at the last input character.
601 unless (++$pos) {
602 $do_now->[RAW_OUT_BYTES] = $outbytes;
603 $do_now->[RAW_NEXT] = $next;
604 return;
605 }
606 # Tail recursion. The intermdiate state may not have a name yet.
607 $current = $do_now->[RAW_NEXT];
608 }
609}
610
611# This is purely for optimistation. It's just &enter hard coded for $fallback
612# of 0, using only a 3 entry array ref to save memory for every entry.
613sub enter_fb0 {
614 my ($current,$inbytes,$outbytes,$next) = @_;
615 $next ||= $current;
616
617 my $pos = -length $inbytes;
618 while (1) {
619 my $byte = substr $inbytes, $pos, 1;
620 my $do_now = $current->{Raw}{$byte} ||= [{},-$pos,''];
621 unless (++$pos) {
622 $do_now->[RAW_OUT_BYTES] = $outbytes;
623 $do_now->[RAW_NEXT] = $next;
624 return;
625 }
626 $current = $do_now->[RAW_NEXT];
627 }
628}
629
630sub process
631{
632 my ($name,$a) = @_;
633 $name =~ s/\W+/_/g;
634 $a->{Cname} = $name;
635 my $raw = $a->{Raw};
636 my ($l, $agg_max_in, $agg_next, $agg_in_len, $agg_out_len, $agg_fallback);
637 my @ent;
638 $agg_max_in = 0;
639 foreach my $key (sort keys %$raw) {
640 # RAW_NEXT => 0,
641 # RAW_IN_LEN => 1,
642 # RAW_OUT_BYTES => 2,
643 # RAW_FALLBACK => 3,
644 my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};
645 # Now we are converting from raw to aggregate, switch from 1 byte strings
646 # to numbers
647 my $b = ord $key;
648 $fallback ||= 0;
649 if ($l &&
650 # If this == fails, we're going to reset $agg_max_in below anyway.
651 $b == ++$agg_max_in &&
652 # References in numeric context give the pointer as an int.
653 $agg_next == $next &&
654 $agg_in_len == $in_len &&
655 $agg_out_len == length $out_bytes &&
656 $agg_fallback == $fallback
657 # && length($l->[AGG_OUT_BYTES]) < 16
658 ) {
659 # my $i = ord($b)-ord($l->[AGG_MIN_IN]);
660 # we can aggregate this byte onto the end.
661 $l->[AGG_MAX_IN] = $b;
662 $l->[AGG_OUT_BYTES] .= $out_bytes;
663 } else {
664 # AGG_MIN_IN => 0,
665 # AGG_MAX_IN => 1,
666 # AGG_OUT_BYTES => 2,
667 # AGG_NEXT => 3,
668 # AGG_IN_LEN => 4,
669 # AGG_OUT_LEN => 5,
670 # AGG_FALLBACK => 6,
671 # Reset the last thing we saw, plus set 5 lexicals to save some derefs.
672 # (only gains .6% on euc-jp -- is it worth it?)
673 push @ent, $l = [$b, $agg_max_in = $b, $out_bytes, $agg_next = $next,
674 $agg_in_len = $in_len, $agg_out_len = length $out_bytes,
675 $agg_fallback = $fallback];
676 }
677 if (exists $next->{Cname}) {
678 $next->{'Forward'} = 1 if $next != $a;
679 } else {
680 process(sprintf("%s_%02x",$name,$b),$next);
681 }
682 }
683 # encengine.c rules say that last entry must be for 255
684 if ($agg_max_in < 255) {
685 push @ent, [1+$agg_max_in, 255,undef,$a,0,0];
686 }
687 $a->{'Entries'} = \@ent;
688}
689
690
691sub addstrings
692{
693 my ($fh,$a) = @_;
694 my $name = $a->{'Cname'};
695 # String tables
696 foreach my $b (@{$a->{'Entries'}})
697 {
698 next unless $b->[AGG_OUT_LEN];
699 $strings{$b->[AGG_OUT_BYTES]} = undef;
700 }
701 if ($a->{'Forward'})
702 {
703 my $var = $^O eq 'MacOS' ? 'extern' : 'static';
704 print $fh "$var encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
705 }
706 $a->{'DoneStrings'} = 1;
707 foreach my $b (@{$a->{'Entries'}})
708 {
709 my ($s,$e,$out,$t,$end,$l) = @$b;
710 addstrings($fh,$t) unless $t->{'DoneStrings'};
711 }
712}
713
714sub outbigstring
715{
716 my ($fh,$name) = @_;
717
718 $string_acc = '';
719
720 # Make the big string in the string accumulator. Longest first, on the hope
721 # that this makes it more likely that we find the short strings later on.
722 # Not sure if it helps sorting strings of the same length lexcically.
723 foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) {
724 my $index = index $string_acc, $s;
725 if ($index >= 0) {
726 $saved += length($s);
727 $strings_in_acc{$s} = $index;
728 } else {
729 OPTIMISER: {
730 if ($opt{'O'}) {
731 my $sublength = length $s;
732 while (--$sublength > 0) {
733 # progressively lop characters off the end, to see if the start of
734 # the new string overlaps the end of the accumulator.
735 if (substr ($string_acc, -$sublength)
736 eq substr ($s, 0, $sublength)) {
737 $subsave += $sublength;
738 $strings_in_acc{$s} = length ($string_acc) - $sublength;
739 # append the last bit on the end.
740 $string_acc .= substr ($s, $sublength);
741 last OPTIMISER;
742 }
743 # or if the end of the new string overlaps the start of the
744 # accumulator
745 next unless substr ($string_acc, 0, $sublength)
746 eq substr ($s, -$sublength);
747 # well, the last $sublength characters of the accumulator match.
748 # so as we're prepending to the accumulator, need to shift all our
749 # existing offsets forwards
750 $_ += $sublength foreach values %strings_in_acc;
751 $subsave += $sublength;
752 $strings_in_acc{$s} = 0;
753 # append the first bit on the start.
754 $string_acc = substr ($s, 0, -$sublength) . $string_acc;
755 last OPTIMISER;
756 }
757 }
758 # Optimiser (if it ran) found nothing, so just going have to tack the
759 # whole thing on the end.
760 $strings_in_acc{$s} = length $string_acc;
761 $string_acc .= $s;
762 };
763 }
764 }
765
766 $strings = length $string_acc;
767 my $definition = "\nstatic const U8 $name\[$strings] = { " .
768 join(',',unpack "C*",$string_acc);
769 # We have a single long line. Split it at convenient commas.
770 print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs;
771 print $fh substr ($definition, pos $definition), " };\n";
772}
773
774sub findstring {
775 my ($name,$s) = @_;
776 my $offset = $strings_in_acc{$s};
777 die "Can't find string " . join (',',unpack "C*",$s) . " in accumulator"
778 unless defined $offset;
779 "$name + $offset";
780}
781
782sub outtable
783{
784 my ($fh,$a,$bigname) = @_;
785 my $name = $a->{'Cname'};
786 $a->{'Done'} = 1;
787 foreach my $b (@{$a->{'Entries'}})
788 {
789 my ($s,$e,$out,$t,$end,$l) = @$b;
790 outtable($fh,$t,$bigname) unless $t->{'Done'};
791 }
792 print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n";
793 foreach my $b (@{$a->{'Entries'}})
794 {
795 my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
796 # $end |= 0x80 if $fb; # what the heck was on your mind, Nick? -- Dan
797 print $fh "{";
798 if ($l)
799 {
800 printf $fh findstring($bigname,$out);
801 }
802 else
803 {
804 print $fh "0";
805 }
806 print $fh ",",$t->{Cname};
807 printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec;
808 }
809 print $fh "};\n";
810}
811
812sub output_enc
813{
814 my ($fh,$name,$a) = @_;
815 die "Changed - fix me for new structure";
816 foreach my $b (sort keys %$a)
817 {
818 my ($s,$e,$out,$t,$end,$l,$fb) = @{$a->{$b}};
819 }
820}
821
822sub decode_U
823{
824 my $s = shift;
825}
826
827my @uname;
828sub char_names
829{
830 my $s = do "unicore/Name.pl";
831 die "char_names: unicore/Name.pl: $!\n" unless defined $s;
832 pos($s) = 0;
833 while ($s =~ /\G([0-9a-f]+)\t([0-9a-f]*)\t(.*?)\s*\n/igc)
834 {
835 my $name = $3;
836 my $s = hex($1);
837 last if $s >= 0x10000;
838 my $e = length($2) ? hex($2) : $s;
839 for (my $i = $s; $i <= $e; $i++)
840 {
841 $uname[$i] = $name;
842# print sprintf("U%04X $name\n",$i);
843 }
844 }
845}
846
847sub output_ucm_page
848{
849 my ($cmap,$a,$t,$pre) = @_;
850 # warn sprintf("Page %x\n",$pre);
851 my $raw = $t->{Raw};
852 foreach my $key (sort keys %$raw) {
853 # RAW_NEXT => 0,
854 # RAW_IN_LEN => 1,
855 # RAW_OUT_BYTES => 2,
856 # RAW_FALLBACK => 3,
857 my ($next, $in_len, $out_bytes, $fallback) = @{$raw->{$key}};
858 my $u = ord $key;
859 $fallback ||= 0;
860
861 if ($next != $a && $next != $t) {
862 output_ucm_page($cmap,$a,$next,(($pre|($u &0x3F)) << 6)&0xFFFF);
863 } elsif (length $out_bytes) {
864 if ($pre) {
865 $u = $pre|($u &0x3f);
866 }
867 my $s = sprintf "<U%04X> ",$u;
868 #foreach my $c (split(//,$out_bytes)) {
869 # $s .= sprintf "\\x%02X",ord($c);
870 #}
871 # 9.5% faster changing that loop to this:
872 $s .= sprintf +("\\x%02X" x length $out_bytes), unpack "C*", $out_bytes;
873 $s .= sprintf " |%d # %s\n",($fallback ? 1 : 0),$uname[$u];
874 push(@$cmap,$s);
875 } else {
876 warn join(',',$u, @{$raw->{$key}},$a,$t);
877 }
878 }
879}
880
881sub output_ucm
882{
883 my ($fh,$name,$h,$rep,$min_el,$max_el) = @_;
884 print $fh "# $0 @orig_ARGV\n" unless $opt{'q'};
885 print $fh "<code_set_name> \"$name\"\n";
886 char_names();
887 if (defined $min_el)
888 {
889 print $fh "<mb_cur_min> $min_el\n";
890 }
891 if (defined $max_el)
892 {
893 print $fh "<mb_cur_max> $max_el\n";
894 }
895 if (defined $rep)
896 {
897 print $fh "<subchar> ";
898 foreach my $c (split(//,$rep))
899 {
900 printf $fh "\\x%02X",ord($c);
901 }
902 print $fh "\n";
903 }
904 my @cmap;
905 output_ucm_page(\@cmap,$h,$h,0);
906 print $fh "#\nCHARMAP\n";
907 foreach my $line (sort { substr($a,8) cmp substr($b,8) } @cmap)
908 {
909 print $fh $line;
910 }
911 print $fh "END CHARMAP\n";
912}
913
914use vars qw(
915 $_Enc2xs
916 $_Version
917 $_Inc
918 $_E2X
919 $_Name
920 $_TableFiles
921 $_Now
922);
923
924sub find_e2x{
925 eval { require File::Find; };
926 my (@inc, %e2x_dir);
927 for my $inc (@INC){
928 push @inc, $inc unless $inc eq '.'; #skip current dir
929 }
930 File::Find::find(
931 sub {
932 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
933 $atime,$mtime,$ctime,$blksize,$blocks)
934 = lstat($_) or return;
935 -f _ or return;
936 if (/^.*\.e2x$/o){
937 no warnings 'once';
938 $e2x_dir{$File::Find::dir} ||= $mtime;
939 }
940 return;
941 }, @inc);
942 warn join("\n", keys %e2x_dir), "\n";
943 for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
944 $_E2X = $d;
945 # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
946 return $_E2X;
947 }
948}
949
950sub make_makefile_pl
951{
952 eval { require Encode; };
953 $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n";
954 # our used for variable expanstion
955 $_Enc2xs = $0;
956 $_Version = $VERSION;
957 $_E2X = find_e2x();
958 $_Name = shift;
959 $_TableFiles = join(",", map {qq('$_')} @_);
960 $_Now = scalar localtime();
961
962 eval { require File::Spec; };
963 _print_expand(File::Spec->catfile($_E2X,"Makefile_PL.e2x"),"Makefile.PL");
964 _print_expand(File::Spec->catfile($_E2X,"_PM.e2x"), "$_Name.pm");
965 _print_expand(File::Spec->catfile($_E2X,"_T.e2x"), "t/$_Name.t");
966 _print_expand(File::Spec->catfile($_E2X,"README.e2x"), "README");
967 _print_expand(File::Spec->catfile($_E2X,"Changes.e2x"), "Changes");
968 exit;
969}
970
971use vars qw(
972 $_ModLines
973 $_LocalVer
974 );
975
976sub make_configlocal_pm
977{
978 eval { require Encode; };
979 $@ and die "Unable to require Encode: $@\n";
980 eval { require File::Spec; };
981 # our used for variable expanstion
982 my %in_core = map {$_=>1}('ascii','iso-8859-1','utf8');
983 my %LocalMod = ();
984 for my $d (@INC){
985 my $inc = File::Spec->catfile($d, "Encode");
986 -d $inc or next;
987 opendir my $dh, $inc or die "$inc:$!";
988 warn "Checking $inc...\n";
989 for my $f (grep /\.pm$/o, readdir($dh)){
990 -f File::Spec->catfile($inc, "$f") or next;
991 $INC{"Encode/$f"} and next;
992 warn "require Encode/$f;\n";
993 eval { require "Encode/$f"; };
994 $@ and die "Can't require Encode/$f: $@\n";
995 for my $enc (Encode->encodings()){
996 no warnings 'once';
997 $in_core{$enc} and next;
998 $Encode::Config::ExtModule{$enc} and next;
999 my $mod = "Encode/$f";
1000 $mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
1001 $LocalMod{$enc} ||= $mod;
1002 }
1003 }
1004 }
1005 $_ModLines = "";
1006 for my $enc (sort keys %LocalMod){
1007 $_ModLines .=
1008 qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
1009 }
1010 warn $_ModLines;
1011 $_LocalVer = _mkversion();
1012 $_E2X = find_e2x();
1013 $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;
1014 _print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"),
1015 File::Spec->catfile($_Inc,"ConfigLocal.pm"),
1016 1);
1017 exit;
1018}
1019
1020sub _mkversion{
1021 my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
1022 $yyyy += 1900, $mo +=1;
1023 return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
1024}
1025
1026sub _print_expand{
1027 eval { require File::Basename; };
1028 $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
1029 File::Basename->import();
1030 my ($src, $dst, $clobber) = @_;
1031 if (!$clobber and -e $dst){
1032 warn "$dst exists. skipping\n";
1033 return;
1034 }
1035 warn "Generating $dst...\n";
1036 open my $in, $src or die "$src : $!";
1037 if ((my $d = dirname($dst)) ne '.'){
1038 -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
1039 }
1040 open my $out, ">$dst" or die "$!";
1041 my $asis = 0;
1042 while (<$in>){
1043 if (/^#### END_OF_HEADER/){
1044 $asis = 1; next;
1045 }
1046 s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
1047 print $out $_;
1048 }
1049}
1050__END__
1051
1052=head1 NAME
1053
1054enc2xs -- Perl Encode Module Generator
1055
1056=head1 SYNOPSIS
1057
1058 enc2xs -[options]
1059 enc2xs -M ModName mapfiles...
1060 enc2xs -C
1061
1062=head1 DESCRIPTION
1063
1064F<enc2xs> builds a Perl extension for use by Encode from either
1065Unicode Character Mapping files (.ucm) or Tcl Encoding Files (.enc).
1066Besides being used internally during the build process of the Encode
1067module, you can use F<enc2xs> to add your own encoding to perl.
1068No knowledge of XS is necessary.
1069
1070=head1 Quick Guide
1071
1072If you want to know as little about Perl as possible but need to
1073add a new encoding, just read this chapter and forget the rest.
1074
1075=over 4
1076
1077=item 0.
1078
1079Have a .ucm file ready. You can get it from somewhere or you can write
1080your own from scratch or you can grab one from the Encode distribution
1081and customize it. For the UCM format, see the next Chapter. In the
1082example below, I'll call my theoretical encoding myascii, defined
1083in I<my.ucm>. C<$> is a shell prompt.
1084
1085 $ ls -F
1086 my.ucm
1087
1088=item 1.
1089
1090Issue a command as follows;
1091
1092 $ enc2xs -M My my.ucm
1093 generating Makefile.PL
1094 generating My.pm
1095 generating README
1096 generating Changes
1097
1098Now take a look at your current directory. It should look like this.
1099
1100 $ ls -F
1101 Makefile.PL My.pm my.ucm t/
1102
1103The following files were created.
1104
1105 Makefile.PL - MakeMaker script
1106 My.pm - Encode submodule
1107 t/My.t - test file
1108
1109=over 4
1110
1111=item 1.1.
1112
1113If you want *.ucm installed together with the modules, do as follows;
1114
1115 $ mkdir Encode
1116 $ mv *.ucm Encode
1117 $ enc2xs -M My Encode/*ucm
1118
1119=back
1120
1121=item 2.
1122
1123Edit the files generated. You don't have to if you have no time AND no
1124intention to give it to someone else. But it is a good idea to edit
1125the pod and to add more tests.
1126
1127=item 3.
1128
1129Now issue a command all Perl Mongers love:
1130
1131 $ perl Makefile.PL
1132 Writing Makefile for Encode::My
1133
1134=item 4.
1135
1136Now all you have to do is make.
1137
1138 $ make
1139 cp My.pm blib/lib/Encode/My.pm
1140 /usr/local/bin/perl /usr/local/bin/enc2xs -Q -O \
1141 -o encode_t.c -f encode_t.fnm
1142 Reading myascii (myascii)
1143 Writing compiled form
1144 128 bytes in string tables
1145 384 bytes (75%) saved spotting duplicates
1146 1 bytes (0.775%) saved using substrings
1147 ....
1148 chmod 644 blib/arch/auto/Encode/My/My.bs
1149 $
1150
1151The time it takes varies depending on how fast your machine is and
1152how large your encoding is. Unless you are working on something big
1153like euc-tw, it won't take too long.
1154
1155=item 5.
1156
1157You can "make install" already but you should test first.
1158
1159 $ make test
1160 PERL_DL_NONLAZY=1 /usr/local/bin/perl -Iblib/arch -Iblib/lib \
1161 -e 'use Test::Harness qw(&runtests $verbose); \
1162 $verbose=0; runtests @ARGV;' t/*.t
1163 t/My....ok
1164 All tests successful.
1165 Files=1, Tests=2, 0 wallclock secs
1166 ( 0.09 cusr + 0.01 csys = 0.09 CPU)
1167
1168=item 6.
1169
1170If you are content with the test result, just "make install"
1171
1172=item 7.
1173
1174If you want to add your encoding to Encode's demand-loading list
1175(so you don't have to "use Encode::YourEncoding"), run
1176
1177 enc2xs -C
1178
1179to update Encode::ConfigLocal, a module that controls local settings.
1180After that, "use Encode;" is enough to load your encodings on demand.
1181
1182=back
1183
1184=head1 The Unicode Character Map
1185
1186Encode uses the Unicode Character Map (UCM) format for source character
1187mappings. This format is used by IBM's ICU package and was adopted
1188by Nick Ing-Simmons for use with the Encode module. Since UCM is
1189more flexible than Tcl's Encoding Map and far more user-friendly,
1190this is the recommended formet for Encode now.
1191
1192A UCM file looks like this.
1193
1194 #
1195 # Comments
1196 #
1197 <code_set_name> "US-ascii" # Required
1198 <code_set_alias> "ascii" # Optional
1199 <mb_cur_min> 1 # Required; usually 1
1200 <mb_cur_max> 1 # Max. # of bytes/char
1201 <subchar> \x3F # Substitution char
1202 #
1203 CHARMAP
1204 <U0000> \x00 |0 # <control>
1205 <U0001> \x01 |0 # <control>
1206 <U0002> \x02 |0 # <control>
1207 ....
1208 <U007C> \x7C |0 # VERTICAL LINE
1209 <U007D> \x7D |0 # RIGHT CURLY BRACKET
1210 <U007E> \x7E |0 # TILDE
1211 <U007F> \x7F |0 # <control>
1212 END CHARMAP
1213
1214=over 4
1215
1216=item *
1217
1218Anything that follows C<#> is treated as a comment.
1219
1220=item *
1221
1222The header section continues until a line containing the word
1223CHARMAP. This section has a form of I<E<lt>keywordE<gt> value>, one
1224pair per line. Strings used as values must be quoted. Barewords are
1225treated as numbers. I<\xXX> represents a byte.
1226
1227Most of the keywords are self-explanatory. I<subchar> means
1228substitution character, not subcharacter. When you decode a Unicode
1229sequence to this encoding but no matching character is found, the byte
1230sequence defined here will be used. For most cases, the value here is
1231\x3F; in ASCII, this is a question mark.
1232
1233=item *
1234
1235CHARMAP starts the character map section. Each line has a form as
1236follows:
1237
1238 <UXXXX> \xXX.. |0 # comment
1239 ^ ^ ^
1240 | | +- Fallback flag
1241 | +-------- Encoded byte sequence
1242 +-------------- Unicode Character ID in hex
1243
1244The format is roughly the same as a header section except for the
1245fallback flag: | followed by 0..3. The meaning of the possible
1246values is as follows:
1247
1248=over 4
1249
1250=item |0
1251
1252Round trip safe. A character decoded to Unicode encodes back to the
1253same byte sequence. Most characters have this flag.
1254
1255=item |1
1256
1257Fallback for unicode -> encoding. When seen, enc2xs adds this
1258character for the encode map only.
1259
1260=item |2
1261
1262Skip sub-char mapping should there be no code point.
1263
1264=item |3
1265
1266Fallback for encoding -> unicode. When seen, enc2xs adds this
1267character for the decode map only.
1268
1269=back
1270
1271=item *
1272
1273And finally, END OF CHARMAP ends the section.
1274
1275=back
1276
1277When you are manually creating a UCM file, you should copy ascii.ucm
1278or an existing encoding which is close to yours, rather than write
1279your own from scratch.
1280
1281When you do so, make sure you leave at least B<U0000> to B<U0020> as
1282is, unless your environment is EBCDIC.
1283
1284B<CAVEAT>: not all features in UCM are implemented. For example,
1285icu:state is not used. Because of that, you need to write a perl
1286module if you want to support algorithmical encodings, notably
1287the ISO-2022 series. Such modules include L<Encode::JP::2022_JP>,
1288L<Encode::KR::2022_KR>, and L<Encode::TW::HZ>.
1289
1290=head2 Coping with duplicate mappings
1291
1292When you create a map, you SHOULD make your mappings round-trip safe.
1293That is, C<encode('your-encoding', decode('your-encoding', $data)) eq
1294$data> stands for all characters that are marked as C<|0>. Here is
1295how to make sure:
1296
1297=over 4
1298
1299=item *
1300
1301Sort your map in Unicode order.
1302
1303=item *
1304
1305When you have a duplicate entry, mark either one with '|1' or '|3'.
1306
1307=item *
1308
1309And make sure the '|1' or '|3' entry FOLLOWS the '|0' entry.
1310
1311=back
1312
1313Here is an example from big5-eten.
1314
1315 <U2550> \xF9\xF9 |0
1316 <U2550> \xA2\xA4 |3
1317
1318Internally Encoding -> Unicode and Unicode -> Encoding Map looks like
1319this;
1320
1321 E to U U to E
1322 --------------------------------------
1323 \xF9\xF9 => U2550 U2550 => \xF9\xF9
1324 \xA2\xA4 => U2550
1325
1326So it is round-trip safe for \xF9\xF9. But if the line above is upside
1327down, here is what happens.
1328
1329 E to U U to E
1330 --------------------------------------
1331 \xA2\xA4 => U2550 U2550 => \xF9\xF9
1332 (\xF9\xF9 => U2550 is now overwritten!)
1333
1334The Encode package comes with F<ucmlint>, a crude but sufficient
1335utility to check the integrity of a UCM file. Check under the
1336Encode/bin directory for this.
1337
1338When in doubt, you can use F<ucmsort>, yet another utility under
1339Encode/bin directory.
1340
1341=head1 Bookmarks
1342
1343=over 4
1344
1345=item *
1346
1347ICU Home Page
1348L<http://oss.software.ibm.com/icu/>
1349
1350=item *
1351
1352ICU Character Mapping Tables
1353L<http://oss.software.ibm.com/icu/charset/>
1354
1355=item *
1356
1357ICU:Conversion Data
1358L<http://oss.software.ibm.com/icu/userguide/conversion-data.html>
1359
1360=back
1361
1362=head1 SEE ALSO
1363
1364L<Encode>,
1365L<perlmod>,
1366L<perlpod>
1367
1368=cut
1369
1370# -Q to disable the duplicate codepoint test
1371# -S make mapping errors fatal
1372# -q to remove comments written to output files
1373# -O to enable the (brute force) substring optimiser
1374# -o <output> to specify the output file name (else it's the first arg)
1375# -f <inlist> to give a file with a list of input files (else use the args)
1376# -n <name> to name the encoding (else use the basename of the input file.
1377
1378With %seen holding array refs:
1379
1380 865.66 real 28.80 user 8.79 sys
1381 7904 maximum resident set size
1382 1356 average shared memory size
1383 18566 average unshared data size
1384 229 average unshared stack size
1385 46080 page reclaims
1386 33373 page faults
1387
1388With %seen holding simple scalars:
1389
1390 342.16 real 27.11 user 3.54 sys
1391 8388 maximum resident set size
1392 1394 average shared memory size
1393 14969 average unshared data size
1394 236 average unshared stack size
1395 28159 page reclaims
1396 9839 page faults
1397
1398Yes, 5 minutes is faster than 15. Above is for CP936 in CN. Only difference is
1399how %seen is storing things its seen. So it is pathalogically bad on a 16M
1400RAM machine, but it's going to help even on modern machines.
1401Swapping is bad, m'kay :-)
1402
1403__END__
1404:endofperl
Note: See TracBrowser for help on using the repository browser.