Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: for-distributions/trunk/bin/windows/perl/lib/unicore/mktables@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago
upgrading to perl 5.8
File size: 60.6 KB

Line
## !!!!!!!!!!!!!! IF YOU MODIFY THIS FILE !!!!!!!!!!!!!!!!!!!!!!!!!
## Any files created or read by this program should be listed in 'mktables.lst'

#!/usr/bin/perl -w
## NOTE(review): the #! line above is not the first line of the file, so
## perl presumably treats it as a plain comment and "-w" is never applied.
## Confirm whether warnings are meant to be on before moving it.
require 5.008; # Needs pack "U". Probably safest to run on 5.8.x
use strict;
use Carp;
use File::Spec;

##
## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
## from the Unicode database files (lib/unicore/*.txt).
##

## "Fuzzy" means this section in Unicode TR18:
##
## The recommended names for UCD properties and property values are in
## PropertyAliases.txt [Prop] and PropertyValueAliases.txt
## [PropValue]. There are both abbreviated names and longer, more
## descriptive names. It is strongly recommended that both names be
## recognized, and that loose matching of property names be used,
## whereby the case distinctions, whitespace, hyphens, and underbar
## are ignored.

## Base names already used in lib/gc_sc (for avoiding 8.3 conflicts)
my %BaseNames;

##
## Process any args.
##
my $Verbose        = 0;         ## -v / -q : print progress messages
my $MakeTestScript = 0;         ## -maketest
my $AlwaysWrite    = 0;         ## -w : rewrite files even if unchanged
my $UseDir         = "";        ## -C dir : directory to chdir into
my $FileList       = "$0.lst";  ## -L filelist : list of inputs/outputs
my $MakeList       = 0;         ## -makelist

##
## Consume @ARGV one switch at a time.  Anything unrecognised -- including
## a -C or -L that has no value after it -- ends in the usage message.
##
while (@ARGV)
{
    my $arg = shift @ARGV;

    if ($arg eq '-v') {
        $Verbose = 1;
    }
    elsif ($arg eq '-q') {
        $Verbose = 0;
    }
    elsif ($arg eq '-w') {
        $AlwaysWrite = 1;  # update the files even if they havent changed
        $FileList = "";    # ...which also disables the file-list check
    }
    elsif ($arg eq '-maketest') {
        $MakeTestScript = 1;
    }
    elsif ($arg eq '-makelist') {
        $MakeList = 1;
    }
    elsif ($arg eq '-C' && defined ($UseDir = shift @ARGV)) {
        -d $UseDir or die "Unknown directory '$UseDir'";
    }
    elsif ($arg eq '-L' && defined ($FileList = shift @ARGV)) {
        -e $FileList or die "Filelist '$FileList' doesn't appear to exist!";
    }
    else {
        ## The -L default is "$0.lst" (see the declaration of $FileList).
        die "usage: $0 [-v|-q|-w|-C dir|-L filelist] [-maketest] [-makelist]\n",
            " -v : Verbose Mode\n",
            " -q : Quiet Mode\n",
            " -w : Write files regardless\n",
            " -maketest : Make test script\n",
            " -makelist : Rewrite the file list based on current setup\n",
            " -L filelist : Use this file list, (defaults to $0.lst)\n",
            " -C dir : Change to this directory before proceding\n";
    }
}

##
## Unless -w was given, read the file list and decide whether anything
## needs rebuilding at all.  The list holds the input files, then a line
## of '=' characters, then the output files; only the first tab-separated
## column of each line is used.  If every output exists and none is older
## than the most recently modified input, exit without doing any work.
##
if ($FileList) {
    print "Reading file list '$FileList'\n"
        if $Verbose;
    open my $fh, "<", $FileList or die "Failed to read '$FileList':$!";
    my @input;
    my @output;
    for my $list ( \@input, \@output ) {
        while (<$fh>) {
            s/^ \s+ | \s+ $//xg;               # trim both ends
            next if /^ \s* (?: \# .* )? $/x;   # blank line or comment
            last if /^ =+ $/x;                 # section separator
            my ( $file ) = split /\t/, $_;
            push @$list, $file;
        }
        my %dupe;
        @$list = grep !$dupe{ $_ }++, @$list;
    }
    close $fh;
    die "No input or output files in '$FileList'!"
        if !@input or !@output;
    if ( $MakeList ) {
        ## NOTE(review): this runs before the chdir to $UseDir below.
        foreach my $file (@output) {
            unlink $file;
        }
    }
    if ( $Verbose ) {
        print "Expecting ".scalar( @input )." input files. ",
              "Checking ".scalar( @output )." output files.\n";
    }

    ## -M yields an age in days, so the youngest file is the one with the
    ## SMALLEST value.  Track the youngest input, including $0 itself.
    ## (This previously kept the largest age, i.e. the oldest input, which
    ## let stale outputs pass the check.)
    my $youngest = -M $0; # do this before the chdir!
    if ($UseDir) {
        chdir $UseDir or die "Failed to chdir to '$UseDir':$!";
    }
    foreach my $in (@input) {
        my $age = -M $in;
        die "Missing input file '$in'" unless defined $age;
        $youngest = $age if $age < $youngest;
    }

    # now we check to see if any output files are older than the youngest
    # input, if they are we need to continue on, otherwise we can
    # presumably bail.
    my $ok = 1;
    foreach my $out (@output) {
        if ( ! -e $out ) {
            print "'$out' is missing.\n"
                if $Verbose;
            $ok = 0;
            last;
        }
        if ( -M $out > $youngest ) {
            print "'$out' is too old.\n"
                if $Verbose;
            $ok = 0;
            last;
        }
    }
    if ($ok) {
        print "Files seem to be ok, not bothering to rebuild.\n";
        exit(0);
    }
    print "Must rebuild tables.\n"
        if $Verbose;
} else {
    if ($Verbose) {
        print "Not checking filelist.\n";
    }
    if ($UseDir) {
        chdir $UseDir or die "Failed to chdir to '$UseDir':$!";
    }
}

## Make sure every output directory exists before any table is written.
foreach my $lib ('To', 'lib',
                 map {File::Spec->catdir("lib",$_)}
                 qw(gc_sc dt bc hst ea jt lb nt ccc)) {
    next if -d $lib;
    mkdir $lib, 0755 or die "mkdir '$lib': $!";
}

my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.

## Banner placed at the top of every generated file.
my $HEADER=<<"EOF";
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. UnicodeData.txt.
# Any changes made here will be lost!

EOF

##
## Remove $filename if it exists.  If the first unlink fails, open up the
## file's permissions and try once more; die if that fails as well.
## Returns quietly when the file is absent or was removed first time.
##
sub force_unlink {
    my $filename = shift;
    return unless -e $filename;
    return if CORE::unlink($filename);
    # We might need write permission
    chmod 0777, $filename;
    CORE::unlink($filename) or die "Couldn't unlink $filename: $!\n";
}

##
## Given a filename and a reference to an array of lines,
## write the lines to the file only if the contents have not changed.
## Filename can be given as an arrayref of directory names
##
## When $AlwaysWrite is set the file is rewritten even if it is identical.
## Dies if the file cannot be opened, written or closed.
##
sub WriteIfChanged($\@)
{
    my ($file, $lines) = @_;

    $file = File::Spec->catfile(@$file) if ref $file;

    my $TextToWrite = join '', @$lines;

    ## Compare with what is already on disk, if anything.  Three-arg open
    ## with a lexical handle: the name is never parsed for a mode.
    if (open my $in, '<', $file) {
        my $PreviousText = do { local $/; <$in> };
        close $in;
        if ($PreviousText eq $TextToWrite) {
            print "$file unchanged.\n" if $Verbose;
            return unless $AlwaysWrite;
        }
    }

    force_unlink ($file);
    open my $out, '>', $file
        or die "$0: can't open $file for output: $!\n";
    print "$file written.\n" if $Verbose;

    ## Buffered write errors may only surface at close, so check both.
    print {$out} $TextToWrite
        or die "$0: can't write to $file: $!\n";
    close $out
        or die "$0: can't close $file: $!\n";
}

##
## The main datastructure (a "Table") represents a set of code points that
## are part of a particular quality (that are part of \pL, \p{InGreek},
## etc.). They are kept as ranges of code points (starting and ending of
## each range).
##
## For example, a range ASCII LETTERS would be represented as:
##   [ [ 0x41 => 0x5A, 'UPPER' ],
##     [ 0x61 => 0x7A, 'LOWER' ] ]
##
sub RANGE_START() { 0 } ## index into range element
sub RANGE_END()   { 1 } ## index into range element
sub RANGE_NAME()  { 2 } ## index into range element

## Conceptually, these should really be folded into the 'Table' objects
my %TableInfo;       ## {Type}{Name} => Table        (set by New_Prop)
my %TableDesc;       ## {Type}{Name} => description  (set by New_Prop)
my %FuzzyNames;      ## {Type}{Name} => Name, for fuzzy props and aliases
my %AliasInfo;       ## {Type}{Name} => Alias        (set by New_Alias)
my %CanonicalToOrig; ## canonical name => first spelling seen

##
## Turn something like
##    OLD-ITALIC
## into
##    OldItalic
##
## The first original spelling seen for each canonical name is remembered
## in %CanonicalToOrig.  Returns the canonical name.
##
sub CanonicalName($)
{
    my $orig = shift;

    my $name = lc $orig;
    ## Upper-case every word character that does not follow a lower-case
    ## letter, i.e. the first letter of each word...
    $name =~ s/(?<![a-z])(\w)/\u$1/g;
    ## ...then squeeze out the separators.
    $name =~ s/[-_\s]+//g;

    $CanonicalToOrig{$name} = $orig if not $CanonicalToOrig{$name};
    return $name;
}


##
## Store the alias definitions for later use.
##
my %PropertyAlias;   ## abbreviation => long property name
my %PropValueAlias;  ## {prop}{abbreviation} => long value name
                     ##   (for 'ccc': {prop}{abbrev} => [ number, long ])

my %PA_reverse;      ## long property name => abbreviation
my %PVA_reverse;     ## {prop}{long value name} => abbreviation
                     ##   (for 'ccc': {prop}{long} => [ number, abbrev ])

##
## Read PropertyAliases.txt and PropValueAliases.txt, filling the four
## hashes above as well as the %utf8::... copies that are later dumped
## into PVA.pl.  Confesses if either file cannot be opened.
##
sub Build_Aliases()
{
    ##
    ## Most of the work with aliases doesn't occur here,
    ## but rather in utf8_heavy.pl, which uses PVA.pl,

    # Placate the warnings about used only once. (They are used again, but
    # via a typeglob lookup)
    %utf8::PropertyAlias = ();
    %utf8::PA_reverse = ();
    %utf8::PropValueAlias = ();
    %utf8::PVA_reverse = ();
    %utf8::PVA_abbr_map = ();

    open my $pa, '<', "PropertyAliases.txt"
        or confess "Can't open PropertyAliases.txt: $!";
    while (<$pa>) {
        s/#.*//;
        s/\s+$//;
        next if /^$/;

        ## Fields are ';'-separated with optional padding on either side.
        my ($abbrev, $name) = split /\s*;\s*/;
        next if $abbrev eq "n/a";
        $PropertyAlias{$abbrev} = $name;
        $PA_reverse{$name} = $abbrev;

        # The %utf8::... versions use japhy's code originally from utf8_pva.pl
        # However, it's moved here so that we build the tables at runtime.
        tr/ _-//d for $abbrev, $name;
        $utf8::PropertyAlias{lc $abbrev} = $name;
        $utf8::PA_reverse{lc $name} = $abbrev;
    }
    close $pa;

    open my $pva, '<', "PropValueAliases.txt"
        or confess "Can't open PropValueAliases.txt: $!";
    while (<$pva>) {
        s/#.*//;
        s/\s+$//;
        next if /^$/;

        my ($prop, @data) = split /\s*;\s*/;

        if ($prop eq 'ccc') {
            ## ccc lines carry an extra leading numeric field.
            $PropValueAlias{$prop}{$data[1]} = [ @data[0,2] ];
            $PVA_reverse{$prop}{$data[2]} = [ @data[0,1] ];
        }
        else {
            next if $data[0] eq "n/a";
            $PropValueAlias{$prop}{$data[0]} = $data[1];
            $PVA_reverse{$prop}{$data[1]} = $data[0];
        }

        ## From here on ccc looks like every other property.
        shift @data if $prop eq 'ccc';
        next if $data[0] eq "n/a";

        $data[1] =~ tr/ _-//d;
        $utf8::PropValueAlias{$prop}{lc $data[0]} = $data[1];
        $utf8::PVA_reverse{$prop}{lc $data[1]} = $data[0];

        my $abbr_class = ($prop eq 'gc' or $prop eq 'sc') ? 'gc_sc' : $prop;
        $utf8::PVA_abbr_map{$abbr_class}{lc $data[0]} = $data[0];
    }
    close $pva;

    # backwards compatibility for L& -> LC
    $utf8::PropValueAlias{gc}{'l&'} = $utf8::PropValueAlias{gc}{lc};
    $utf8::PVA_abbr_map{gc_sc}{'l&'} = $utf8::PVA_abbr_map{gc_sc}{lc};

}


##
## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
##
## Called like:
##   New_Prop(In => 'Greek', $Table, Desc => 'Greek Block', Fuzzy => 1);
##
## Normally, these parameters are set when the Table is created (when the
## Table->New constructor is called), but there are times when it needs to
## be done after-the-fact...)
##
## The first registration of a given Type/Name wins; later ones are
## silently ignored.  Confesses on unknown keys, a Type other than
## 'Is'/'In', or a $Table that is not a reference.
##
sub New_Prop($$$@)
{
    my ($Type,   ## "Is" or "In";
        $Name,
        $Table,
        %Args    ## remaining args are optional key/val
       ) = @_;

    my $Fuzzy = delete $Args{Fuzzy};
    my $Desc  = delete $Args{Desc}; # description

    $Name = CanonicalName($Name) if $Fuzzy;

    ## sanity check a few args
    if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) {
        confess "$0: bad args to New_Prop"
    }

    return if $TableInfo{$Type}->{$Name};

    $TableInfo{$Type}->{$Name} = $Table;
    $TableDesc{$Type}->{$Name} = $Desc;
    $FuzzyNames{$Type}->{$Name} = $Name if $Fuzzy;
    return;
}


##
## Creates a new Table object.
##
## Args are key/value pairs:
##    In => Name         -- Name of "In" property to be associated with
##    Is => Name         -- Name of "Is" property to be associated with
##    Fuzzy => Boolean   -- True if name can be accessed "fuzzily"
##    Desc => String     -- Description of the property
##
## No args are required.  Any other key makes the constructor confess.
##
sub Table::New
{
    my ($class, %Args) = @_;

    ## A Table is a blessed array of [start, end, name] ranges.
    my $Table = bless [], $class;

    my $Fuzzy = delete $Args{Fuzzy};
    my $Desc  = delete $Args{Desc};

    for my $Type ('Is', 'In')
    {
        my $Name = delete $Args{$Type} or next;
        New_Prop($Type => $Name, $Table, Desc => $Desc, Fuzzy => $Fuzzy);
    }

    ## shouldn't have any left over
    if (%Args) {
        confess "$0: bad args to Table->New"
    }

    return $Table;
}


##
## Returns the maximum code point currently in the table, i.e. the end of
## the last range.  Confesses if the table is empty.
##
sub Table::Max
{
    my $Table = shift; #self

    my $last = $Table->[-1]; ## last range
    confess "oops" unless $last; ## must have code points to have a max
    return $last->[RANGE_END];
}

##
## Replaces the codepoints in the Table with those in the Table given
## as an arg. (NOTE: this is not a "deep copy" -- both tables end up
## sharing the same range records.)
##
sub Table::Replace($$)
{
    my ($Table, $New) = @_;

    @$Table = @$New;
}

##
## Given a new code point, make the last range of the Table extend to
## include the new (and all intervening) code points.
##
## Takes the time to make sure that the extension is valid: confesses
## unless the new code point lies beyond the current maximum.
##
sub Table::Extend
{
    my ($Table, $codepoint) = @_;

    my $PrevMax = $Table->Max;

    confess "oops ($codepoint <= $PrevMax)" if $codepoint <= $PrevMax;

    $Table->ExtendNoCheck($codepoint);
}


##
## Given a new code point, make the last range of the Table extend to
## include the new (and all intervening) code points.
##
## Does NOT check that the extension is valid. Assumes that the caller
## has already made this check.
##
sub Table::ExtendNoCheck
{
    ## Optimized adding: works on @_ directly.
    ##   $_[0] is the Table, $_[1] is the code point.
    $_[0]->[-1]->[RANGE_END] = $_[1];
}

##
## Given a code point range start and end (and optional name), blindly
## append them to the list of ranges for the Table.  No merging with the
## previous range is attempted.
##
## NOTE: Code points must be added in strictly ascending numeric order.
##
sub Table::RawAppendRange
{
    my ($Table, $start, $end, $name) = @_;
    $name = "" if not defined $name; ## warning: $name can be "0"

    push @$Table, [ $start,  # RANGE_START
                    $end,    # RANGE_END
                    $name ]; # RANGE_NAME
}

##
## Given a code point (and optional name), add it to the Table.
##
## NOTE: Code points must be added in strictly ascending numeric order.
##
sub Table::Append
{
    my ($Table, $codepoint, $name) = @_;
    $name = "" if not defined $name; ## warning: $name can be "0"

    ##
    ## If we've already got a range working, and this code point is the next
    ## one in line, and if the name is the same, just extend the current range.
    ##
    my $last = $Table->[-1];
    my $continues_last = $last
                      && $last->[RANGE_END] == $codepoint - 1
                      && $last->[RANGE_NAME] eq $name;

    if ($continues_last) {
        $Table->ExtendNoCheck($codepoint);
    }
    else {
        $Table->RawAppendRange($codepoint, $codepoint, $name);
    }
}

##
## Given a code point range starting value and ending value (and name),
## Add the range to the Table.
##
## NOTE: Code points must be added in strictly ascending numeric order.
##
sub Table::AppendRange
{
    my ($Table, $start, $end, $name) = @_;
    $name = "" if not defined $name; ## warning: $name can be "0"

    ## Add the first code point, then stretch the range out to $end.
    $Table->Append($start, $name);
    $Table->Extend($end) if $end > $start;
}

##
## Return a new Table that represents all code points not in the Table,
## from 0 up to and including $LastUnicodeCodepoint.  The receiver is not
## modified and range names are not carried over.
##
sub Table::Invert
{
    my $Table = shift; #self

    my $New = Table->New();
    my $max = -1;   ## highest code point covered so far
    for my $range (@$Table)
    {
        my $start = $range->[RANGE_START];
        my $end   = $range->[RANGE_END];
        ## Emit the gap (if any) between the previous range and this one.
        if ($start-1 >= $max+1) {
            $New->AppendRange($max+1, $start-1, "");
        }
        $max = $end;
    }
    ## Emit the tail.  "<=" so that a gap consisting of the final code
    ## point alone is not dropped.
    if ($max+1 <= $LastUnicodeCodepoint) {
        $New->AppendRange($max+1, $LastUnicodeCodepoint);
    }
    return $New;
}

##
## Merges any number of other tables with $self, returning the new table.
## (existing tables are not modified)
##
## Args may be Tables, or individual code points (as integers).
## Range names are not carried into the result.  With nothing to merge
## an empty Table is returned.
##
## Can be called as either a constructor or a method.
##
sub Table::Merge
{
    shift(@_) if not ref $_[0]; ## if called as a constructor, lose the class
    my @Tables = @_;

    ## Accumulate all records from all tables
    my @Records;
    for my $Arg (@Tables)
    {
        if (ref $Arg) {
            ## arg is a table -- get its ranges
            push @Records, @$Arg;
        } else {
            ## arg is a codepoint, make a range
            push @Records, [ $Arg, $Arg ];
        }
    }

    my $New = Table->New();

    ## Nothing to fold in: do not invent an [undef, undef] range.
    return $New if not @Records;

    ## sort by range start, with longer ranges coming first.
    ## (The tiebreak used to compare $b with itself and so did nothing.)
    my ($first, @Rest) = sort {
        ($a->[RANGE_START] <=> $b->[RANGE_START])
            or
        ($b->[RANGE_END] <=> $a->[RANGE_END])
    } @Records;

    ## Ensuring the first range is there makes the subsequent loop easier
    $New->AppendRange($first->[RANGE_START],
                      $first->[RANGE_END]);

    ## Fold in records so long as they add new information.
    for my $set (@Rest)
    {
        my $start = $set->[RANGE_START];
        my $end   = $set->[RANGE_END];
        if ($start > $New->Max) {
            $New->AppendRange($start, $end);
        } elsif ($end > $New->Max) {
            $New->ExtendNoCheck($end);
        }
    }

    return $New;
}

##
## Given a filename, write a representation of the Table to a file.
## May have an optional comment as a 2nd arg.
## Filename may actually be an arrayref of directories
##
## The generated file is a Perl snippet that returns a here-document with
## one line per range: "START<tab>END<tab>NAME", where END is left empty
## for single-code-point ranges.  Code points are written in hex.
##
sub Table::Write
{
    my ($Table, $filename, $comment) = @_;

    my @OUT = $HEADER;
    if (defined $comment) {
        $comment =~ s/\s+\Z//;
        $comment =~ s/^/# /gm;   ## turn every line into a comment line
        push @OUT, "#\n$comment\n#\n";
    }
    push @OUT, "return <<'END';\n";

    for my $set (@$Table)
    {
        my ($start, $end, $name) = @{$set}[RANGE_START, RANGE_END, RANGE_NAME];

        if ($start == $end) {
            push @OUT, sprintf "%04X\t\t%s\n", $start, $name;
        } else {
            push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name;
        }
    }

    push @OUT, "END\n";

    WriteIfChanged($filename, @OUT);
}

## This used only for making the test script.
## helper function
##
## Returns 1 if $code is a code point the test script may use, else 0.
sub IsUsable($)
{
    my $code = shift;

    return 0 if $code <= 0x0000;                 ## don't use null
    return 0 if $code >= $LastUnicodeCodepoint;  ## keep in range
    return 0 if $code >= 0xD800 && $code <= 0xDFFF;  ## no surrogates
    return 0 if $code >= 0xFDD0 && $code <= 0xFDEF;  ## utf8.c says no good

    ## The last two code points of each 64K block: utf8.c says no good
    my $low16 = $code & 0xFFFF;
    return 0 if $low16 == 0xFFFE;
    return 0 if $low16 == 0xFFFF;

    return 1;
}

## Return a code point that's part of the table.
## Returns nothing if the table is empty (or covers only surrogates).
## This used only for making the test script.
##
## Only the END of each range is tried, in table order.
sub Table::ValidCode
{
    my $Table = shift; #self

    for my $set (@$Table) {
        my $candidate = $set->[RANGE_END];
        return $candidate if IsUsable($candidate);
    }
    return ();
}

## Return a code point that's not part of the table
## Returns nothing if the table covers all code points.
## This used only for making the test script.
##
## For each range in turn, the code point just after it is tried first,
## then the one just before it.  An empty table yields 0x1234.
sub Table::InvalidCode
{
    my $Table = shift; #self

    return 0x1234 if not @$Table;

    for my $set (@$Table)
    {
        my $after  = $set->[RANGE_END] + 1;
        return $after if IsUsable($after);

        my $before = $set->[RANGE_START] - 1;
        return $before if IsUsable($before);
    }
    return ();
}

###########################################################################
###########################################################################
###########################################################################


##
## Called like:
##   New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);
##
## The args must be in that order, although the Fuzzy pair may be omitted.
##
## This creates 'IsAll' as an alias for 'IsAny'
##
## Confesses on bad args, or if the original property does not exist.
##
sub New_Alias($$$@)
{
    my ($Type,    ## "Is" or "In"
        $Alias,
        $SameAs,  # expecting "SameAs" -- only checked, otherwise unused
        $Name,
        %Args     ## remaining args are optional key/val
       ) = @_;

    my $Fuzzy = delete $Args{Fuzzy};

    ## sanity check a few args
    if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') {
        confess "$0: bad args to New_Alias"
    }

    $Alias = CanonicalName($Alias) if $Fuzzy;

    if (not $TableInfo{$Type}->{$Name})
    {
        my $CName = CanonicalName($Name);
        if ($TableInfo{$Type}->{$CName}) {
            confess "$0: Use canonical form '$CName' instead of '$Name' for alias.";
        } else {
            confess "$0: don't have original $Type => $Name to make alias\n";
        }
    }

    ## NOTE(review): %TableInfo is keyed by Type first ('Is'/'In'), so this
    ## looks like it was meant to be $TableInfo{$Type}->{$Alias}; as written
    ## it can only fire when $Alias itself is 'Is' or 'In'.  Left unchanged
    ## because making the check effective could make existing callers die.
    if ($TableInfo{$Alias}) {
        confess "$0: already have original $Type => $Alias; can't make alias";
    }

    ## Only one alias is remembered per original name; a later call for
    ## the same $Name overwrites the earlier entry.
    $AliasInfo{$Type}->{$Name} = $Alias;
    if ($Fuzzy) {
        $FuzzyNames{$Type}->{$Alias} = $Name;
    }

}


## All assigned code points
my $Assigned = Table->New(Is => 'Assigned',
                          Desc => "All assigned code points",
                          Fuzzy => 0);

my $Name = Table->New();    ## all characters, individually by name
my $General = Table->New(); ## all characters, grouped by category
my %General;                ## character name => Table (filled by gencat)
my %Cat;                    ## category or class name => Table

## Simple Data::Dumper alike. Good enough for our needs. We can't use the real
## thing as we have to run under miniperl
##
## Takes a list of scalars and array/hash references and returns a list of
## text fragments which, joined together, are Perl source for those
## values: one element per line, each followed by a comma.  Nested
## references are expanded recursively; any other kind of reference dies.
sub simple_dumper {
    my @lines;

    for my $item (@_) {
        my $kind = ref $item;

        if (not $kind) {
            ## Plain scalar: single-quote it, or spell out undef.
            if (not defined $item) {
                push @lines, "undef,\n";
                next;
            }
            (my $quoted = $item) =~ s/([\'\\])/\\$1/gs;
            push @lines, "'$quoted',\n";
        }
        elsif ($kind eq 'ARRAY') {
            push @lines, "[\n", simple_dumper (@$item), "],\n";
        }
        elsif ($kind eq 'HASH') {
            push @lines, "{\n", simple_dumper (%$item), "},\n";
        }
        else {
            die "Can't cope with $item";
        }
    }

    @lines;
}

##
## Record one code point in every category table it belongs to.
##
## Args:
##    $name -- Name ("LATIN CAPITAL LETTER A")
##    $cat  -- Category ("Lu", "Zp", "Nd", etc.)
##    $code -- Code point (as an integer)
##    $op   -- name of the Table method to apply ('Append' or 'Extend')
##
## The Perl-generated entries of %Cat (Alnum, Alpha, ...) must already
## exist; UnicodeData_Txt() creates them before its first call.  This sub
## used to be written inside UnicodeData_Txt(); a named sub is package
## scoped wherever it appears, so it now sits beside its only caller.
##
sub gencat($$$$)
{
    my ($name, $cat, $code, $op) = @_;

    my $MajorCat = substr($cat, 0, 1); ## L, M, Z, S, etc

    $Assigned->$op($code);
    $Name->$op($code, $name);
    $General->$op($code, $cat);

    ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..)
    $Cat{$cat} ||= Table->New(Is => $cat,
                              Desc => "General Category '$cat'",
                              Fuzzy => 0);
    $Cat{$cat}->$op($code);

    ## add to the major category (e.g. "L", "N", "C", ...)
    $Cat{$MajorCat} ||= Table->New(Is => $MajorCat,
                                   Desc => "Major Category '$MajorCat'",
                                   Fuzzy => 0);
    $Cat{$MajorCat}->$op($code);

    ($General{$name} ||= Table->New)->$op($code, $name);

    # 005F: SPACING UNDERSCORE
    $Cat{Word}->$op($code)  if $cat =~ /^[LMN]|Pc/;
    $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
    $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;

    my $isspace =
        ($cat =~ /Zs|Zl|Zp/ &&
         $code != 0x200B) # 200B is ZWSP which is for line break control
         # and therefore it is not part of "space" even while it is "Zs".
        || $code == 0x0009 # 0009: HORIZONTAL TAB
        || $code == 0x000A # 000A: LINE FEED
        || $code == 0x000B # 000B: VERTICAL TAB
        || $code == 0x000C # 000C: FORM FEED
        || $code == 0x000D # 000D: CARRIAGE RETURN
        || $code == 0x0085 # 0085: NEL

        ;

    $Cat{Space}->$op($code) if $isspace;

    $Cat{SpacePerl}->$op($code) if $isspace
                                && $code != 0x000B; # Backward compat.

    $Cat{Blank}->$op($code) if $isspace
                            && !($code == 0x000A ||
                                 $code == 0x000B ||
                                 $code == 0x000C ||
                                 $code == 0x000D ||
                                 $code == 0x0085 ||
                                 $cat =~ /^Z[lp]/);

    $Cat{Digit}->$op($code) if $cat eq "Nd";
    $Cat{Upper}->$op($code) if $cat eq "Lu";
    $Cat{Lower}->$op($code) if $cat eq "Ll";
    $Cat{Title}->$op($code) if $cat eq "Lt";
    $Cat{ASCII}->$op($code) if $code <= 0x007F;
    $Cat{Cntrl}->$op($code) if $cat =~ /^C/;
    my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
    $Cat{Graph}->$op($code) if $isgraph;
    $Cat{Print}->$op($code) if $isgraph || $isspace;
    $Cat{Punct}->$op($code) if $cat =~ /^P/;

    $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39)  ## 0..9
                             || ($code >= 0x41 && $code <= 0x46)  ## A..F
                             || ($code >= 0x61 && $code <= 0x66); ## a..f
}

##
## Process UnicodeData.txt (Categories, etc.)
##
## Reads the file once, filling the category, case-mapping, bidi,
## combining-class, decomposition and numeric-type tables, derives a few
## extra properties, then writes the resulting .pl files.  Dies if
## UnicodeData.txt cannot be opened.
##
sub UnicodeData_Txt()
{
    my $Bidi = Table->New();
    my $Deco = Table->New();
    my $Comb = Table->New();
    my $Number = Table->New();
    my $Mirrored = Table->New();#Is => 'Mirrored',
                                #Desc => "Mirrored in bidirectional text",
                                #Fuzzy => 0);

    my %DC;
    my %Bidi;
    my %Number;
    $DC{can} = Table->New();
    $DC{com} = Table->New();

    ## Initialize Perl-generated categories
    ## (Categories from UnicodeData.txt are auto-initialized in gencat)
    for my $class (qw(Alnum Alpha ASCII Blank Cntrl Digit Graph Lower
                      Print Punct Space Title Upper XDigit Word))
    {
        $Cat{$class} =
            Table->New(Is => $class, Desc => "[[:$class:]]", Fuzzy => 0);
    }
    $Cat{SpacePerl} =
        Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);

    my %To;
    $To{Upper} = Table->New();
    $To{Lower} = Table->New();
    $To{Title} = Table->New();
    $To{Digit} = Table->New();

    ## open and read file.....
    open my $in, '<', "UnicodeData.txt"
        or die "$0: UnicodeData.txt: $!\n";

    ##
    ## For building \p{_CombAbove} and \p{_CanonDCIJ}
    ##
    my %_Above_HexCodes; ## Hexcodes for chars with $comb == 230 ("ABOVE")

    my %CodeToDeco;      ## Maps code to decomp. list for chars with first
                         ## decomp. char an "i" or "j" (for \p{_CanonDCIJ})

    ## This is filled in as we go....
    my $CombAbove = Table->New(Is => '_CombAbove',
                               Desc => '(for internal casefolding use)',
                               Fuzzy => 0);

    while (<$in>)
    {
        next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;

        ## Fields are separated by bare ';' (padding, if any, is dropped).
        my ($hexcode,   ## code point in hex (e.g. "0041")
            $name,      ## character name (e.g. "LATIN CAPITAL LETTER A")
            $cat,       ## category (e.g. "Lu")
            $comb,      ## Canonical combining class (e.g. "230")
            $bidi,      ## directional category (e.g. "L")
            $deco,      ## decomposition mapping
            $decimal,   ## decimal digit value
            $digit,     ## digit value
            $number,    ## numeric value
            $mirrored,  ## mirrored
            $unicode10, ## name in Unicode 1.0
            $comment,   ## comment field
            $upper,     ## uppercase mapping
            $lower,     ## lowercase mapping
            $title,     ## titlecase mapping
           ) = split(/\s*;\s*/);

        # Note that in Unicode 3.2 there will be names like
        # LINE FEED (LF), which probably means that \N{} needs
        # to cope also with LINE FEED and LF.
        $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';

        my $code = hex($hexcode);

        if ($comb and $comb == 230) {
            $CombAbove->Append($code);
            $_Above_HexCodes{$hexcode} = 1;
        }

        ## Used in building \p{_CanonDCIJ}
        if ($deco and $deco =~ m/^006[9A]\b/) {
            $CodeToDeco{$code} = $deco;
        }

        ##
        ## There are a few pairs of lines like:
        ##   AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
        ##   D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
        ## that define ranges.
        ##
        if (my ($range_name, $edge) = $name =~ /^<(.+), (First|Last)>$/)
        {
            $name = $range_name;
            gencat($name, $cat, $code, $edge eq 'First' ? 'Append' : 'Extend');
            #New_Prop(In => $name, $General{$name}, Fuzzy => 1);
        }
        else
        {
            ## normal (single-character) lines
            gencat($name, $cat, $code, 'Append');

            # No Append() here since since several codes may map into one.
            $To{Upper}->RawAppendRange($code, $code, $upper) if $upper;
            $To{Lower}->RawAppendRange($code, $code, $lower) if $lower;
            $To{Title}->RawAppendRange($code, $code, $title) if $title;
            $To{Digit}->Append($code, $decimal) if length $decimal;

            $Bidi->Append($code, $bidi);
            $Comb->Append($code, $comb) if $comb;
            $Number->Append($code, $number) if length $number;

            ## Numeric type: the most specific non-empty field wins.
            if (length $decimal) {
                ($Number{De} ||= Table->New())->Append($code);
            }
            elsif (length $digit) {
                ($Number{Di} ||= Table->New())->Append($code);
            }
            elsif (length $number) {
                ($Number{Nu} ||= Table->New())->Append($code);
            }

            $Mirrored->Append($code) if $mirrored eq "Y";

            $Bidi{$bidi} ||= Table->New();#Is => "bt/$bidi",
                                          #Desc => "Bi-directional category '$bidi'",
                                          #Fuzzy => 0);
            $Bidi{$bidi}->Append($code);

            if ($deco)
            {
                $Deco->Append($code, $deco);
                if ($deco =~/^<(\w+)>/)
                {
                    ## Tagged (compatibility) decomposition, e.g. "<font>"
                    my $dshort = $PVA_reverse{dt}{ucfirst lc $1};
                    $DC{com}->Append($code);

                    $DC{$dshort} ||= Table->New();
                    $DC{$dshort}->Append($code);
                }
                else
                {
                    $DC{can}->Append($code);
                }
            }
        }
    }
    close $in;

    ##
    ## Tidy up a few special cases....
    ##

    $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist
    New_Prop(Is => 'Cn',
             $Cat{Cn},
             Desc => "General Category 'Cn' [not functional in Perl]",
             Fuzzy => 0);

    ## Unassigned is the same as 'Cn'
    New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);

    $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C


    # LC is Ll, Lu, and Lt.
    # (used to be L& or L_, but PropValueAliases.txt defines it as LC)
    New_Prop(Is => 'LC',
             Table->Merge(@Cat{qw[Ll Lu Lt]}),
             Desc => '[\p{Ll}\p{Lu}\p{Lt}]',
             Fuzzy => 0);

    ## Any and All are all code points.
    my $Any = Table->New(Is => 'Any',
                         Desc => sprintf("[\\x{0000}-\\x{%X}]",
                                         $LastUnicodeCodepoint),
                         Fuzzy => 0);
    $Any->RawAppendRange(0, $LastUnicodeCodepoint);

    New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);

    ##
    ## Build special properties for Perl's internal case-folding needs:
    ##    \p{_CaseIgnorable}
    ##    \p{_CanonDCIJ}
    ##    \p{_CombAbove}
    ## _CombAbove was built above. Others are built here....
    ##

    ## \p{_CaseIgnorable} is [\p{Mn}\x{00AD}\x{2010}]
    New_Prop(Is => '_CaseIgnorable',
             Table->Merge($Cat{Mn},
                          0x00AD,  #SOFT HYPHEN
                          0x2010), #HYPHEN
             Desc => '(for internal casefolding use)',
             Fuzzy => 0);


    ## \p{_CanonDCIJ} is fairly complex...
    my $CanonDCIJ = Table->New(Is => '_CanonDCIJ',
                               Desc => '(for internal casefolding use)',
                               Fuzzy => 0);
    ## It contains the ASCII 'i' and 'j'....
    $CanonDCIJ->Append(0x0069); # ASCII ord("i")
    $CanonDCIJ->Append(0x006A); # ASCII ord("j")
    ## ...and any character with a decomposition that starts with either of
    ## those code points, but only if the decomposition does not have any
    ## combining character with the "ABOVE" canonical combining class.
    for my $code (sort { $a <=> $b} keys %CodeToDeco)
    {
        ## Need to ensure that none of the decomposition characters is
        ## listed in %_Above_HexCodes.
        my $want = 1;
        for my $deco_hexcode (split / /, $CodeToDeco{$code})
        {
            if (exists $_Above_HexCodes{$deco_hexcode}) {
                ## one of the decomposition chars has an ABOVE combination
                ## class, so we're not interested in this one
                $want = 0;
                last;
            }
        }
        if ($want) {
            $CanonDCIJ->Append($code);
        }
    }



    ##
    ## Now dump the files.
    ##
    $Name->Write("Name.pl");

    {
        my @PVA = $HEADER;
        foreach my $name (qw (PropertyAlias PA_reverse PropValueAlias
                              PVA_reverse PVA_abbr_map)) {
            # Should I really jump through typeglob hoops just to avoid a
            # symbolic reference? (%{"utf8::$name})
            push @PVA, "\n", "\%utf8::$name = (\n",
                simple_dumper (%{$utf8::{$name}}), ");\n";
        }
        push @PVA, "1;\n";
        WriteIfChanged("PVA.pl", @PVA);
    }

    # $Bidi->Write("Bidirectional.pl");
    for (keys %Bidi) {
        $Bidi{$_}->Write(
            ["lib","bc","$_.pl"],
            "BidiClass category '$PropValueAlias{bc}{$_}'"
        );
    }

    $Comb->Write("CombiningClass.pl");
    ## NOTE(review): each lib/ccc file below holds a single entry whose
    ## "code point" is the combining-class number itself -- confirm that
    ## this is what the consumer of these files expects.
    for (keys %{ $PropValueAlias{ccc} }) {
        my ($code, $name) = @{ $PropValueAlias{ccc}{$_} };
        (my $c = Table->New())->Append($code);
        $c->Write(
            ["lib","ccc","$_.pl"],
            "CombiningClass category '$name'"
        );
    }

    $Deco->Write("Decomposition.pl");
    for (keys %DC) {
        $DC{$_}->Write(
            ["lib","dt","$_.pl"],
            "DecompositionType category '$PropValueAlias{dt}{$_}'"
        );
    }

    # $Number->Write("Number.pl");
    for (keys %Number) {
        $Number{$_}->Write(
            ["lib","nt","$_.pl"],
            "NumericType category '$PropValueAlias{nt}{$_}'"
        );
    }

    # $General->Write("Category.pl");

    for my $to (sort keys %To) {
        $To{$to}->Write(["To","$to.pl"]);
    }

    ## Make every long General Category name a fuzzy alias of its
    ## abbreviation.
    for (keys %{ $PropValueAlias{gc} }) {
        New_Alias(Is => $PropValueAlias{gc}{$_}, SameAs => $_, Fuzzy => 1);
    }
}

1165	##
1166	## Process LineBreak.txt
1167	##
## Read LineBreak.txt and write one table per line-break class to
## lib/lb/<class>.pl.
##
## Only lines shaped like "XXXX;CLASS" or "XXXX..YYYY;CLASS" are used; the
## semicolon may be surrounded by any amount of whitespace, including none.
## Everything else (comments, blank lines) fails the match and is skipped.
## Dies if the data file cannot be opened.
sub LineBreak_Txt()
{
    open my $in, '<', "LineBreak.txt"
        or die "$0: LineBreak.txt: $!\n";

    my $Lbrk = Table->New();    # all classes in one table (its Write is disabled below)
    my %Lbrk;                   # class name => Table holding that class's code points

    while (<$in>)
    {
        # The separator must be \s*;\s*: requiring exactly one blank on each
        # side rejects lines such as "0000..0008;CM".
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;

        # With no range end, hex("") is 0, which is what the "if ($last)"
        # test below relies on to recognise a single code point.
        my ($first, $last, $lbrk) = (hex($1), hex($2||""), $3);

        $Lbrk->Append($first, $lbrk);

        $Lbrk{$lbrk} ||= Table->New();
        $Lbrk{$lbrk}->Append($first);

        if ($last) {
            $Lbrk->Extend($last);
            $Lbrk{$lbrk}->Extend($last);
        }
    }
    close $in;

    # $Lbrk->Write("Lbrk.pl");

    for (keys %Lbrk) {
        $Lbrk{$_}->Write(
            ["lib","lb","$_.pl"],
            "Linebreak category '$PropValueAlias{lb}{$_}'"
        );
    }
}
1205
1206	##
1207	## Process ArabicShaping.txt.
1208	##
## Read ArabicShaping.txt and write one table per joining type to
## lib/jt/<type>.pl.
##
## Data lines are "code; name; joining type; joining group".  Fields are
## split on a semicolon with optional surrounding whitespace.
## Dies if the data file cannot be opened.
sub ArabicShaping_txt()
{
    open my $in, '<', "ArabicShaping.txt"
        or die "$0: ArabicShaping.txt: $!\n";

    my $ArabLink      = Table->New();   # code => joining type   (Write disabled below)
    my $ArabLinkGroup = Table->New();   # code => joining group  (Write disabled below)

    my %JoinType;                       # joining type => Table of code points

    while (<$in>)
    {
        next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;

        # \s*;\s* rather than \s;\s: the latter needs a blank *before* the
        # semicolon, so a line like "0600; NAME; U; Group" would never split.
        my ($hexcode, $name, $link, $linkgroup) = split(/\s*;\s*/);
        my $code = hex($hexcode);
        $ArabLink->Append($code, $link);
        $ArabLinkGroup->Append($code, $linkgroup);

        $JoinType{$link} ||= Table->New(Is => "JoinType$link");
        $JoinType{$link}->Append($code);
    }
    close $in;

    # $ArabLink->Write("ArabLink.pl");
    # $ArabLinkGroup->Write("ArabLnkGrp.pl");

    for (keys %JoinType) {
        $JoinType{$_}->Write(
            ["lib","jt","$_.pl"],
            "JoiningType category '$PropValueAlias{jt}{$_}'"
        );
    }
}
1246
1247	##
1248	## Process EastAsianWidth.txt.
1249	##
## Read EastAsianWidth.txt and write one table per width value to
## lib/ea/<value>.pl.
##
## Data lines are "XXXX;V" or "XXXX..YYYY;V", optionally followed by a
## "#" comment, which is stripped before the fields are split.
## Dies if the data file cannot be opened.
sub EastAsianWidth_txt()
{
    open my $in, '<', "EastAsianWidth.txt"
        or die "$0: EastAsianWidth.txt: $!\n";

    my %EAW;    # width value => Table of code points

    while (<$in>)
    {
        next unless /^[0-9A-Fa-f]+(\.\.[0-9A-Fa-f]+)?;/;
        s/#.*//;
        s/\s+$//;

        # The accepted lines have the semicolon directly after the code
        # point(s), so the separator must allow zero blanks on either side.
        my ($hexcodes, $pv) = split(/\s*;\s*/);
        $EAW{$pv} ||= Table->New(Is => "EastAsianWidth$pv");
        my ($start, $end) = split(/\.\./, $hexcodes);
        if (defined $end) {
            $EAW{$pv}->AppendRange(hex($start), hex($end));
        } else {
            $EAW{$pv}->Append(hex($start));
        }
    }
    close $in;

    for (keys %EAW) {
        $EAW{$_}->Write(
            ["lib","ea","$_.pl"],
            "EastAsianWidth category '$PropValueAlias{ea}{$_}'"
        );
    }
}
1283
1284	##
1285	## Process HangulSyllableType.txt.
1286	##
## Read HangulSyllableType.txt and write one table per syllable type to
## lib/hst/<type>.pl.
##
## Only lines shaped like "XXXX ; TYPE" or "XXXX..YYYY ; TYPE" are used,
## with any amount of whitespace (including none) around the semicolon.
## Dies if the data file cannot be opened.
sub HangulSyllableType_txt()
{
    open my $in, '<', "HangulSyllableType.txt"
        or die "$0: HangulSyllableType.txt: $!\n";

    my %HST;    # syllable type => Table of code points

    while (<$in>)
    {
        # \s*;\s* so that both column-aligned and unpadded separators match.
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;

        # hex("") is 0 when there is no range end; tested by "if ($last)".
        my ($first, $last, $pv) = (hex($1), hex($2||""), $3);

        $HST{$pv} ||= Table->New(Is => "HangulSyllableType$pv");
        $HST{$pv}->Append($first);

        if ($last) { $HST{$pv}->Extend($last) }
    }
    close $in;

    for (keys %HST) {
        $HST{$_}->Write(
            ["lib","hst","$_.pl"],
            "HangulSyllableType category '$PropValueAlias{hst}{$_}'"
        );
    }
}
1314
1315	##
1316	## Process Jamo.txt.
1317	##
## Read Jamo.txt into a table of code point => short jamo name.
##
## The table is built but not written: the Write call at the end is
## commented out.  Dies if the data file cannot be opened.
sub Jamo_txt()
{
    open my $in, '<', "Jamo.txt"
        or die "$0: Jamo.txt: $!\n";

    my $Short = Table->New();

    while (<$in>)
    {
        # Semicolon with optional surrounding whitespace; the short name
        # itself may be empty, hence \w*.
        next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
        my ($code, $short) = (hex($1), $2);

        $Short->Append($code, $short);
    }
    close $in;
    # $Short->Write("JamoShort.pl");
}
1335
1336	##
1337	## Process Scripts.txt.
1338	##
## Read Scripts.txt, create one fuzzy 'Is' table per script, and define the
## pseudo-script 'Common' as everything not assigned to any script.
##
## Data lines are "XXXX ; Script # comment" or "XXXX..YYYY ; Script # ...",
## with any amount of whitespace around the semicolon.
## Dies if the data file cannot be opened.
sub Scripts_txt()
{
    my @ScriptInfo;     # [ first, last (0 if none), script name ] per data line

    open my $in, '<', "Scripts.txt"
        or die "$0: Scripts.txt: $!\n";

    while (<$in>) {
        # \s*;\s* -- the separator is not guaranteed to have exactly one
        # blank on each side, so \s;\s would skip most data lines.
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;

        # Wait until all the scripts have been read since
        # they are not listed in numeric order.
        push @ScriptInfo, [ hex($1), hex($2||""), $3 ];
    }
    close $in;

    # Now append the scripts properties in their code point order.

    my %Script;                     # script name => its own Table
    my $Scripts = Table->New();     # every script in one table

    for my $script (sort { $a->[0] <=> $b->[0] } @ScriptInfo)
    {
        my ($first, $last, $name) = @$script;
        $Scripts->Append($first, $name);

        $Script{$name} ||= Table->New(Is    => $name,
                                      Desc  => "Script '$name'",
                                      Fuzzy => 1);
        $Script{$name}->Append($first, $name);

        if ($last) {
            $Scripts->Extend($last);
            $Script{$name}->Extend($last);
        }
    }

    # $Scripts->Write("Scripts.pl");

    ## Common is everything not explicitly assigned to a Script
    ##
    ## *** shouldn't this be intersected with \p{Assigned}? ***
    ##
    New_Prop(Is => 'Common',
             $Scripts->Invert,
             Desc  => 'Pseudo-Script of codepoints not in other Unicode scripts',
             Fuzzy => 1);
}
1387
1388	##
1389	## Given a name like "Close Punctuation", return a regex (that when applied
1390	## with /i) matches any valid form of that name (e.g. "ClosePunctuation",
1391	## "Close-Punctuation", etc.)
1392	##
1393	## Accept any space, dash, or underbar where in the official name there is
1394	## space or a dash (or underbar, but there never is).
1395	##
1396	##
## Turn an official name into regex source in which every single space,
## dash or underbar of the name is replaced by an optional "dash, underbar
## or run of whitespace".  The rest of the name is passed through as is.
sub NameToRegex($)
{
    my ($official) = @_;

    # What stands in for each separator character of the official name.
    my $any_separator = '(?:[-_]|\s+)?';

    # Limit -1 keeps trailing empty fields, so a separator at the very end
    # of the name is translated as well.
    return join $any_separator, split /[- _]/, $official, -1;
}
1403
1404	##
1405	## Process Blocks.txt.
1406	##
## Read Blocks.txt and create one fuzzy 'In' table per block.
##
## Data lines are "XXXX..YYYY; Block Name", with any amount of whitespace
## around the semicolon.  Dies if the data file cannot be opened.
sub Blocks_txt()
{
    my $Blocks = Table->New();  # every block in one table (its Write is disabled below)
    my %Blocks;                 # block name => its own Table

    open my $in, '<', "Blocks.txt"
        or die "$0: Blocks.txt: $!\n";

    while (<$in>)
    {
        #next if not /Private Use$/;
        # \s*;\s* -- a line such as "0000..007F; Basic Latin" has no blank
        # in front of the semicolon and must still match.
        next if not /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;

        my ($first, $last, $name) = (hex($1), hex($2), $3);

        $Blocks->Append($first, $name);

        $Blocks{$name} ||= Table->New(In    => $name,
                                      Desc  => "Block '$name'",
                                      Fuzzy => 1);
        $Blocks{$name}->Append($first, $name);

        if ($last and $last != $first) {
            $Blocks->Extend($last);
            $Blocks{$name}->Extend($last);
        }
    }
    close $in;

    # $Blocks->Write("Blocks.pl");
}
1439
1440	##
1441	## Read in the PropList.txt. It contains extended properties not
1442	## listed in the UnicodeData.txt, such as 'Other_Alphabetic':
1443	## alphabetic but not of the general category L; many modifiers
1444	## belong to this extended property category: while they are not
1445	## alphabets, they are alphabetic in nature.
1446	##
## Read PropList.txt, write one table per extended property to
## lib/gc_sc/<name>.pl, and derive the composite properties Alphabetic,
## Lowercase, Uppercase, Math, ID_Start and ID_Continue.
##
## Data lines are "XXXX ; Property # comment" or "XXXX..YYYY ; Property # ...",
## with any amount of whitespace around the semicolon.
## Dies if the data file cannot be opened.
sub PropList_txt()
{
    my @PropInfo;   # [ first, last (0 if none), property name ] per data line

    open my $in, '<', "PropList.txt"
        or die "$0: PropList.txt: $!\n";

    while (<$in>)
    {
        # \s*;\s* -- the separator is not guaranteed to have exactly one
        # blank on each side, so \s;\s would skip most data lines.
        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;

        # Wait until all the extended properties have been read since
        # they are not listed in numeric order.
        push @PropInfo, [ hex($1), hex($2||""), $3 ];
    }
    close $in;

    # Now append the extended properties in their code point order.
    my $Props = Table->New();
    my %Prop;       # property name => its own Table

    for my $prop (sort { $a->[0] <=> $b->[0] } @PropInfo)
    {
        my ($first, $last, $name) = @$prop;
        $Props->Append($first, $name);

        $Prop{$name} ||= Table->New(Is    => $name,
                                    Desc  => "Extended property '$name'",
                                    Fuzzy => 1);
        $Prop{$name}->Append($first, $name);

        if ($last) {
            $Props->Extend($last);
            $Prop{$name}->Extend($last);
        }
    }

    for (keys %Prop) {
        # File name is the %PA_reverse entry for the property, minus underbars.
        (my $file = $PA_reverse{$_}) =~ tr/_//d;
        # XXX I'm assuming that the names from %Prop don't suffer 8.3 clashes.
        $BaseNames{lc $file}++;
        $Prop{$_}->Write(
            ["lib","gc_sc","$file.pl"],
            "Binary property '$_'"
        );
    }

    # Alphabetic is L and Other_Alphabetic.
    New_Prop(Is    => 'Alphabetic',
             Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
             Desc  => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here
             Fuzzy => 1);

    # Lowercase is Ll and Other_Lowercase.
    New_Prop(Is    => 'Lowercase',
             Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
             Desc  => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here
             Fuzzy => 1);

    # Uppercase is Lu and Other_Uppercase.
    New_Prop(Is    => 'Uppercase',
             Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
             Desc  => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here
             Fuzzy => 1);

    # Math is Sm and Other_Math.
    New_Prop(Is    => 'Math',
             Table->Merge($Cat{Sm}, $Prop{Other_Math}),
             Desc  => '[\p{Sm}\p{OtherMath}]', # use canonical names here
             Fuzzy => 1);

    # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
    New_Prop(Is    => 'ID_Start',
             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
             Desc  => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]',
             Fuzzy => 1);

    # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
    New_Prop(Is    => 'ID_Continue',
             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
             Desc  => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]',
             Fuzzy => 1);
}
1531
1532
1533	##
1534	## These are used in:
1535	## MakePropTestScript()
1536	## WriteAllMappings()
1537	## for making the test script.
1538	##
## Property name => Table used to generate tests for that name.
## Filled in by WriteAllMappings(), consumed by MakePropTestScript().
my (%FuzzyNameToTest, %ExactNameToTest);
1541
1542
1543	## This used only for making the test script
## Print Expect(...) lines for one property to the filehandle $FH.
##
## For a defined $MatchCode (a code point inside the property) and for a
## defined $FailCode (one outside it), four lines are printed, covering
## \p{..}, \p{^..}, \P{..} and \P{^..}.  An undefined code point is skipped.
sub GenTests($$$$)
{
    my ($FH, $Prop, $MatchCode, $FailCode) = @_;

    # One row per regex form:
    #   [ regex text, padding before the closing paren, expected result
    #     when the tested code point IS in the property ]
    my @forms = (
        [ "\\p{$Prop}",  ' ', 1 ],
        [ "\\p{^$Prop}", '',  0 ],
        [ "\\P{$Prop}",  ' ', 0 ],
        [ "\\P{^$Prop}", '',  1 ],
    );

    # Second element is 1 when the expectation has to be inverted, i.e.
    # when the code point is NOT in the property.
    for my $case ([ $MatchCode, 0 ], [ $FailCode, 1 ]) {
        my ($code, $invert) = @$case;
        next unless defined $code;

        for my $form (@forms) {
            my ($regex, $pad, $when_member) = @$form;
            my $expect = $when_member ^ $invert;
            printf $FH qq/Expect(${expect}, "\\x{%04X}", '${regex}'${pad});\n/, $code;
        }
    }
}
1564
1565	## This used only for making the test script
## Print two Error(...) lines for $prop to the filehandle $FH: one for the
## \p{...} form and one for the \P{...} form.
sub ExpectError($$)
{
    my ($FH, $prop) = @_;

    for my $letter ('p', 'P') {
        # Concatenated into one string so each line is a single print argument.
        print $FH "Error('\\" . $letter . "{$prop}');\n";
    }
}
1574
1575	## This used only for making the test script
## Separators used only for making the test script.
## @GoodSeps entries are used where a valid variation is wanted (the empty
## string is listed twice, so it is picked twice as often); @BadSeps
## entries are used where an invalid name is wanted.
my @GoodSeps = (" ", "-", " \t ", "", "", "_");
my @BadSeps  = ("--", "__", " _", "/");

## This used only for making the test script.
##
## Return a randomly varied spelling of $Name: the name is cut into words
## at runs of dashes, whitespace and underbars, each word is randomly
## upper-cased, lower-cased, ucfirst-ed or left alone, and the words are
## glued back together with random separators.
##
## With a true second argument exactly one entry of @BadSeps is put into
## the result: between two words (30% chance at each gap until it has
## happened), or otherwise at the very end or the very start.
sub RandomlyFuzzifyName($;$)
{
    my ($Name, $WantError) = @_;

    # Indexed by int rand(4).
    my @recase = (
        sub { uc $_[0] },
        sub { lc $_[0] },
        sub { ucfirst $_[0] },
        sub { $_[0] },
    );

    my $fuzzy      = '';
    my $seen_words = 0;

    for my $word (split /[-\s_]+/, $Name)
    {
        if ($seen_words++) {
            # rand() is consulted only while an error is still owed.
            if ($WantError and rand() < 0.3) {
                $fuzzy .= $BadSeps[rand(@BadSeps)];
                $WantError = 0;
            } else {
                $fuzzy .= $GoodSeps[rand(@GoodSeps)];
            }
        }
        $fuzzy .= $recase[int rand(4)]->($word);
    }

    return $fuzzy unless $WantError;

    # No bad separator went in between words: attach one to an end.
    return rand() >= 0.5
        ? $fuzzy . $BadSeps[rand(@BadSeps)]
        : $BadSeps[rand(@BadSeps)] . $fuzzy;
}
1630
1631	## This used only for making the test script
## This used only for making the test script.
##
## Write TestProp.pl: first the template stored after __DATA__, then the
## generated Expect()/Error() calls for every name in %ExactNameToTest and
## %FuzzyNameToTest, and finally a call to Finished().
## Dies if TestProp.pl cannot be opened for writing.
sub MakePropTestScript()
{
    ## this written directly -- it's huge.
    force_unlink ("TestProp.pl");
    open my $out, ">TestProp.pl"
        or die "$0: TestProp.pl: $!\n";

    print $out <DATA>;

    ## Exact names: test the name itself; its all-upper and all-lower
    ## spellings must be rejected whenever they differ from it.
    for my $Name (keys %ExactNameToTest)
    {
        my $Table = $ExactNameToTest{$Name};
        GenTests($out, $Name, $Table->ValidCode, $Table->InvalidCode);

        my ($upper, $lower) = (uc $Name, lc $Name);
        ExpectError($out, $upper) if $upper ne $Name;
        ExpectError($out, $lower) if $lower ne $Name;
    }

    ## Fuzzy names: test the canonical name, the original spelling and one
    ## random valid variation, then one random invalid variation.
    for my $Name (keys %FuzzyNameToTest)
    {
        my $Table = $FuzzyNameToTest{$Name};
        my $Orig  = $CanonicalToOrig{$Name};

        # A hash, so spellings that happen to coincide are tested once.
        my %Names = (
            $Name                      => 1,
            $Orig                      => 1,
            RandomlyFuzzifyName($Orig) => 1
        );

        GenTests($out, $_, $Table->ValidCode, $Table->InvalidCode)
            for keys %Names;

        ExpectError($out, RandomlyFuzzifyName($Orig, 'ERROR'));
    }

    print $out "Finished();\n";
    close $out;
}
1668
1669
1670	##
1671	## These are used only in:
1672	## RegisterFileForName()
1673	## WriteAllMappings()
1674	##
my %Exact;      ## will become %utf8::Exact;
my %Canonical;  ## will become %utf8::Canonical;
my %CaComment;  ## Comment for %Canonical entry of same key

##
## Record that property $Name of kind $Type ('Is' or 'In') is stored in
## data file $filename.
##
## Non-fuzzy names go into %Exact, fuzzy names (lower-cased) into
## %Canonical, with a matching %CaComment entry where applicable.
##
## An 'Is' name is stored without the "Is".  Any other type is stored with
## the type prepended, and additionally under the bare name unless that
## bare name already has an entry.
##
## Dies with "oops[...]" if the primary key is already taken.
##
sub RegisterFileForName($$$$)
{
    my ($Type, $Name, $IsFuzzy, $filename) = @_;
    my $plain = ($Type eq 'Is');

    unless ($IsFuzzy)
    {
        my $key = $plain ? $Name : "$Type$Name";
        die "oops[$key]" if $Exact{$key};
        $Exact{$key} = $filename;

        # Typed names also answer to the bare name when it is still free.
        $Exact{$Name} = $filename if !$plain and !$Exact{$Name};
        return;
    }

    my $CName = lc $Name;

    if ($plain)
    {
        die "oops[$CName]" if $Canonical{$CName};
        $Canonical{$CName} = $filename;

        # Only names with two or more capitals get a comment.
        $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2;
        return;
    }

    my $typed = lc "$Type$CName";
    die "oops[$Type$CName]" if $Canonical{$typed};
    $Canonical{$typed} = $filename;
    $CaComment{$typed} = "$Type$Name";

    # Typed names also answer to the bare name when it is still free.
    unless ($Canonical{$CName}) {
        $Canonical{$CName} = $filename;
        $CaComment{$CName} = "$Type$Name";
    }
}
1725
1726	##
1727	## Writes the info accumulated in
1728	##
1729	## %TableInfo;
1730	## %FuzzyNames;
1731	## %AliasInfo;
1732	##
1733	##
##
## Write every table registered in %TableInfo to lib/gc_sc/<8-char name>.pl,
## register all names and aliases through RegisterFileForName(), and then
## write the three summary files:
##
##    Properties    -- human readable list of supported \p{...} names
##    Exact.pl      -- %utf8::Exact
##    Canonical.pl  -- %utf8::Canonical
##
## Also fills %FuzzyNameToTest / %ExactNameToTest and, when $MakeTestScript
## is set, finishes by calling MakePropTestScript().
##
sub WriteAllMappings()
{
    my @MAP;    ## lines of the "Properties" file

    ## 'Is' MUST come first, so its names have precedence over 'In's
    for my $Type ('Is', 'In')
    {
        my %RawNameToFile; ## a per-$Type cache

        for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}})
        {
            ## Note: $Name is already canonical
            my $Table   = $TableInfo{$Type}->{$Name};
            my $IsFuzzy = $FuzzyNames{$Type}->{$Name};

            ## Need an 8.3 safe filename (which means "an 8 safe" $filename)
            my $filename;
            {
                ## 'Is' items lose 'Is' from the basename.
                $filename = $Type eq 'Is' ?
                    ($PVA_reverse{sc}{$Name} || $Name) :
                    "$Type$Name";

                $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
                substr($filename, 8) = '' if length($filename) > 8;

                ##
                ## Make sure the basename doesn't conflict with something we
                ## might have already written. If we have, say,
                ##     InGreekExtended1
                ##     InGreekExtended2
                ## they become
                ##     InGreekE
                ##     InGreek2
                ##
                while (my $num = $BaseNames{lc $filename}++)
                {
                    $num++; ## so basenames with numbers start with '2', which
                            ## just looks more natural.
                    ## Want to append $num, but if it'll make the basename longer
                    ## than 8 characters, pre-truncate $filename so that the result
                    ## is acceptable.
                    my $delta = length($filename) + length($num) - 8;
                    if ($delta > 0) {
                        substr($filename, -$delta) = $num;
                    } else {
                        $filename .= $num;
                    }
                }
            };

            ##
            ## Construct a nice comment to add to the file, and build data
            ## for the "./Properties" file along the way.
            ##
            my $Comment;
            {
                my $Desc = $TableDesc{$Type}->{$Name} || "";
                ## get list of names this table is referenced by
                my @Supported = $Name;
                while (my ($Orig, $Alias) = each %{ $AliasInfo{$Type} })
                {
                    if ($Orig eq $Name) {
                        push @Supported, $Alias;
                    }
                }

                my $TypeToShow = $Type eq 'Is' ? "" : $Type;
                my $OrigProp;

                $Comment = "This file supports:\n";
                for my $N (@Supported)
                {
                    my $NameIsFuzzy = $FuzzyNames{$Type}->{$N};

                    ## Show the name being listed ($N).  Using $Name here
                    ## would print the table's own name for every alias,
                    ## giving "\p{X}  Alias for \p{X}" lines.
                    my $Prop = "\\p{$TypeToShow$N}";

                    ## The first entry of @Supported is $Name itself, so
                    ## this caches the primary spelling for alias lines.
                    $OrigProp = $Prop if not $OrigProp;

                    if ($NameIsFuzzy) {
                        $Comment .= "\t$Prop (and fuzzy permutations)\n";
                    } else {
                        $Comment .= "\t$Prop\n";
                    }
                    my $MyDesc = ($N eq $Name) ? $Desc : "Alias for $OrigProp ($Desc)";

                    push @MAP, sprintf("%s %-42s %s\n",
                                       $NameIsFuzzy ? '*' : ' ', $Prop, $MyDesc);
                }
                if ($Desc) {
                    $Comment .= "\nMeaning: $Desc\n";
                }

            }
            ##
            ## Okay, write the file...
            ##
            $Table->Write(["lib","gc_sc","$filename.pl"], $Comment);

            ## and register it
            $RawNameToFile{$Name} = $filename;
            RegisterFileForName($Type => $Name, $IsFuzzy, $filename);

            if ($IsFuzzy)
            {
                my $CName = CanonicalName($Type . '_'. $Name);
                $FuzzyNameToTest{$Name}  = $Table if !$FuzzyNameToTest{$Name};
                $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
            } else {
                $ExactNameToTest{$Name} = $Table;
            }

        }

        ## Register alias info
        for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}})
        {
            my $Alias    = $AliasInfo{$Type}->{$Name};
            my $IsFuzzy  = $FuzzyNames{$Type}->{$Alias};
            my $filename = $RawNameToFile{$Name};
            die "oops [$Alias]->[$Name]" if not $filename;
            RegisterFileForName($Type => $Alias, $IsFuzzy, $filename);

            my $Table = $TableInfo{$Type}->{$Name};
            die "oops" if not $Table;
            if ($IsFuzzy)
            {
                my $CName = CanonicalName($Type .'_'. $Alias);
                $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias};
                $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
            } else {
                $ExactNameToTest{$Alias} = $Table;
            }
        }
    }

    ##
    ## Write out the property list
    ##
    {
        my @OUT = (
            "##\n",
            "## This file created by $0\n",
            "## List of built-in \\p{...}/\\P{...} properties.\n",
            "##\n",
            "## '*' means name may be 'fuzzy'\n",
            "##\n\n",
            ## sorted ignoring the two-character fuzzy-marker prefix
            sort { substr($a,2) cmp substr($b, 2) } @MAP,
        );
        WriteIfChanged('Properties', @OUT);
    }

    use Text::Tabs ();  ## using this makes the files about half the size

    ## Write Exact.pl
    {
        my @OUT = (
            $HEADER,
            "##\n",
            "## Data in this file used by ../utf8_heavy.pl\n",
            "##\n\n",
            "## Mapping from name to filename in ./lib/gc_sc\n",
            "%utf8::Exact = (\n",
        );

        $Exact{InGreek} = 'InGreekA';  # this is evil kludge
        for my $Name (sort keys %Exact)
        {
            my $File = $Exact{$Name};
            ## quote the key only when it is not a plain word
            $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
            my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/);
            push @OUT, Text::Tabs::unexpand($Text);
        }
        push @OUT, ");\n1;\n";

        WriteIfChanged('Exact.pl', @OUT);
    }

    ## Write Canonical.pl
    {
        my @OUT = (
            $HEADER,
            "##\n",
            "## Data in this file used by ../utf8_heavy.pl\n",
            "##\n\n",
            "## Mapping from lc(canonical name) to filename in ./lib\n",
            "%utf8::Canonical = (\n",
        );
        my $Trail = ""; ## used just to keep the spacing pretty
        for my $Name (sort keys %Canonical)
        {
            my $File = $Canonical{$Name};
            if ($CaComment{$Name}) {
                push @OUT, "\n" if not $Trail;
                push @OUT, " # $CaComment{$Name}\n";
                $Trail = "\n";
            } else {
                $Trail = "";
            }
            ## quote the key only when it is not a plain word
            $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
            my $Text = sprintf(" %-41s => %s,\n$Trail", $Name, qq/'$File'/);
            push @OUT, Text::Tabs::unexpand($Text);
        }
        push @OUT, ");\n1\n";
        WriteIfChanged('Canonical.pl', @OUT);
    }

    MakePropTestScript() if $MakeTestScript;
}
1940
1941
## Read SpecialCasing.txt and rewrite To/Lower.pl, To/Title.pl and
## To/Upper.pl so that each starts with a %utf8::ToSpec<Case> hash of the
## unconditional special mappings, followed by the existing simple mapping
## table with the entries for those code points removed.
##
## Data lines are "code; lower; title; upper; [condition;] # comment".
## Lines carrying a condition are skipped (not implemented).
## Dies if the data file or one of the To/*.pl files cannot be read.
sub SpecialCasing_txt()
{
    #
    # Read in the special cases.
    #

    my %CaseInfo;   # 'Lower'|'Title'|'Upper' => list of [ code point, hex code, mapping ]

    open my $in, '<', "SpecialCasing.txt"
        or die "$0: SpecialCasing.txt: $!\n";

    while (<$in>) {
        next unless /^[0-9A-Fa-f]+;/;
        s/\#.*//;
        s/\s+$//;

        # \s*;\s* rather than \s;\s: data lines look like
        # "00DF; 00DF; 0053 0073; 0053 0053;", with no blank in front of
        # the semicolons, so the stricter pattern never splits them.
        my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);

        if ($condition) { # not implemented yet
            print "# SKIPPING $_\n" if $Verbose;
            next;
        }

        # Wait until all the special cases have been read since
        # they are not listed in numeric order.
        my $ix = hex($code);
        push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ]
            unless $code eq $lower;
        push @{$CaseInfo{Title}}, [ $ix, $code, $title ]
            unless $code eq $title;
        push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ]
            unless $code eq $upper;
    }
    close $in;

    # Now write out the special cases properties in their code point order.
    # Prepend them to the To/{Upper,Lower,Title}.pl.

    for my $case (qw(Lower Title Upper))
    {
        # The table file is evaluated and its return value used as text.
        my $NormalCase = do "To/$case.pl" || die "$0: $@\n";

        my @OUT =
            (
             $HEADER, "\n",
             "# The key UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
             "%utf8::ToSpec$case =\n(\n",
            );

        for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
            my ($ix, $code, $to) = @$prop;
            my $tostr =
                join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
            push @OUT, sprintf qq["%s" => "$tostr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $ix)));
            # Remove any single-character mappings for
            # the same character since we are going for
            # the special casing rules.
            $NormalCase =~ s/^$code\t\t\w+\n//m;
        }
        push @OUT, (
                    ");\n\n",
                    "return <<'END';\n",
                    $NormalCase,
                    "END\n"
                    );
        WriteIfChanged(["To","$case.pl"], @OUT);
    }
}
2010
2011	#
2012	# Read in the case foldings.
2013	#
2014	# We will do full case folding, C + F + I (see CaseFolding.txt).
2015	#
## Read CaseFolding.txt and write To/Fold.pl.
##
## Status 'C' entries go into the common folding table; 'F' and 'I' entries
## are collected separately and written in front of that table as the
## %utf8::ToSpecFold hash.  Lines with any other status are skipped.
## Dies if the data file or the intermediate To/Fold.pl cannot be read.
sub CaseFolding_txt()
{
    open my $in, '<', "CaseFolding.txt"
        or die "$0: CaseFolding.txt: $!\n";

    my $Fold = Table->New();    # common (status C) foldings
    my %Fold;                   # code point => "hex hex ..." for F and I entries

    while (<$in>) {
        # Skip status 'S', simple case folding
        #
        # The mapping field is ONE OR MORE space-separated code points,
        # hence the trailing "*" on the group: without it only mappings of
        # exactly two code points match and every one-to-one 'C' folding is
        # silently dropped.  Separators are \s*;\s* because data lines look
        # like "0041; C; 0061; # ...", with no blank before the semicolons.
        next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;

        my ($code, $status, $fold) = (hex($1), $2, $3);

        if ($status eq 'C') { # Common: one-to-one folding
            # No append() since several codes may fold into one.
            $Fold->RawAppendRange($code, $code, $fold);
        } else { # F: full, or I: dotted uppercase I -> dotless lowercase I
            $Fold{$code} = $fold;
        }
    }
    close $in;

    $Fold->Write("To/Fold.pl");

    #
    # Prepend the special foldings to the common foldings.
    #
    # The file just written is evaluated and its return value used as text.
    my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";

    my @OUT =
        (
         $HEADER, "\n",
         "# The ke UTF-8 _bytes_, the value UTF-8 (speed hack)\n",
         "%utf8::ToSpecFold =\n(\n",
        );
    for my $code (sort { $a <=> $b } keys %Fold) {
        my $foldstr =
            join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
        push @OUT, sprintf qq["%s" => "$foldstr",\n], join("", map { sprintf "\\x%02X", $_ } unpack("U0C*", pack("U", $code)));
    }
    push @OUT, (
                ");\n\n",
                "return <<'END';\n",
                $CommonFold,
                "END\n",
                );

    WriteIfChanged(["To","Fold.pl"], @OUT);
}
2067
2068	## Do it....
2069
## Run every stage in order.  WriteAllMappings() comes after the stages
## that register 'Is'/'In' tables and before the ones that only write
## their own per-category files.
Build_Aliases();
UnicodeData_Txt();
PropList_txt();

Scripts_txt();
Blocks_txt();

WriteAllMappings();

LineBreak_Txt();
ArabicShaping_txt();
EastAsianWidth_txt();
HangulSyllableType_txt();
Jamo_txt();
SpecialCasing_txt();
CaseFolding_txt();

## Regenerate the file list: the input files first, then, below a line of
## '=' characters, 'Properties' and every .pl file found under the current
## directory.
if ( $FileList and $MakeList ) {

    print "Updating '$FileList'\n"
        if ($Verbose);

    open my $ofh,">",$FileList
        or die "Can't write to '$FileList':$!";
    print $ofh <<"EOFHEADER";
#
# mktables.lst -- File list for mktables.
#
# Autogenerated on @{[scalar localtime]}
#
# - First section is input files
# (mktables itself is automatically included)
# - Section seperator is /^=+\$/
# - Second section is a list of output files.
# - Lines matching /^\\s*#/ are treated as comments
# which along with blank lines are ignored.
#

# Input files:

EOFHEADER
    my @input=("version",glob('*.txt'));
    print $ofh "$_\n" for
        @input,
        "\n=================================\n",
        "# Output files:\n",
        # special files
        "Properties";


    require File::Find;
    my $count=0;
    File::Find::find({
        no_chdir=>1,
        wanted=>sub {
            if (/\.pl$/) {
                s!^\./!!;
                print $ofh "$_\n";
                $count++;
            }
        },
    },".");

    ## $count+1: 'Properties' is listed by hand above and is not a .pl file.
    print $ofh "\n# ",scalar(@input)," input files\n",
               "# ",scalar($count+1)," output files\n\n",
               "# End list\n";
    ## Name the file in the warning; interpolating the handle itself only
    ## prints something like "GLOB(0x...)".
    close $ofh
        or warn "Failed to close '$FileList': $!";

    print "Filelist has ",scalar(@input)," input files and ",
          scalar($count+1)," output files\n"
        if $Verbose;
}
print "All done\n" if $Verbose;
exit(0);
2145
2146	## TRAILING CODE IS USED BY MakePropTestScript()
2147	__DATA__
use strict;
use warnings;

## Counters shared by Expect(), Error() and Finished().
my $Tests = 0;
my $Fails = 0;

## Expect(EXPECTED, STRING, REGEX): count one test and complain if
## matching STRING against REGEX does not give the EXPECTED 1 or 0, or if
## compiling/running the regex dies.
sub Expect($$$)
{
    my ($Expect, $String, $Regex) = @_;
    my $Line = (caller)[2];

    $Tests++;

    ## undef here means the eval died.
    my $matched = eval {
        my $RegObj = qr/$Regex/;
        $String =~ $RegObj ? 1 : 0
    };

    unless (defined $matched) {
        print "couldn't compile /$Regex/ on $0 line $Line: $@\n";
        $Fails++;
        return;
    }

    if ($matched ^ $Expect) {
        print "bad result (expected $Expect) on $0 line $Line: $@\n";
        $Fails++;
    }
}

## Error(REGEX): count one test and complain if using REGEX does NOT die.
sub Error($)
{
    my $Regex = shift;
    $Tests++;

    my $survived = eval { 'x' =~ qr/$Regex/; 1 };
    return unless $survived;

    $Fails++;
    my $Line = (caller)[2];
    print "expected error for /$Regex/ on $0 line $Line: $@\n";
}

## Finished(): print the summary and exit, 0 if nothing failed, -1 otherwise.
sub Finished()
{
    if ($Fails) {
        print "$Tests tests, $Fails failed!\n";
        exit(-1);
    }
    print "All $Tests tests passed.\n";
    exit(0);
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: