1 |
|
---|
2 | package Locale::Maketext::Guts;
|
---|
3 | BEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; }
|
---|
4 | # Just so we're nice and define SOMETHING in "our" package.
|
---|
5 |
|
---|
6 | package Locale::Maketext;
|
---|
7 | use strict;
|
---|
8 | use vars qw($USE_LITERALS $GUTSPATH);
|
---|
9 |
|
---|
10 | BEGIN {
|
---|
11 | $GUTSPATH = __FILE__;
|
---|
12 | *DEBUG = sub () {0} unless defined &DEBUG;
|
---|
13 | }
|
---|
14 |
|
---|
15 | use utf8;
|
---|
16 |
|
---|
17 | sub _compile {
|
---|
18 | # This big scary routine compiles an entry.
|
---|
19 | # It returns either a coderef if there's brackety bits in this, or
|
---|
20 | # otherwise a ref to a scalar.
|
---|
21 |
|
---|
22 | my $target = ref($_[0]) || $_[0];
|
---|
23 |
|
---|
24 | my(@code);
|
---|
25 | my(@c) = (''); # "chunks" -- scratch.
|
---|
26 | my $call_count = 0;
|
---|
27 | my $big_pile = '';
|
---|
28 | {
|
---|
29 | my $in_group = 0; # start out outside a group
|
---|
30 | my($m, @params); # scratch
|
---|
31 |
|
---|
32 | while($_[1] =~ # Iterate over chunks.
|
---|
33 | m<\G(
|
---|
34 | [^\~\[\]]+ # non-~[] stuff
|
---|
35 | |
|
---|
36 | ~. # ~[, ~], ~~, ~other
|
---|
37 | |
|
---|
38 | \[ # [ presumably opening a group
|
---|
39 | |
|
---|
40 | \] # ] presumably closing a group
|
---|
41 | |
|
---|
42 | ~ # terminal ~ ?
|
---|
43 | |
|
---|
44 | $
|
---|
45 | )>xgs
|
---|
46 | ) {
|
---|
47 | print " \"$1\"\n" if DEBUG > 2;
|
---|
48 |
|
---|
49 | if($1 eq '[' or $1 eq '') { # "[" or end
|
---|
50 | # Whether this is "[" or end, force processing of any
|
---|
51 | # preceding literal.
|
---|
52 | if($in_group) {
|
---|
53 | if($1 eq '') {
|
---|
54 | $target->_die_pointing($_[1], "Unterminated bracket group");
|
---|
55 | } else {
|
---|
56 | $target->_die_pointing($_[1], "You can't nest bracket groups");
|
---|
57 | }
|
---|
58 | } else {
|
---|
59 | if($1 eq '') {
|
---|
60 | print " [end-string]\n" if DEBUG > 2;
|
---|
61 | } else {
|
---|
62 | $in_group = 1;
|
---|
63 | }
|
---|
64 | die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
|
---|
65 | if(length $c[-1]) {
|
---|
66 | # Now actually processing the preceding literal
|
---|
67 | $big_pile .= $c[-1];
|
---|
68 | if($USE_LITERALS and (
|
---|
69 | (ord('A') == 65)
|
---|
70 | ? $c[-1] !~ m<[^\x20-\x7E]>s
|
---|
71 | # ASCII very safe chars
|
---|
72 | : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
|
---|
73 | # EBCDIC very safe chars
|
---|
74 | )) {
|
---|
75 | # normal case -- all very safe chars
|
---|
76 | $c[-1] =~ s/'/\\'/g;
|
---|
77 | push @code, q{ '} . $c[-1] . "',\n";
|
---|
78 | $c[-1] = ''; # reuse this slot
|
---|
79 | } else {
|
---|
80 | push @code, ' $c[' . $#c . "],\n";
|
---|
81 | push @c, ''; # new chunk
|
---|
82 | }
|
---|
83 | }
|
---|
84 | # else just ignore the empty string.
|
---|
85 | }
|
---|
86 |
|
---|
87 | } elsif($1 eq ']') { # "]"
|
---|
88 | # close group -- go back in-band
|
---|
89 | if($in_group) {
|
---|
90 | $in_group = 0;
|
---|
91 |
|
---|
92 | print " --Closing group [$c[-1]]\n" if DEBUG > 2;
|
---|
93 |
|
---|
94 | # And now process the group...
|
---|
95 |
|
---|
96 | if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
|
---|
97 | DEBUG > 2 and print " -- (Ignoring)\n";
|
---|
98 | $c[-1] = ''; # reset out chink
|
---|
99 | next;
|
---|
100 | }
|
---|
101 |
|
---|
102 | #$c[-1] =~ s/^\s+//s;
|
---|
103 | #$c[-1] =~ s/\s+$//s;
|
---|
104 | ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/
|
---|
105 |
|
---|
106 | # A bit of a hack -- we've turned "~,"'s into DELs, so turn
|
---|
107 | # 'em into real commas here.
|
---|
108 | if (ord('A') == 65) { # ASCII, etc
|
---|
109 | foreach($m, @params) { tr/\x7F/,/ }
|
---|
110 | } else { # EBCDIC (1047, 0037, POSIX-BC)
|
---|
111 | # Thanks to Peter Prymmer for the EBCDIC handling
|
---|
112 | foreach($m, @params) { tr/\x07/,/ }
|
---|
113 | }
|
---|
114 |
|
---|
115 | # Special-case handling of some method names:
|
---|
116 | if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
|
---|
117 | # Treat [_1,...] as [,_1,...], etc.
|
---|
118 | unshift @params, $m;
|
---|
119 | $m = '';
|
---|
120 | } elsif($m eq '*') {
|
---|
121 | $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
|
---|
122 | } elsif($m eq '#') {
|
---|
123 | $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
|
---|
124 | }
|
---|
125 |
|
---|
126 | # Most common case: a simple, legal-looking method name
|
---|
127 | if($m eq '') {
|
---|
128 | # 0-length method name means to just interpolate:
|
---|
129 | push @code, ' (';
|
---|
130 | } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
|
---|
131 | and $m !~ m<(?:^|\:)\d>s
|
---|
132 | # exclude starting a (sub)package or symbol with a digit
|
---|
133 | ) {
|
---|
134 | # Yes, it even supports the demented (and undocumented?)
|
---|
135 | # $obj->Foo::bar(...) syntax.
|
---|
136 | $target->_die_pointing(
|
---|
137 | $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
|
---|
138 | 2 + length($c[-1])
|
---|
139 | )
|
---|
140 | if $m =~ m/^SUPER::/s;
|
---|
141 | # Because for SUPER:: to work, we'd have to compile this into
|
---|
142 | # the right package, and that seems just not worth the bother,
|
---|
143 | # unless someone convinces me otherwise.
|
---|
144 |
|
---|
145 | push @code, ' $_[0]->' . $m . '(';
|
---|
146 | } else {
|
---|
147 | # TODO: implement something? or just too icky to consider?
|
---|
148 | $target->_die_pointing(
|
---|
149 | $_[1],
|
---|
150 | "Can't use \"$m\" as a method name in bracket group",
|
---|
151 | 2 + length($c[-1])
|
---|
152 | );
|
---|
153 | }
|
---|
154 |
|
---|
155 | pop @c; # we don't need that chunk anymore
|
---|
156 | ++$call_count;
|
---|
157 |
|
---|
158 | foreach my $p (@params) {
|
---|
159 | if($p eq '_*') {
|
---|
160 | # Meaning: all parameters except $_[0]
|
---|
161 | $code[-1] .= ' @_[1 .. $#_], ';
|
---|
162 | # and yes, that does the right thing for all @_ < 3
|
---|
163 | } elsif($p =~ m<^_(-?\d+)$>s) {
|
---|
164 | # _3 meaning $_[3]
|
---|
165 | $code[-1] .= '$_[' . (0 + $1) . '], ';
|
---|
166 | } elsif($USE_LITERALS and (
|
---|
167 | (ord('A') == 65)
|
---|
168 | ? $p !~ m<[^\x20-\x7E]>s
|
---|
169 | # ASCII very safe chars
|
---|
170 | : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
|
---|
171 | # EBCDIC very safe chars
|
---|
172 | )) {
|
---|
173 | # Normal case: a literal containing only safe characters
|
---|
174 | $p =~ s/'/\\'/g;
|
---|
175 | $code[-1] .= q{'} . $p . q{', };
|
---|
176 | } else {
|
---|
177 | # Stow it on the chunk-stack, and just refer to that.
|
---|
178 | push @c, $p;
|
---|
179 | push @code, ' $c[' . $#c . "], ";
|
---|
180 | }
|
---|
181 | }
|
---|
182 | $code[-1] .= "),\n";
|
---|
183 |
|
---|
184 | push @c, '';
|
---|
185 | } else {
|
---|
186 | $target->_die_pointing($_[1], "Unbalanced ']'");
|
---|
187 | }
|
---|
188 |
|
---|
189 | } elsif(substr($1,0,1) ne '~') {
|
---|
190 | # it's stuff not containing "~" or "[" or "]"
|
---|
191 | # i.e., a literal blob
|
---|
192 | $c[-1] .= $1;
|
---|
193 |
|
---|
194 | } elsif($1 eq '~~') { # "~~"
|
---|
195 | $c[-1] .= '~';
|
---|
196 |
|
---|
197 | } elsif($1 eq '~[') { # "~["
|
---|
198 | $c[-1] .= '[';
|
---|
199 |
|
---|
200 | } elsif($1 eq '~]') { # "~]"
|
---|
201 | $c[-1] .= ']';
|
---|
202 |
|
---|
203 | } elsif($1 eq '~,') { # "~,"
|
---|
204 | if($in_group) {
|
---|
205 | # This is a hack, based on the assumption that no-one will actually
|
---|
206 | # want a DEL inside a bracket group. Let's hope that's it's true.
|
---|
207 | if (ord('A') == 65) { # ASCII etc
|
---|
208 | $c[-1] .= "\x7F";
|
---|
209 | } else { # EBCDIC (cp 1047, 0037, POSIX-BC)
|
---|
210 | $c[-1] .= "\x07";
|
---|
211 | }
|
---|
212 | } else {
|
---|
213 | $c[-1] .= '~,';
|
---|
214 | }
|
---|
215 |
|
---|
216 | } elsif($1 eq '~') { # possible only at string-end, it seems.
|
---|
217 | $c[-1] .= '~';
|
---|
218 |
|
---|
219 | } else {
|
---|
220 | # It's a "~X" where X is not a special character.
|
---|
221 | # Consider it a literal ~ and X.
|
---|
222 | $c[-1] .= $1;
|
---|
223 | }
|
---|
224 | }
|
---|
225 | }
|
---|
226 |
|
---|
227 | if($call_count) {
|
---|
228 | undef $big_pile; # Well, nevermind that.
|
---|
229 | } else {
|
---|
230 | # It's all literals! Ahwell, that can happen.
|
---|
231 | # So don't bother with the eval. Return a SCALAR reference.
|
---|
232 | return \$big_pile;
|
---|
233 | }
|
---|
234 |
|
---|
235 | die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
|
---|
236 | print scalar(@c), " chunks under closure\n" if DEBUG;
|
---|
237 | if(@code == 0) { # not possible?
|
---|
238 | print "Empty code\n" if DEBUG;
|
---|
239 | return \'';
|
---|
240 | } elsif(@code > 1) { # most cases, presumably!
|
---|
241 | unshift @code, "join '',\n";
|
---|
242 | }
|
---|
243 | unshift @code, "use strict; sub {\n";
|
---|
244 | push @code, "}\n";
|
---|
245 |
|
---|
246 | print @code if DEBUG;
|
---|
247 | my $sub = eval(join '', @code);
|
---|
248 | die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
|
---|
249 | return $sub;
|
---|
250 | }
|
---|
251 |
|
---|
252 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
---|
253 |
|
---|
254 | sub _die_pointing {
|
---|
255 | # This is used by _compile to throw a fatal error
|
---|
256 | my $target = shift; # class name
|
---|
257 | # ...leaving $_[0] the error-causing text, and $_[1] the error message
|
---|
258 |
|
---|
259 | my $i = index($_[0], "\n");
|
---|
260 |
|
---|
261 | my $pointy;
|
---|
262 | my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
|
---|
263 | if($pos < 1) {
|
---|
264 | $pointy = "^=== near there\n";
|
---|
265 | } else { # we need to space over
|
---|
266 | my $first_tab = index($_[0], "\t");
|
---|
267 | if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
|
---|
268 | # No tabs, or the first tab is harmlessly after where we will point to,
|
---|
269 | # AND we're far enough from the margin that we can draw a proper arrow.
|
---|
270 | $pointy = ('=' x $pos) . "^ near there\n";
|
---|
271 | } else {
|
---|
272 | # tabs screw everything up!
|
---|
273 | $pointy = substr($_[0],0,$pos);
|
---|
274 | $pointy =~ tr/\t //cd;
|
---|
275 | # make everything into whitespace, but preseving tabs
|
---|
276 | $pointy .= "^=== near there\n";
|
---|
277 | }
|
---|
278 | }
|
---|
279 |
|
---|
280 | my $errmsg = "$_[1], in\:\n$_[0]";
|
---|
281 |
|
---|
282 | if($i == -1) {
|
---|
283 | # No newline.
|
---|
284 | $errmsg .= "\n" . $pointy;
|
---|
285 | } elsif($i == (length($_[0]) - 1) ) {
|
---|
286 | # Already has a newline at end.
|
---|
287 | $errmsg .= $pointy;
|
---|
288 | } else {
|
---|
289 | # don't bother with the pointy bit, I guess.
|
---|
290 | }
|
---|
291 | Carp::croak( "$errmsg via $target, as used" );
|
---|
292 | }
|
---|
293 |
|
---|
294 | 1;
|
---|
295 |
|
---|