1 | package Filter::Simple;
|
---|
2 |
|
---|
3 | use Text::Balanced ':ALL';
|
---|
4 |
|
---|
5 | use vars qw{ $VERSION @EXPORT };
|
---|
6 |
|
---|
7 | $VERSION = '0.82';
|
---|
8 |
|
---|
9 | use Filter::Util::Call;
|
---|
10 | use Carp;
|
---|
11 |
|
---|
12 | @EXPORT = qw( FILTER FILTER_ONLY );
|
---|
13 |
|
---|
14 |
|
---|
15 | sub import {
|
---|
16 | if (@_>1) { shift; goto &FILTER }
|
---|
17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
|
---|
18 | }
|
---|
19 |
|
---|
20 | sub fail {
|
---|
21 | croak "FILTER_ONLY: ", @_;
|
---|
22 | }
|
---|
23 |
|
---|
24 | my $exql = sub {
|
---|
25 | my @bits = extract_quotelike $_[0], qr//;
|
---|
26 | return unless $bits[0];
|
---|
27 | return \@bits;
|
---|
28 | };
|
---|
29 |
|
---|
30 | my $ncws = qr/\s+/;
|
---|
31 | my $comment = qr/(?<![\$\@%])#.*/;
|
---|
32 | my $ws = qr/(?:$ncws|$comment)+/;
|
---|
33 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
|
---|
34 | my $EOP = qr/\n\n|\Z/;
|
---|
35 | my $CUT = qr/\n=cut.*$EOP/;
|
---|
36 | my $pod_or_DATA = qr/
|
---|
37 | ^=(?:head[1-4]|item) .*? $CUT
|
---|
38 | | ^=pod .*? $CUT
|
---|
39 | | ^=for .*? $EOP
|
---|
40 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
|
---|
41 | | ^__(DATA|END)__\r?\n.*
|
---|
42 | /smx;
|
---|
43 |
|
---|
44 | my %extractor_for = (
|
---|
45 | quotelike => [ $ws, \&extract_variable, $id, { MATCH => \&extract_quotelike } ],
|
---|
46 | regex => [ $ws, $pod_or_DATA, $id, $exql ],
|
---|
47 | string => [ $ws, $pod_or_DATA, $id, $exql ],
|
---|
48 | code => [ $ws, { DONT_MATCH => $pod_or_DATA },
|
---|
49 | \&extract_variable,
|
---|
50 | $id, { DONT_MATCH => \&extract_quotelike } ],
|
---|
51 | code_no_comments
|
---|
52 | => [ { DONT_MATCH => $comment },
|
---|
53 | $ncws, { DONT_MATCH => $pod_or_DATA },
|
---|
54 | \&extract_variable,
|
---|
55 | $id, { DONT_MATCH => \&extract_quotelike } ],
|
---|
56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
|
---|
57 | executable_no_comments
|
---|
58 | => [ { DONT_MATCH => $comment },
|
---|
59 | $ncws, { DONT_MATCH => $pod_or_DATA } ],
|
---|
60 | all => [ { MATCH => qr/(?s:.*)/ } ],
|
---|
61 | );
|
---|
62 |
|
---|
63 | my %selector_for = (
|
---|
64 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
|
---|
65 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
|
---|
66 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
|
---|
67 | regex => sub { my ($t)=@_;
|
---|
68 | sub{ref() or return $_;
|
---|
69 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
|
---|
70 | return $_->[0] unless $op =~ /^(qr|m|s)/
|
---|
71 | || !$op && ($ld eq '/' || $ld eq '?');
|
---|
72 | $_ = $pat;
|
---|
73 | $t->(@_);
|
---|
74 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
|
---|
75 | return "$pre$ql";
|
---|
76 | };
|
---|
77 | },
|
---|
78 | string => sub { my ($t)=@_;
|
---|
79 | sub{ref() or return $_;
|
---|
80 | local *args = \@_;
|
---|
81 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
|
---|
82 | return $_->[0] if $op =~ /^(qr|m)/
|
---|
83 | || !$op && ($ld1 eq '/' || $ld1 eq '?');
|
---|
84 | if (!$op || $op eq 'tr' || $op eq 'y') {
|
---|
85 | local *_ = \$str1;
|
---|
86 | $t->(@args);
|
---|
87 | }
|
---|
88 | if ($op =~ /^(tr|y|s)/) {
|
---|
89 | local *_ = \$str2;
|
---|
90 | $t->(@args);
|
---|
91 | }
|
---|
92 | my $result = "$pre$op$ld1$str1$rd1";
|
---|
93 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
|
---|
94 | $result .= "$str2$rd2$flg";
|
---|
95 | return $result;
|
---|
96 | };
|
---|
97 | },
|
---|
98 | );
|
---|
99 |
|
---|
100 |
|
---|
101 | sub gen_std_filter_for {
|
---|
102 | my ($type, $transform) = @_;
|
---|
103 | return sub {
|
---|
104 | my $instr;
|
---|
105 | local @components;
|
---|
106 | for (extract_multiple($_,$extractor_for{$type})) {
|
---|
107 | if (ref()) { push @components, $_; $instr=0 }
|
---|
108 | elsif ($instr) { $components[-1] .= $_ }
|
---|
109 | else { push @components, $_; $instr=1 }
|
---|
110 | }
|
---|
111 | if ($type =~ /^code/) {
|
---|
112 | my $count = 0;
|
---|
113 | local $placeholder = qr/\Q$;\E(\C{4})\Q$;\E/;
|
---|
114 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
|
---|
115 | $_ = join "",
|
---|
116 | map { ref $_ ? $;.pack('N',$count++).$; : $_ }
|
---|
117 | @components;
|
---|
118 | @components = grep { ref $_ } @components;
|
---|
119 | $transform->(@_);
|
---|
120 | s/$extractor/${$components[unpack('N',$1)]}/g;
|
---|
121 | }
|
---|
122 | else {
|
---|
123 | my $selector = $selector_for{$type}->($transform);
|
---|
124 | $_ = join "", map $selector->(@_), @components;
|
---|
125 | }
|
---|
126 | }
|
---|
127 | };
|
---|
128 |
|
---|
129 | sub FILTER (&;$) {
|
---|
130 | my $caller = caller;
|
---|
131 | my ($filter, $terminator) = @_;
|
---|
132 | no warnings 'redefine';
|
---|
133 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
|
---|
134 | *{"${caller}::unimport"} = gen_filter_unimport($caller);
|
---|
135 | }
|
---|
136 |
|
---|
137 | sub FILTER_ONLY {
|
---|
138 | my $caller = caller;
|
---|
139 | while (@_ > 1) {
|
---|
140 | my ($what, $how) = splice(@_, 0, 2);
|
---|
141 | fail "Unknown selector: $what"
|
---|
142 | unless exists $extractor_for{$what};
|
---|
143 | fail "Filter for $what is not a subroutine reference"
|
---|
144 | unless ref $how eq 'CODE';
|
---|
145 | push @transforms, gen_std_filter_for($what,$how);
|
---|
146 | }
|
---|
147 | my $terminator = shift;
|
---|
148 |
|
---|
149 | my $multitransform = sub {
|
---|
150 | foreach my $transform ( @transforms ) {
|
---|
151 | $transform->(@_);
|
---|
152 | }
|
---|
153 | };
|
---|
154 | no warnings 'redefine';
|
---|
155 | *{"${caller}::import"} =
|
---|
156 | gen_filter_import($caller,$multitransform,$terminator);
|
---|
157 | *{"${caller}::unimport"} = gen_filter_unimport($caller);
|
---|
158 | }
|
---|
159 |
|
---|
160 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
|
---|
161 |
|
---|
162 | sub gen_filter_import {
|
---|
163 | my ($class, $filter, $terminator) = @_;
|
---|
164 | my %terminator;
|
---|
165 | my $prev_import = *{$class."::import"}{CODE};
|
---|
166 | return sub {
|
---|
167 | my ($imported_class, @args) = @_;
|
---|
168 | my $def_terminator =
|
---|
169 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
|
---|
170 | if (!defined $terminator) {
|
---|
171 | $terminator{terminator} = $def_terminator;
|
---|
172 | }
|
---|
173 | elsif (!ref $terminator || ref $terminator eq 'Regexp') {
|
---|
174 | $terminator{terminator} = $terminator;
|
---|
175 | }
|
---|
176 | elsif (ref $terminator ne 'HASH') {
|
---|
177 | croak "Terminator must be specified as scalar or hash ref"
|
---|
178 | }
|
---|
179 | elsif (!exists $terminator->{terminator}) {
|
---|
180 | $terminator{terminator} = $def_terminator;
|
---|
181 | }
|
---|
182 | filter_add(
|
---|
183 | sub {
|
---|
184 | my ($status, $lastline);
|
---|
185 | my $count = 0;
|
---|
186 | my $data = "";
|
---|
187 | while ($status = filter_read()) {
|
---|
188 | return $status if $status < 0;
|
---|
189 | if ($terminator{terminator} &&
|
---|
190 | m/$terminator{terminator}/) {
|
---|
191 | $lastline = $_;
|
---|
192 | last;
|
---|
193 | }
|
---|
194 | $data .= $_;
|
---|
195 | $count++;
|
---|
196 | $_ = "";
|
---|
197 | }
|
---|
198 | return $count if not $count;
|
---|
199 | $_ = $data;
|
---|
200 | $filter->($imported_class, @args) unless $status < 0;
|
---|
201 | if (defined $lastline) {
|
---|
202 | if (defined $terminator{becomes}) {
|
---|
203 | $_ .= $terminator{becomes};
|
---|
204 | }
|
---|
205 | elsif ($lastline =~ $def_terminator) {
|
---|
206 | $_ .= $lastline;
|
---|
207 | }
|
---|
208 | }
|
---|
209 | return $count;
|
---|
210 | }
|
---|
211 | );
|
---|
212 | if ($prev_import) {
|
---|
213 | goto &$prev_import;
|
---|
214 | }
|
---|
215 | elsif ($class->isa('Exporter')) {
|
---|
216 | $class->export_to_level(1,@_);
|
---|
217 | }
|
---|
218 | }
|
---|
219 | }
|
---|
220 |
|
---|
221 | sub gen_filter_unimport {
|
---|
222 | my ($class) = @_;
|
---|
223 | return sub {
|
---|
224 | filter_del();
|
---|
225 | goto &$prev_unimport if $prev_unimport;
|
---|
226 | }
|
---|
227 | }
|
---|
228 |
|
---|
229 | 1;
|
---|
230 |
|
---|
231 | __END__
|
---|
232 |
|
---|
233 | =head1 NAME
|
---|
234 |
|
---|
235 | Filter::Simple - Simplified source filtering
|
---|
236 |
|
---|
237 |
|
---|
238 | =head1 SYNOPSIS
|
---|
239 |
|
---|
240 | # in MyFilter.pm:
|
---|
241 |
|
---|
242 | package MyFilter;
|
---|
243 |
|
---|
244 | use Filter::Simple;
|
---|
245 |
|
---|
246 | FILTER { ... };
|
---|
247 |
|
---|
248 | # or just:
|
---|
249 | #
|
---|
250 | # use Filter::Simple sub { ... };
|
---|
251 |
|
---|
252 | # in user's code:
|
---|
253 |
|
---|
254 | use MyFilter;
|
---|
255 |
|
---|
256 | # this code is filtered
|
---|
257 |
|
---|
258 | no MyFilter;
|
---|
259 |
|
---|
260 | # this code is not
|
---|
261 |
|
---|
262 |
|
---|
263 | =head1 DESCRIPTION
|
---|
264 |
|
---|
265 | =head2 The Problem
|
---|
266 |
|
---|
267 | Source filtering is an immensely powerful feature of recent versions of Perl.
|
---|
268 | It allows one to extend the language itself (e.g. the Switch module), to
|
---|
269 | simplify the language (e.g. Language::Pythonesque), or to completely recast the
|
---|
270 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
|
---|
271 | the full power of Perl as its own, recursively applied, macro language.
|
---|
272 |
|
---|
273 | The excellent Filter::Util::Call module (by Paul Marquess) provides a
|
---|
274 | usable Perl interface to source filtering, but it is often too powerful
|
---|
275 | and not nearly as simple as it could be.
|
---|
276 |
|
---|
277 | To use the module it is necessary to do the following:
|
---|
278 |
|
---|
279 | =over 4
|
---|
280 |
|
---|
281 | =item 1.
|
---|
282 |
|
---|
283 | Download, build, and install the Filter::Util::Call module.
|
---|
284 | (If you have Perl 5.7.1 or later, this is already done for you.)
|
---|
285 |
|
---|
286 | =item 2.
|
---|
287 |
|
---|
288 | Set up a module that does a C<use Filter::Util::Call>.
|
---|
289 |
|
---|
290 | =item 3.
|
---|
291 |
|
---|
292 | Within that module, create an C<import> subroutine.
|
---|
293 |
|
---|
294 | =item 4.
|
---|
295 |
|
---|
296 | Within the C<import> subroutine do a call to C<filter_add>, passing
|
---|
297 | it either a subroutine reference.
|
---|
298 |
|
---|
299 | =item 5.
|
---|
300 |
|
---|
301 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
|
---|
302 | to "prime" $_ with source code data from the source file that will
|
---|
303 | C<use> your module. Check the status value returned to see if any
|
---|
304 | source code was actually read in.
|
---|
305 |
|
---|
306 | =item 6.
|
---|
307 |
|
---|
308 | Process the contents of $_ to change the source code in the desired manner.
|
---|
309 |
|
---|
310 | =item 7.
|
---|
311 |
|
---|
312 | Return the status value.
|
---|
313 |
|
---|
314 | =item 8.
|
---|
315 |
|
---|
316 | If the act of unimporting your module (via a C<no>) should cause source
|
---|
317 | code filtering to cease, create an C<unimport> subroutine, and have it call
|
---|
318 | C<filter_del>. Make sure that the call to C<filter_read> or
|
---|
319 | C<filter_read_exact> in step 5 will not accidentally read past the
|
---|
320 | C<no>. Effectively this limits source code filters to line-by-line
|
---|
321 | operation, unless the C<import> subroutine does some fancy
|
---|
322 | pre-pre-parsing of the source code it's filtering.
|
---|
323 |
|
---|
324 | =back
|
---|
325 |
|
---|
326 | For example, here is a minimal source code filter in a module named
|
---|
327 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
|
---|
328 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a
|
---|
329 | C<use BANG;> statement (until the next C<no BANG;> statement, if any):
|
---|
330 |
|
---|
331 | package BANG;
|
---|
332 |
|
---|
333 | use Filter::Util::Call ;
|
---|
334 |
|
---|
335 | sub import {
|
---|
336 | filter_add( sub {
|
---|
337 | my $caller = caller;
|
---|
338 | my ($status, $no_seen, $data);
|
---|
339 | while ($status = filter_read()) {
|
---|
340 | if (/^\s*no\s+$caller\s*;\s*?$/) {
|
---|
341 | $no_seen=1;
|
---|
342 | last;
|
---|
343 | }
|
---|
344 | $data .= $_;
|
---|
345 | $_ = "";
|
---|
346 | }
|
---|
347 | $_ = $data;
|
---|
348 | s/BANG\s+BANG/die 'BANG' if \$BANG/g
|
---|
349 | unless $status < 0;
|
---|
350 | $_ .= "no $class;\n" if $no_seen;
|
---|
351 | return 1;
|
---|
352 | })
|
---|
353 | }
|
---|
354 |
|
---|
355 | sub unimport {
|
---|
356 | filter_del();
|
---|
357 | }
|
---|
358 |
|
---|
359 | 1 ;
|
---|
360 |
|
---|
361 | This level of sophistication puts filtering out of the reach of
|
---|
362 | many programmers.
|
---|
363 |
|
---|
364 |
|
---|
365 | =head2 A Solution
|
---|
366 |
|
---|
367 | The Filter::Simple module provides a simplified interface to
|
---|
368 | Filter::Util::Call; one that is sufficient for most common cases.
|
---|
369 |
|
---|
370 | Instead of the above process, with Filter::Simple the task of setting up
|
---|
371 | a source code filter is reduced to:
|
---|
372 |
|
---|
373 | =over 4
|
---|
374 |
|
---|
375 | =item 1.
|
---|
376 |
|
---|
377 | Download and install the Filter::Simple module.
|
---|
378 | (If you have Perl 5.7.1 or later, this is already done for you.)
|
---|
379 |
|
---|
380 | =item 2.
|
---|
381 |
|
---|
382 | Set up a module that does a C<use Filter::Simple> and then
|
---|
383 | calls C<FILTER { ... }>.
|
---|
384 |
|
---|
385 | =item 3.
|
---|
386 |
|
---|
387 | Within the anonymous subroutine or block that is passed to
|
---|
388 | C<FILTER>, process the contents of $_ to change the source code in
|
---|
389 | the desired manner.
|
---|
390 |
|
---|
391 | =back
|
---|
392 |
|
---|
393 | In other words, the previous example, would become:
|
---|
394 |
|
---|
395 | package BANG;
|
---|
396 | use Filter::Simple;
|
---|
397 |
|
---|
398 | FILTER {
|
---|
399 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
400 | };
|
---|
401 |
|
---|
402 | 1 ;
|
---|
403 |
|
---|
404 | Note that the source code is passed as a single string, so any regex that
|
---|
405 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag.
|
---|
406 |
|
---|
407 | =head2 Disabling or changing <no> behaviour
|
---|
408 |
|
---|
409 | By default, the installed filter only filters up to a line consisting of one of
|
---|
410 | the three standard source "terminators":
|
---|
411 |
|
---|
412 | no ModuleName; # optional comment
|
---|
413 |
|
---|
414 | or:
|
---|
415 |
|
---|
416 | __END__
|
---|
417 |
|
---|
418 | or:
|
---|
419 |
|
---|
420 | __DATA__
|
---|
421 |
|
---|
422 | but this can be altered by passing a second argument to C<use Filter::Simple>
|
---|
423 | or C<FILTER> (just remember: there's I<no> comma after the initial block when
|
---|
424 | you use C<FILTER>).
|
---|
425 |
|
---|
426 | That second argument may be either a C<qr>'d regular expression (which is then
|
---|
427 | used to match the terminator line), or a defined false value (which indicates
|
---|
428 | that no terminator line should be looked for), or a reference to a hash
|
---|
429 | (in which case the terminator is the value associated with the key
|
---|
430 | C<'terminator'>.
|
---|
431 |
|
---|
432 | For example, to cause the previous filter to filter only up to a line of the
|
---|
433 | form:
|
---|
434 |
|
---|
435 | GNAB esu;
|
---|
436 |
|
---|
437 | you would write:
|
---|
438 |
|
---|
439 | package BANG;
|
---|
440 | use Filter::Simple;
|
---|
441 |
|
---|
442 | FILTER {
|
---|
443 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
444 | }
|
---|
445 | qr/^\s*GNAB\s+esu\s*;\s*?$/;
|
---|
446 |
|
---|
447 | or:
|
---|
448 |
|
---|
449 | FILTER {
|
---|
450 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
451 | }
|
---|
452 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
|
---|
453 |
|
---|
454 | and to prevent the filter's being turned off in any way:
|
---|
455 |
|
---|
456 | package BANG;
|
---|
457 | use Filter::Simple;
|
---|
458 |
|
---|
459 | FILTER {
|
---|
460 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
461 | }
|
---|
462 | ""; # or: 0
|
---|
463 |
|
---|
464 | or:
|
---|
465 |
|
---|
466 | FILTER {
|
---|
467 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
468 | }
|
---|
469 | { terminator => "" };
|
---|
470 |
|
---|
471 | B<Note that, no matter what you set the terminator pattern to,
|
---|
472 | the actual terminator itself I<must> be contained on a single source line.>
|
---|
473 |
|
---|
474 |
|
---|
475 | =head2 All-in-one interface
|
---|
476 |
|
---|
477 | Separating the loading of Filter::Simple:
|
---|
478 |
|
---|
479 | use Filter::Simple;
|
---|
480 |
|
---|
481 | from the setting up of the filtering:
|
---|
482 |
|
---|
483 | FILTER { ... };
|
---|
484 |
|
---|
485 | is useful because it allows other code (typically parser support code
|
---|
486 | or caching variables) to be defined before the filter is invoked.
|
---|
487 | However, there is often no need for such a separation.
|
---|
488 |
|
---|
489 | In those cases, it is easier to just append the filtering subroutine and
|
---|
490 | any terminator specification directly to the C<use> statement that loads
|
---|
491 | Filter::Simple, like so:
|
---|
492 |
|
---|
493 | use Filter::Simple sub {
|
---|
494 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
495 | };
|
---|
496 |
|
---|
497 | This is exactly the same as:
|
---|
498 |
|
---|
499 | use Filter::Simple;
|
---|
500 | BEGIN {
|
---|
501 | Filter::Simple::FILTER {
|
---|
502 | s/BANG\s+BANG/die 'BANG' if \$BANG/g;
|
---|
503 | };
|
---|
504 | }
|
---|
505 |
|
---|
506 | except that the C<FILTER> subroutine is not exported by Filter::Simple.
|
---|
507 |
|
---|
508 |
|
---|
509 | =head2 Filtering only specific components of source code
|
---|
510 |
|
---|
511 | One of the problems with a filter like:
|
---|
512 |
|
---|
513 | use Filter::Simple;
|
---|
514 |
|
---|
515 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
|
---|
516 |
|
---|
517 | is that it indiscriminately applies the specified transformation to
|
---|
518 | the entire text of your source program. So something like:
|
---|
519 |
|
---|
520 | warn 'BANG BANG, YOU'RE DEAD';
|
---|
521 | BANG BANG;
|
---|
522 |
|
---|
523 | will become:
|
---|
524 |
|
---|
525 | warn 'die 'BANG' if $BANG, YOU'RE DEAD';
|
---|
526 | die 'BANG' if $BANG;
|
---|
527 |
|
---|
528 | It is very common when filtering source to only want to apply the filter
|
---|
529 | to the non-character-string parts of the code, or alternatively to I<only>
|
---|
530 | the character strings.
|
---|
531 |
|
---|
532 | Filter::Simple supports this type of filtering by automatically
|
---|
533 | exporting the C<FILTER_ONLY> subroutine.
|
---|
534 |
|
---|
535 | C<FILTER_ONLY> takes a sequence of specifiers that install separate
|
---|
536 | (and possibly multiple) filters that act on only parts of the source code.
|
---|
537 | For example:
|
---|
538 |
|
---|
539 | use Filter::Simple;
|
---|
540 |
|
---|
541 | FILTER_ONLY
|
---|
542 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
|
---|
543 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
|
---|
544 |
|
---|
545 | The C<"code"> subroutine will only be used to filter parts of the source
|
---|
546 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
|
---|
547 | subroutine only filters Perl quotelikes (including here documents).
|
---|
548 |
|
---|
549 | The full list of alternatives is:
|
---|
550 |
|
---|
551 | =over
|
---|
552 |
|
---|
553 | =item C<"code">
|
---|
554 |
|
---|
555 | Filters only those sections of the source code that are not quotelikes, POD, or
|
---|
556 | C<__DATA__>.
|
---|
557 |
|
---|
558 | =item C<"code_no_comments">
|
---|
559 |
|
---|
560 | Filters only those sections of the source code that are not quotelikes, POD,
|
---|
561 | comments, or C<__DATA__>.
|
---|
562 |
|
---|
563 | =item C<"executable">
|
---|
564 |
|
---|
565 | Filters only those sections of the source code that are not POD or C<__DATA__>.
|
---|
566 |
|
---|
567 | =item C<"executable_no_comments">
|
---|
568 |
|
---|
569 | Filters only those sections of the source code that are not POD, comments, or C<__DATA__>.
|
---|
570 |
|
---|
571 | =item C<"quotelike">
|
---|
572 |
|
---|
573 | Filters only Perl quotelikes (as interpreted by
|
---|
574 | C<&Text::Balanced::extract_quotelike>).
|
---|
575 |
|
---|
576 | =item C<"string">
|
---|
577 |
|
---|
578 | Filters only the string literal parts of a Perl quotelike (i.e. the
|
---|
579 | contents of a string literal, either half of a C<tr///>, the second
|
---|
580 | half of an C<s///>).
|
---|
581 |
|
---|
582 | =item C<"regex">
|
---|
583 |
|
---|
584 | Filters only the pattern literal parts of a Perl quotelike (i.e. the
|
---|
585 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
|
---|
586 |
|
---|
587 | =item C<"all">
|
---|
588 |
|
---|
589 | Filters everything. Identical in effect to C<FILTER>.
|
---|
590 |
|
---|
591 | =back
|
---|
592 |
|
---|
593 | Except for C<< FILTER_ONLY code => sub {...} >>, each of
|
---|
594 | the component filters is called repeatedly, once for each component
|
---|
595 | found in the source code.
|
---|
596 |
|
---|
597 | Note that you can also apply two or more of the same type of filter in
|
---|
598 | a single C<FILTER_ONLY>. For example, here's a simple
|
---|
599 | macro-preprocessor that is only applied within regexes,
|
---|
600 | with a final debugging pass that prints the resulting source code:
|
---|
601 |
|
---|
602 | use Regexp::Common;
|
---|
603 | FILTER_ONLY
|
---|
604 | regex => sub { s/!\[/[^/g },
|
---|
605 | regex => sub { s/%d/$RE{num}{int}/g },
|
---|
606 | regex => sub { s/%f/$RE{num}{real}/g },
|
---|
607 | all => sub { print if $::DEBUG };
|
---|
608 |
|
---|
609 |
|
---|
610 |
|
---|
611 | =head2 Filtering only the code parts of source code
|
---|
612 |
|
---|
613 | Most source code ceases to be grammatically correct when it is broken up
|
---|
614 | into the pieces between string literals and regexes. So the C<'code'>
|
---|
615 | and C<'code_no_comments'> component filter behave slightly differently
|
---|
616 | from the other partial filters described in the previous section.
|
---|
617 |
|
---|
618 | Rather than calling the specified processor on each individual piece of
|
---|
619 | code (i.e. on the bits between quotelikes), the C<'code...'> partial
|
---|
620 | filters operate on the entire source code, but with the quotelike bits
|
---|
621 | (and, in the case of C<'code_no_comments'>, the comments) "blanked out".
|
---|
622 |
|
---|
623 | That is, a C<'code...'> filter I<replaces> each quoted string, quotelike,
|
---|
624 | regex, POD, and __DATA__ section with a placeholder. The
|
---|
625 | delimiters of this placeholder are the contents of the C<$;> variable
|
---|
626 | at the time the filter is applied (normally C<"\034">). The remaining
|
---|
627 | four bytes are a unique identifier for the component being replaced.
|
---|
628 |
|
---|
629 | This approach makes it comparatively easy to write code preprocessors
|
---|
630 | without worrying about the form or contents of strings, regexes, etc.
|
---|
631 |
|
---|
632 | For convenience, during a C<'code...'> filtering operation, Filter::Simple
|
---|
633 | provides a package variable (C<$Filter::Simple::placeholder>) that
|
---|
634 | contains a pre-compiled regex that matches any placeholder...and
|
---|
635 | captures the identifier within the placeholder. Placeholders can be
|
---|
636 | moved and re-ordered within the source code as needed.
|
---|
637 |
|
---|
638 | In addition, a second package variable (C<@Filter::Simple::components>)
|
---|
639 | contains a list of the various pieces of C<$_>, as they were originally split
|
---|
640 | up to allow placeholders to be inserted.
|
---|
641 |
|
---|
642 | Once the filtering has been applied, the original strings, regexes, POD,
|
---|
643 | etc. are re-inserted into the code, by replacing each placeholder with
|
---|
644 | the corresponding original component (from C<@components>). Note that
|
---|
645 | this means that the C<@components> variable must be treated with extreme
|
---|
646 | care within the filter. The C<@components> array stores the "back-
|
---|
647 | translations" of each placeholder inserted into C<$_>, as well as the
|
---|
648 | interstitial source code between placeholders. If the placeholder
|
---|
649 | backtranslations are altered in C<@components>, they will be similarly
|
---|
650 | changed when the placeholders are removed from C<$_> after the filter
|
---|
651 | is complete.
|
---|
652 |
|
---|
653 | For example, the following filter detects concatentated pairs of
|
---|
654 | strings/quotelikes and reverses the order in which they are
|
---|
655 | concatenated:
|
---|
656 |
|
---|
657 | package DemoRevCat;
|
---|
658 | use Filter::Simple;
|
---|
659 |
|
---|
660 | FILTER_ONLY code => sub {
|
---|
661 | my $ph = $Filter::Simple::placeholder;
|
---|
662 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
|
---|
663 | };
|
---|
664 |
|
---|
665 | Thus, the following code:
|
---|
666 |
|
---|
667 | use DemoRevCat;
|
---|
668 |
|
---|
669 | my $str = "abc" . q(def);
|
---|
670 |
|
---|
671 | print "$str\n";
|
---|
672 |
|
---|
673 | would become:
|
---|
674 |
|
---|
675 | my $str = q(def)."abc";
|
---|
676 |
|
---|
677 | print "$str\n";
|
---|
678 |
|
---|
679 | and hence print:
|
---|
680 |
|
---|
681 | defabc
|
---|
682 |
|
---|
683 |
|
---|
684 | =head2 Using Filter::Simple with an explicit C<import> subroutine
|
---|
685 |
|
---|
686 | Filter::Simple generates a special C<import> subroutine for
|
---|
687 | your module (see L<"How it works">) which would normally replace any
|
---|
688 | C<import> subroutine you might have explicitly declared.
|
---|
689 |
|
---|
690 | However, Filter::Simple is smart enough to notice your existing
|
---|
691 | C<import> and Do The Right Thing with it.
|
---|
692 | That is, if you explicitly define an C<import> subroutine in a package
|
---|
693 | that's using Filter::Simple, that C<import> subroutine will still
|
---|
694 | be invoked immediately after any filter you install.
|
---|
695 |
|
---|
696 | The only thing you have to remember is that the C<import> subroutine
|
---|
697 | I<must> be declared I<before> the filter is installed. If you use C<FILTER>
|
---|
698 | to install the filter:
|
---|
699 |
|
---|
700 | package Filter::TurnItUpTo11;
|
---|
701 |
|
---|
702 | use Filter::Simple;
|
---|
703 |
|
---|
704 | FILTER { s/(\w+)/\U$1/ };
|
---|
705 |
|
---|
706 | that will almost never be a problem, but if you install a filtering
|
---|
707 | subroutine by passing it directly to the C<use Filter::Simple>
|
---|
708 | statement:
|
---|
709 |
|
---|
710 | package Filter::TurnItUpTo11;
|
---|
711 |
|
---|
712 | use Filter::Simple sub{ s/(\w+)/\U$1/ };
|
---|
713 |
|
---|
714 | then you must make sure that your C<import> subroutine appears before
|
---|
715 | that C<use> statement.
|
---|
716 |
|
---|
717 |
|
---|
718 | =head2 Using Filter::Simple and Exporter together
|
---|
719 |
|
---|
720 | Likewise, Filter::Simple is also smart enough
|
---|
721 | to Do The Right Thing if you use Exporter:
|
---|
722 |
|
---|
723 | package Switch;
|
---|
724 | use base Exporter;
|
---|
725 | use Filter::Simple;
|
---|
726 |
|
---|
727 | @EXPORT = qw(switch case);
|
---|
728 | @EXPORT_OK = qw(given when);
|
---|
729 |
|
---|
730 | FILTER { $_ = magic_Perl_filter($_) }
|
---|
731 |
|
---|
732 | Immediately after the filter has been applied to the source,
|
---|
733 | Filter::Simple will pass control to Exporter, so it can do its magic too.
|
---|
734 |
|
---|
735 | Of course, here too, Filter::Simple has to know you're using Exporter
|
---|
736 | before it applies the filter. That's almost never a problem, but if you're
|
---|
737 | nervous about it, you can guarantee that things will work correctly by
|
---|
738 | ensuring that your C<use base Exporter> always precedes your
|
---|
739 | C<use Filter::Simple>.
|
---|
740 |
|
---|
741 |
|
---|
742 | =head2 How it works
|
---|
743 |
|
---|
744 | The Filter::Simple module exports into the package that calls C<FILTER>
|
---|
745 | (or C<use>s it directly) -- such as package "BANG" in the above example --
|
---|
746 | two automagically constructed
|
---|
747 | subroutines -- C<import> and C<unimport> -- which take care of all the
|
---|
748 | nasty details.
|
---|
749 |
|
---|
750 | In addition, the generated C<import> subroutine passes its own argument
|
---|
751 | list to the filtering subroutine, so the BANG.pm filter could easily
|
---|
752 | be made parametric:
|
---|
753 |
|
---|
754 | package BANG;
|
---|
755 |
|
---|
756 | use Filter::Simple;
|
---|
757 |
|
---|
758 | FILTER {
|
---|
759 | my ($die_msg, $var_name) = @_;
|
---|
760 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
|
---|
761 | };
|
---|
762 |
|
---|
763 | # and in some user code:
|
---|
764 |
|
---|
765 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
|
---|
766 |
|
---|
767 |
|
---|
768 | The specified filtering subroutine is called every time a C<use BANG> is
|
---|
769 | encountered, and passed all the source code following that call, up to
|
---|
770 | either the next C<no BANG;> (or whatever terminator you've set) or the
|
---|
771 | end of the source file, whichever occurs first. By default, any C<no
|
---|
772 | BANG;> call must appear by itself on a separate line, or it is ignored.
|
---|
773 |
|
---|
774 |
|
---|
775 | =head1 AUTHOR
|
---|
776 |
|
---|
777 | Damian Conway ([email protected])
|
---|
778 |
|
---|
779 | =head1 COPYRIGHT
|
---|
780 |
|
---|
781 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
|
---|
782 | This module is free software. It may be used, redistributed
|
---|
783 | and/or modified under the same terms as Perl itself.
|
---|