source: for-distributions/trunk/bin/windows/perl/lib/Filter/Simple.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago

upgrading to perl 5.8

File size: 22.4 KB
RevLine 
[14489]1package Filter::Simple;
2
3use Text::Balanced ':ALL';
4
5use vars qw{ $VERSION @EXPORT };
6
7$VERSION = '0.82';
8
9use Filter::Util::Call;
10use Carp;
11
12@EXPORT = qw( FILTER FILTER_ONLY );
13
14
15sub import {
16 if (@_>1) { shift; goto &FILTER }
17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
18}
19
20sub fail {
21 croak "FILTER_ONLY: ", @_;
22}
23
24my $exql = sub {
25 my @bits = extract_quotelike $_[0], qr//;
26 return unless $bits[0];
27 return \@bits;
28};
29
30my $ncws = qr/\s+/;
31my $comment = qr/(?<![\$\@%])#.*/;
32my $ws = qr/(?:$ncws|$comment)+/;
33my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
34my $EOP = qr/\n\n|\Z/;
35my $CUT = qr/\n=cut.*$EOP/;
36my $pod_or_DATA = qr/
37 ^=(?:head[1-4]|item) .*? $CUT
38 | ^=pod .*? $CUT
39 | ^=for .*? $EOP
40 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
41 | ^__(DATA|END)__\r?\n.*
42 /smx;
43
44my %extractor_for = (
45 quotelike => [ $ws, \&extract_variable, $id, { MATCH => \&extract_quotelike } ],
46 regex => [ $ws, $pod_or_DATA, $id, $exql ],
47 string => [ $ws, $pod_or_DATA, $id, $exql ],
48 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
49 \&extract_variable,
50 $id, { DONT_MATCH => \&extract_quotelike } ],
51 code_no_comments
52 => [ { DONT_MATCH => $comment },
53 $ncws, { DONT_MATCH => $pod_or_DATA },
54 \&extract_variable,
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 executable_no_comments
58 => [ { DONT_MATCH => $comment },
59 $ncws, { DONT_MATCH => $pod_or_DATA } ],
60 all => [ { MATCH => qr/(?s:.*)/ } ],
61);
62
63my %selector_for = (
64 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
65 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
66 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
67 regex => sub { my ($t)=@_;
68 sub{ref() or return $_;
69 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
70 return $_->[0] unless $op =~ /^(qr|m|s)/
71 || !$op && ($ld eq '/' || $ld eq '?');
72 $_ = $pat;
73 $t->(@_);
74 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
75 return "$pre$ql";
76 };
77 },
78 string => sub { my ($t)=@_;
79 sub{ref() or return $_;
80 local *args = \@_;
81 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
82 return $_->[0] if $op =~ /^(qr|m)/
83 || !$op && ($ld1 eq '/' || $ld1 eq '?');
84 if (!$op || $op eq 'tr' || $op eq 'y') {
85 local *_ = \$str1;
86 $t->(@args);
87 }
88 if ($op =~ /^(tr|y|s)/) {
89 local *_ = \$str2;
90 $t->(@args);
91 }
92 my $result = "$pre$op$ld1$str1$rd1";
93 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
94 $result .= "$str2$rd2$flg";
95 return $result;
96 };
97 },
98);
99
100
101sub gen_std_filter_for {
102 my ($type, $transform) = @_;
103 return sub {
104 my $instr;
105 local @components;
106 for (extract_multiple($_,$extractor_for{$type})) {
107 if (ref()) { push @components, $_; $instr=0 }
108 elsif ($instr) { $components[-1] .= $_ }
109 else { push @components, $_; $instr=1 }
110 }
111 if ($type =~ /^code/) {
112 my $count = 0;
113 local $placeholder = qr/\Q$;\E(\C{4})\Q$;\E/;
114 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
115 $_ = join "",
116 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
117 @components;
118 @components = grep { ref $_ } @components;
119 $transform->(@_);
120 s/$extractor/${$components[unpack('N',$1)]}/g;
121 }
122 else {
123 my $selector = $selector_for{$type}->($transform);
124 $_ = join "", map $selector->(@_), @components;
125 }
126 }
127};
128
129sub FILTER (&;$) {
130 my $caller = caller;
131 my ($filter, $terminator) = @_;
132 no warnings 'redefine';
133 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
134 *{"${caller}::unimport"} = gen_filter_unimport($caller);
135}
136
137sub FILTER_ONLY {
138 my $caller = caller;
139 while (@_ > 1) {
140 my ($what, $how) = splice(@_, 0, 2);
141 fail "Unknown selector: $what"
142 unless exists $extractor_for{$what};
143 fail "Filter for $what is not a subroutine reference"
144 unless ref $how eq 'CODE';
145 push @transforms, gen_std_filter_for($what,$how);
146 }
147 my $terminator = shift;
148
149 my $multitransform = sub {
150 foreach my $transform ( @transforms ) {
151 $transform->(@_);
152 }
153 };
154 no warnings 'redefine';
155 *{"${caller}::import"} =
156 gen_filter_import($caller,$multitransform,$terminator);
157 *{"${caller}::unimport"} = gen_filter_unimport($caller);
158}
159
160my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
161
162sub gen_filter_import {
163 my ($class, $filter, $terminator) = @_;
164 my %terminator;
165 my $prev_import = *{$class."::import"}{CODE};
166 return sub {
167 my ($imported_class, @args) = @_;
168 my $def_terminator =
169 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
170 if (!defined $terminator) {
171 $terminator{terminator} = $def_terminator;
172 }
173 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
174 $terminator{terminator} = $terminator;
175 }
176 elsif (ref $terminator ne 'HASH') {
177 croak "Terminator must be specified as scalar or hash ref"
178 }
179 elsif (!exists $terminator->{terminator}) {
180 $terminator{terminator} = $def_terminator;
181 }
182 filter_add(
183 sub {
184 my ($status, $lastline);
185 my $count = 0;
186 my $data = "";
187 while ($status = filter_read()) {
188 return $status if $status < 0;
189 if ($terminator{terminator} &&
190 m/$terminator{terminator}/) {
191 $lastline = $_;
192 last;
193 }
194 $data .= $_;
195 $count++;
196 $_ = "";
197 }
198 return $count if not $count;
199 $_ = $data;
200 $filter->($imported_class, @args) unless $status < 0;
201 if (defined $lastline) {
202 if (defined $terminator{becomes}) {
203 $_ .= $terminator{becomes};
204 }
205 elsif ($lastline =~ $def_terminator) {
206 $_ .= $lastline;
207 }
208 }
209 return $count;
210 }
211 );
212 if ($prev_import) {
213 goto &$prev_import;
214 }
215 elsif ($class->isa('Exporter')) {
216 $class->export_to_level(1,@_);
217 }
218 }
219}
220
221sub gen_filter_unimport {
222 my ($class) = @_;
223 return sub {
224 filter_del();
225 goto &$prev_unimport if $prev_unimport;
226 }
227}
228
2291;
230
231__END__
232
233=head1 NAME
234
235Filter::Simple - Simplified source filtering
236
237
238=head1 SYNOPSIS
239
240 # in MyFilter.pm:
241
242 package MyFilter;
243
244 use Filter::Simple;
245
246 FILTER { ... };
247
248 # or just:
249 #
250 # use Filter::Simple sub { ... };
251
252 # in user's code:
253
254 use MyFilter;
255
256 # this code is filtered
257
258 no MyFilter;
259
260 # this code is not
261
262
263=head1 DESCRIPTION
264
265=head2 The Problem
266
267Source filtering is an immensely powerful feature of recent versions of Perl.
268It allows one to extend the language itself (e.g. the Switch module), to
269simplify the language (e.g. Language::Pythonesque), or to completely recast the
270language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
271the full power of Perl as its own, recursively applied, macro language.
272
273The excellent Filter::Util::Call module (by Paul Marquess) provides a
274usable Perl interface to source filtering, but it is often too powerful
275and not nearly as simple as it could be.
276
277To use the module it is necessary to do the following:
278
279=over 4
280
281=item 1.
282
283Download, build, and install the Filter::Util::Call module.
284(If you have Perl 5.7.1 or later, this is already done for you.)
285
286=item 2.
287
288Set up a module that does a C<use Filter::Util::Call>.
289
290=item 3.
291
292Within that module, create an C<import> subroutine.
293
294=item 4.
295
296Within the C<import> subroutine do a call to C<filter_add>, passing
297it either a subroutine reference.
298
299=item 5.
300
301Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
302to "prime" $_ with source code data from the source file that will
303C<use> your module. Check the status value returned to see if any
304source code was actually read in.
305
306=item 6.
307
308Process the contents of $_ to change the source code in the desired manner.
309
310=item 7.
311
312Return the status value.
313
314=item 8.
315
316If the act of unimporting your module (via a C<no>) should cause source
317code filtering to cease, create an C<unimport> subroutine, and have it call
318C<filter_del>. Make sure that the call to C<filter_read> or
319C<filter_read_exact> in step 5 will not accidentally read past the
320C<no>. Effectively this limits source code filters to line-by-line
321operation, unless the C<import> subroutine does some fancy
322pre-pre-parsing of the source code it's filtering.
323
324=back
325
326For example, here is a minimal source code filter in a module named
327BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
328to the sequence C<die 'BANG' if $BANG> in any piece of code following a
329C<use BANG;> statement (until the next C<no BANG;> statement, if any):
330
331 package BANG;
332
333 use Filter::Util::Call ;
334
335 sub import {
336 filter_add( sub {
337 my $caller = caller;
338 my ($status, $no_seen, $data);
339 while ($status = filter_read()) {
340 if (/^\s*no\s+$caller\s*;\s*?$/) {
341 $no_seen=1;
342 last;
343 }
344 $data .= $_;
345 $_ = "";
346 }
347 $_ = $data;
348 s/BANG\s+BANG/die 'BANG' if \$BANG/g
349 unless $status < 0;
350 $_ .= "no $class;\n" if $no_seen;
351 return 1;
352 })
353 }
354
355 sub unimport {
356 filter_del();
357 }
358
359 1 ;
360
361This level of sophistication puts filtering out of the reach of
362many programmers.
363
364
365=head2 A Solution
366
367The Filter::Simple module provides a simplified interface to
368Filter::Util::Call; one that is sufficient for most common cases.
369
370Instead of the above process, with Filter::Simple the task of setting up
371a source code filter is reduced to:
372
373=over 4
374
375=item 1.
376
377Download and install the Filter::Simple module.
378(If you have Perl 5.7.1 or later, this is already done for you.)
379
380=item 2.
381
382Set up a module that does a C<use Filter::Simple> and then
383calls C<FILTER { ... }>.
384
385=item 3.
386
387Within the anonymous subroutine or block that is passed to
388C<FILTER>, process the contents of $_ to change the source code in
389the desired manner.
390
391=back
392
393In other words, the previous example, would become:
394
395 package BANG;
396 use Filter::Simple;
397
398 FILTER {
399 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
400 };
401
402 1 ;
403
404Note that the source code is passed as a single string, so any regex that
405uses C<^> or C<$> to detect line boundaries will need the C</m> flag.
406
407=head2 Disabling or changing <no> behaviour
408
409By default, the installed filter only filters up to a line consisting of one of
410the three standard source "terminators":
411
412 no ModuleName; # optional comment
413
414or:
415
416 __END__
417
418or:
419
420 __DATA__
421
422but this can be altered by passing a second argument to C<use Filter::Simple>
423or C<FILTER> (just remember: there's I<no> comma after the initial block when
424you use C<FILTER>).
425
426That second argument may be either a C<qr>'d regular expression (which is then
427used to match the terminator line), or a defined false value (which indicates
428that no terminator line should be looked for), or a reference to a hash
429(in which case the terminator is the value associated with the key
430C<'terminator'>.
431
432For example, to cause the previous filter to filter only up to a line of the
433form:
434
435 GNAB esu;
436
437you would write:
438
439 package BANG;
440 use Filter::Simple;
441
442 FILTER {
443 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
444 }
445 qr/^\s*GNAB\s+esu\s*;\s*?$/;
446
447or:
448
449 FILTER {
450 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
451 }
452 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
453
454and to prevent the filter's being turned off in any way:
455
456 package BANG;
457 use Filter::Simple;
458
459 FILTER {
460 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
461 }
462 ""; # or: 0
463
464or:
465
466 FILTER {
467 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
468 }
469 { terminator => "" };
470
471B<Note that, no matter what you set the terminator pattern to,
472the actual terminator itself I<must> be contained on a single source line.>
473
474
475=head2 All-in-one interface
476
477Separating the loading of Filter::Simple:
478
479 use Filter::Simple;
480
481from the setting up of the filtering:
482
483 FILTER { ... };
484
485is useful because it allows other code (typically parser support code
486or caching variables) to be defined before the filter is invoked.
487However, there is often no need for such a separation.
488
489In those cases, it is easier to just append the filtering subroutine and
490any terminator specification directly to the C<use> statement that loads
491Filter::Simple, like so:
492
493 use Filter::Simple sub {
494 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
495 };
496
497This is exactly the same as:
498
499 use Filter::Simple;
500 BEGIN {
501 Filter::Simple::FILTER {
502 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
503 };
504 }
505
506except that the C<FILTER> subroutine is not exported by Filter::Simple.
507
508
509=head2 Filtering only specific components of source code
510
511One of the problems with a filter like:
512
513 use Filter::Simple;
514
515 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
516
517is that it indiscriminately applies the specified transformation to
518the entire text of your source program. So something like:
519
520 warn 'BANG BANG, YOU'RE DEAD';
521 BANG BANG;
522
523will become:
524
525 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
526 die 'BANG' if $BANG;
527
528It is very common when filtering source to only want to apply the filter
529to the non-character-string parts of the code, or alternatively to I<only>
530the character strings.
531
532Filter::Simple supports this type of filtering by automatically
533exporting the C<FILTER_ONLY> subroutine.
534
535C<FILTER_ONLY> takes a sequence of specifiers that install separate
536(and possibly multiple) filters that act on only parts of the source code.
537For example:
538
539 use Filter::Simple;
540
541 FILTER_ONLY
542 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
543 quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
544
545The C<"code"> subroutine will only be used to filter parts of the source
546code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
547subroutine only filters Perl quotelikes (including here documents).
548
549The full list of alternatives is:
550
551=over
552
553=item C<"code">
554
555Filters only those sections of the source code that are not quotelikes, POD, or
556C<__DATA__>.
557
558=item C<"code_no_comments">
559
560Filters only those sections of the source code that are not quotelikes, POD,
561comments, or C<__DATA__>.
562
563=item C<"executable">
564
565Filters only those sections of the source code that are not POD or C<__DATA__>.
566
567=item C<"executable_no_comments">
568
569Filters only those sections of the source code that are not POD, comments, or C<__DATA__>.
570
571=item C<"quotelike">
572
573Filters only Perl quotelikes (as interpreted by
574C<&Text::Balanced::extract_quotelike>).
575
576=item C<"string">
577
578Filters only the string literal parts of a Perl quotelike (i.e. the
579contents of a string literal, either half of a C<tr///>, the second
580half of an C<s///>).
581
582=item C<"regex">
583
584Filters only the pattern literal parts of a Perl quotelike (i.e. the
585contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
586
587=item C<"all">
588
589Filters everything. Identical in effect to C<FILTER>.
590
591=back
592
593Except for C<< FILTER_ONLY code => sub {...} >>, each of
594the component filters is called repeatedly, once for each component
595found in the source code.
596
597Note that you can also apply two or more of the same type of filter in
598a single C<FILTER_ONLY>. For example, here's a simple
599macro-preprocessor that is only applied within regexes,
600with a final debugging pass that prints the resulting source code:
601
602 use Regexp::Common;
603 FILTER_ONLY
604 regex => sub { s/!\[/[^/g },
605 regex => sub { s/%d/$RE{num}{int}/g },
606 regex => sub { s/%f/$RE{num}{real}/g },
607 all => sub { print if $::DEBUG };
608
609
610
611=head2 Filtering only the code parts of source code
612
613Most source code ceases to be grammatically correct when it is broken up
614into the pieces between string literals and regexes. So the C<'code'>
615and C<'code_no_comments'> component filter behave slightly differently
616from the other partial filters described in the previous section.
617
618Rather than calling the specified processor on each individual piece of
619code (i.e. on the bits between quotelikes), the C<'code...'> partial
620filters operate on the entire source code, but with the quotelike bits
621(and, in the case of C<'code_no_comments'>, the comments) "blanked out".
622
623That is, a C<'code...'> filter I<replaces> each quoted string, quotelike,
624regex, POD, and __DATA__ section with a placeholder. The
625delimiters of this placeholder are the contents of the C<$;> variable
626at the time the filter is applied (normally C<"\034">). The remaining
627four bytes are a unique identifier for the component being replaced.
628
629This approach makes it comparatively easy to write code preprocessors
630without worrying about the form or contents of strings, regexes, etc.
631
632For convenience, during a C<'code...'> filtering operation, Filter::Simple
633provides a package variable (C<$Filter::Simple::placeholder>) that
634contains a pre-compiled regex that matches any placeholder...and
635captures the identifier within the placeholder. Placeholders can be
636moved and re-ordered within the source code as needed.
637
638In addition, a second package variable (C<@Filter::Simple::components>)
639contains a list of the various pieces of C<$_>, as they were originally split
640up to allow placeholders to be inserted.
641
642Once the filtering has been applied, the original strings, regexes, POD,
643etc. are re-inserted into the code, by replacing each placeholder with
644the corresponding original component (from C<@components>). Note that
645this means that the C<@components> variable must be treated with extreme
646care within the filter. The C<@components> array stores the "back-
647translations" of each placeholder inserted into C<$_>, as well as the
648interstitial source code between placeholders. If the placeholder
649backtranslations are altered in C<@components>, they will be similarly
650changed when the placeholders are removed from C<$_> after the filter
651is complete.
652
653For example, the following filter detects concatentated pairs of
654strings/quotelikes and reverses the order in which they are
655concatenated:
656
657 package DemoRevCat;
658 use Filter::Simple;
659
660 FILTER_ONLY code => sub {
661 my $ph = $Filter::Simple::placeholder;
662 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
663 };
664
665Thus, the following code:
666
667 use DemoRevCat;
668
669 my $str = "abc" . q(def);
670
671 print "$str\n";
672
673would become:
674
675 my $str = q(def)."abc";
676
677 print "$str\n";
678
679and hence print:
680
681 defabc
682
683
684=head2 Using Filter::Simple with an explicit C<import> subroutine
685
686Filter::Simple generates a special C<import> subroutine for
687your module (see L<"How it works">) which would normally replace any
688C<import> subroutine you might have explicitly declared.
689
690However, Filter::Simple is smart enough to notice your existing
691C<import> and Do The Right Thing with it.
692That is, if you explicitly define an C<import> subroutine in a package
693that's using Filter::Simple, that C<import> subroutine will still
694be invoked immediately after any filter you install.
695
696The only thing you have to remember is that the C<import> subroutine
697I<must> be declared I<before> the filter is installed. If you use C<FILTER>
698to install the filter:
699
700 package Filter::TurnItUpTo11;
701
702 use Filter::Simple;
703
704 FILTER { s/(\w+)/\U$1/ };
705
706that will almost never be a problem, but if you install a filtering
707subroutine by passing it directly to the C<use Filter::Simple>
708statement:
709
710 package Filter::TurnItUpTo11;
711
712 use Filter::Simple sub{ s/(\w+)/\U$1/ };
713
714then you must make sure that your C<import> subroutine appears before
715that C<use> statement.
716
717
718=head2 Using Filter::Simple and Exporter together
719
720Likewise, Filter::Simple is also smart enough
721to Do The Right Thing if you use Exporter:
722
723 package Switch;
724 use base Exporter;
725 use Filter::Simple;
726
727 @EXPORT = qw(switch case);
728 @EXPORT_OK = qw(given when);
729
730 FILTER { $_ = magic_Perl_filter($_) }
731
732Immediately after the filter has been applied to the source,
733Filter::Simple will pass control to Exporter, so it can do its magic too.
734
735Of course, here too, Filter::Simple has to know you're using Exporter
736before it applies the filter. That's almost never a problem, but if you're
737nervous about it, you can guarantee that things will work correctly by
738ensuring that your C<use base Exporter> always precedes your
739C<use Filter::Simple>.
740
741
742=head2 How it works
743
744The Filter::Simple module exports into the package that calls C<FILTER>
745(or C<use>s it directly) -- such as package "BANG" in the above example --
746two automagically constructed
747subroutines -- C<import> and C<unimport> -- which take care of all the
748nasty details.
749
750In addition, the generated C<import> subroutine passes its own argument
751list to the filtering subroutine, so the BANG.pm filter could easily
752be made parametric:
753
754 package BANG;
755
756 use Filter::Simple;
757
758 FILTER {
759 my ($die_msg, $var_name) = @_;
760 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
761 };
762
763 # and in some user code:
764
765 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
766
767
768The specified filtering subroutine is called every time a C<use BANG> is
769encountered, and passed all the source code following that call, up to
770either the next C<no BANG;> (or whatever terminator you've set) or the
771end of the source file, whichever occurs first. By default, any C<no
772BANG;> call must appear by itself on a separate line, or it is ignored.
773
774
775=head1 AUTHOR
776
777Damian Conway ([email protected])
778
779=head1 COPYRIGHT
780
781 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
782 This module is free software. It may be used, redistributed
783 and/or modified under the same terms as Perl itself.
Note: See TracBrowser for help on using the repository browser.