Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: for-distributions/trunk/bin/windows/perl/lib/Pod/Select.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago
upgrading to perl 5.8
File size: 23.6 KB

Line
1	#############################################################################
2	# Pod/Select.pm -- function to select portions of POD docs
3	#
4	# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
5	# This file is part of "PodParser". PodParser is free software;
6	# you can redistribute it and/or modify it under the same terms
7	# as Perl itself.
8	#############################################################################
9
10	package Pod::Select;
11
12	use vars qw($VERSION);
13	$VERSION = 1.30; ## Current version of this package
14	require 5.005; ## requires this Perl version or later
15
16	#############################################################################
17
18	=head1 NAME
19
20	Pod::Select, podselect() - extract selected sections of POD from input
21
22	=head1 SYNOPSIS
23
24	use Pod::Select;
25
26	## Select all the POD sections for each file in @filelist
27	## and print the result on standard output.
28	podselect(@filelist);
29
30	## Same as above, but write to tmp.out
31	podselect({-output => "tmp.out"}, @filelist):
32
33	## Select from the given filelist, only those POD sections that are
34	## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
35	podselect({-sections => ["NAME\|SYNOPSIS", "OPTIONS"]}, @filelist):
36
37	## Select the "DESCRIPTION" section of the PODs from STDIN and write
38	## the result to STDERR.
39	podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN);
40
41	or
42
43	use Pod::Select;
44
45	## Create a parser object for selecting POD sections from the input
46	$parser = new Pod::Select();
47
48	## Select all the POD sections for each file in @filelist
49	## and print the result to tmp.out.
50	$parser->parse_from_file("<&STDIN", "tmp.out");
51
52	## Select from the given filelist, only those POD sections that are
53	## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
54	$parser->select("NAME\|SYNOPSIS", "OPTIONS");
55	for (@filelist) { $parser->parse_from_file($_); }
56
57	## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from
58	## STDIN and write the result to STDERR.
59	$parser->select("DESCRIPTION");
60	$parser->add_selection("SEE ALSO");
61	$parser->parse_from_filehandle(\STDIN, \STDERR);
62
63	=head1 REQUIRES
64
65	perl5.005, Pod::Parser, Exporter, Carp
66
67	=head1 EXPORTS
68
69	podselect()
70
71	=head1 DESCRIPTION
72
73	B<podselect()> is a function which will extract specified sections of
74	pod documentation from an input stream. This ability is provided by the
75	B<Pod::Select> module which is a subclass of B<Pod::Parser>.
76	B<Pod::Select> provides a method named B<select()> to specify the set of
77	POD sections to select for processing/printing. B<podselect()> merely
78	creates a B<Pod::Select> object and then invokes the B<podselect()>
79	followed by B<parse_from_file()>.
80
81	=head1 SECTION SPECIFICATIONS
82
83	B<podselect()> and B<Pod::Select::select()> may be given one or more
84	"section specifications" to restrict the text processed to only the
85	desired set of sections and their corresponding subsections. A section
86	specification is a string containing one or more Perl-style regular
87	expressions separated by forward slashes ("/"). If you need to use a
88	forward slash literally within a section title you can escape it with a
89	backslash ("\/").
90
91	The formal syntax of a section specification is:
92
93	=over 4
94
95	=item *
96
97	I<head1-title-regex>/I<head2-title-regex>/...
98
99	=back
100
101	Any omitted or empty regular expressions will default to ".*".
102	Please note that each regular expression given is implicitly
103	anchored by adding "^" and "$" to the beginning and end. Also, if a
104	given regular expression starts with a "!" character, then the
105	expression is I<negated> (so C<!foo> would match anything I<except>
106	C<foo>).
107
108	Some example section specifications follow.
109
110	=over 4
111
112	=item *
113
114	Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections:
115
116	C<NAME\|SYNOPSIS>
117
118	=item *
119
120	Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION>
121	section:
122
123	C<DESCRIPTION/Question\|Answer>
124
125	=item *
126
127	Match the C<Comments> subsection of I<all> sections:
128
129	C</Comments>
130
131	=item *
132
133	Match all subsections of C<DESCRIPTION> I<except> for C<Comments>:
134
135	C<DESCRIPTION/!Comments>
136
137	=item *
138
139	Match the C<DESCRIPTION> section but do I<not> match any of its subsections:
140
141	C<DESCRIPTION/!.+>
142
143	=item *
144
145	Match all top level sections but none of their subsections:
146
147	C</!.+>
148
149	=back
150
151	=begin _NOT_IMPLEMENTED_
152
153	=head1 RANGE SPECIFICATIONS
154
155	B<podselect()> and B<Pod::Select::select()> may be given one or more
156	"range specifications" to restrict the text processed to only the
157	desired ranges of paragraphs in the desired set of sections. A range
158	specification is a string containing a single Perl-style regular
159	expression (a regex), or else two Perl-style regular expressions
160	(regexs) separated by a ".." (Perl's "range" operator is "..").
161	The regexs in a range specification are delimited by forward slashes
162	("/"). If you need to use a forward slash literally within a regex you
163	can escape it with a backslash ("\/").
164
165	The formal syntax of a range specification is:
166
167	=over 4
168
169	=item *
170
171	/I<start-range-regex>/[../I<end-range-regex>/]
172
173	=back
174
175	Where each the item inside square brackets (the ".." followed by the
176	end-range-regex) is optional. Each "range-regex" is of the form:
177
178	=cmd-expr text-expr
179
180	Where I<cmd-expr> is intended to match the name of one or more POD
181	commands, and I<text-expr> is intended to match the paragraph text for
182	the command. If a range-regex is supposed to match a POD command, then
183	the first character of the regex (the one after the initial '/')
184	absolutely I<must> be a single '=' character; it may not be anything
185	else (not even a regex meta-character) if it is supposed to match
186	against the name of a POD command.
187
188	If no I<=cmd-expr> is given then the text-expr will be matched against
189	plain textblocks unless it is preceded by a space, in which case it is
190	matched against verbatim text-blocks. If no I<text-expr> is given then
191	only the command-portion of the paragraph is matched against.
192
193	Note that these two expressions are each implicitly anchored. This
194	means that when matching against the command-name, there will be an
195	implicit '^' and '$' around the given I<=cmd-expr>; and when matching
196	against the paragraph text there will be an implicit '\A' and '\Z'
197	around the given I<text-expr>.
198
199	Unlike with section-specs, the '!' character does I<not> have any special
200	meaning (negation or otherwise) at the beginning of a range-spec!
201
202	Some example range specifications follow.
203
204	=over 4
205
206	=item
207	Match all C<=for html> paragraphs:
208
209	C</=for html/>
210
211	=item
212	Match all paragraphs between C<=begin html> and C<=end html>
213	(note that this will I<not> work correctly if such sections
214	are nested):
215
216	C</=begin html/../=end html/>
217
218	=item
219	Match all paragraphs between the given C<=item> name until the end of the
220	current section:
221
222	C</=item mine/../=head\d/>
223
224	=item
225	Match all paragraphs between the given C<=item> until the next item, or
226	until the end of the itemized list (note that this will I<not> work as
227	desired if the item contains an itemized list nested within it):
228
229	C</=item mine/../=(item\|back)/>
230
231	=back
232
233	=end _NOT_IMPLEMENTED_
234
235	=cut
236
237	#############################################################################
238
239	use strict;
240	#use diagnostics;
241	use Carp;
242	use Pod::Parser 1.04;
243	use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL);
244
245	@ISA = qw(Pod::Parser);
246	@EXPORT = qw(&podselect);
247
248	## Maximum number of heading levels supported for '=headN' directives
249	*MAX_HEADING_LEVEL = \3;
250
251	#############################################################################
252
253	=head1 OBJECT METHODS
254
255	The following methods are provided in this module. Each one takes a
256	reference to the object itself as an implicit first parameter.
257
258	=cut
259
260	##---------------------------------------------------------------------------
261
262	## =begin _PRIVATE_
263	##
264	## =head1 B<_init_headings()>
265	##
266	## Initialize the current set of active section headings.
267	##
268	## =cut
269	##
270	## =end _PRIVATE_
271
272	use vars qw(%myData @section_headings);
273
274	sub _init_headings {
275	my $self = shift;
276	local *myData = $self;
277
278	## Initialize current section heading titles if necessary
279	unless (defined $myData{_SECTION_HEADINGS}) {
280	local *section_headings = $myData{_SECTION_HEADINGS} = [];
281	for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
282	$section_headings[$i] = '';
283	}
284	}
285	}
286
287	##---------------------------------------------------------------------------
288
289	=head1 B<curr_headings()>
290
291	($head1, $head2, $head3, ...) = $parser->curr_headings();
292	$head1 = $parser->curr_headings(1);
293
294	This method returns a list of the currently active section headings and
295	subheadings in the document being parsed. The list of headings returned
296	corresponds to the most recently parsed paragraph of the input.
297
298	If an argument is given, it must correspond to the desired section
299	heading number, in which case only the specified section heading is
300	returned. If there is no current section heading at the specified
301	level, then C<undef> is returned.
302
303	=cut
304
305	sub curr_headings {
306	my $self = shift;
307	$self->_init_headings() unless (defined $self->{_SECTION_HEADINGS});
308	my @headings = @{ $self->{_SECTION_HEADINGS} };
309	return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings;
310	}
311
312	##---------------------------------------------------------------------------
313
314	=head1 B<select()>
315
316	$parser->select($section_spec1,$section_spec2,...);
317
318	This method is used to select the particular sections and subsections of
319	POD documentation that are to be printed and/or processed. The existing
320	set of selected sections is I<replaced> with the given set of sections.
321	See B<add_selection()> for adding to the current set of selected
322	sections.
323
324	Each of the C<$section_spec> arguments should be a section specification
325	as described in L<"SECTION SPECIFICATIONS">. The section specifications
326	are parsed by this method and the resulting regular expressions are
327	stored in the invoking object.
328
329	If no C<$section_spec> arguments are given, then the existing set of
330	selected sections is cleared out (which means C<all> sections will be
331	processed).
332
333	This method should I<not> normally be overridden by subclasses.
334
335	=cut
336
337	use vars qw(@selected_sections);
338
339	sub select {
340	my $self = shift;
341	my @sections = @_;
342	local *myData = $self;
343	local $_;
344
345	### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?)
346
347	##---------------------------------------------------------------------
348	## The following is a blatant hack for backward compatibility, and for
349	## implementing add_selection(). If the first argument is the
350	## string "+", then the remaining section specifications are added
351	## to the current set of selections; otherwise the given section
352	## specifications will replace the current set of selections.
353	##
354	## This should probably be fixed someday, but for the present time,
355	## it seems incredibly unlikely that "+" would ever correspond to
356	## a legitimate section heading
357	##---------------------------------------------------------------------
358	my $add = ($sections[0] eq "+") ? shift(@sections) : "";
359
360	## Reset the set of sections to use
361	unless (@sections > 0) {
362	delete $myData{_SELECTED_SECTIONS} unless ($add);
363	return;
364	}
365	$myData{_SELECTED_SECTIONS} = []
366	unless ($add && exists $myData{_SELECTED_SECTIONS});
367	local *selected_sections = $myData{_SELECTED_SECTIONS};
368
369	## Compile each spec
370	my $spec;
371	for $spec (@sections) {
372	if ( defined($_ = &_compile_section_spec($spec)) ) {
373	## Store them in our sections array
374	push(@selected_sections, $_);
375	}
376	else {
377	carp "Ignoring section spec \"$spec\"!\n";
378	}
379	}
380	}
381
382	##---------------------------------------------------------------------------
383
384	=head1 B<add_selection()>
385
386	$parser->add_selection($section_spec1,$section_spec2,...);
387
388	This method is used to add to the currently selected sections and
389	subsections of POD documentation that are to be printed and/or
390	processed. See <select()> for replacing the currently selected sections.
391
392	Each of the C<$section_spec> arguments should be a section specification
393	as described in L<"SECTION SPECIFICATIONS">. The section specifications
394	are parsed by this method and the resulting regular expressions are
395	stored in the invoking object.
396
397	This method should I<not> normally be overridden by subclasses.
398
399	=cut
400
401	sub add_selection {
402	my $self = shift;
403	$self->select("+", @_);
404	}
405
406	##---------------------------------------------------------------------------
407
408	=head1 B<clear_selections()>
409
410	$parser->clear_selections();
411
412	This method takes no arguments, it has the exact same effect as invoking
413	<select()> with no arguments.
414
415	=cut
416
417	sub clear_selections {
418	my $self = shift;
419	$self->select();
420	}
421
422	##---------------------------------------------------------------------------
423
424	=head1 B<match_section()>
425
426	$boolean = $parser->match_section($heading1,$heading2,...);
427
428	Returns a value of true if the given section and subsection heading
429	titles match any of the currently selected section specifications in
430	effect from prior calls to B<select()> and B<add_selection()> (or if
431	there are no explictly selected/deselected sections).
432
433	The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of
434	the corresponding sections, subsections, etc. to try and match. If
435	C<$headingN> is omitted then it defaults to the current corresponding
436	section heading title in the input.
437
438	This method should I<not> normally be overridden by subclasses.
439
440	=cut
441
442	sub match_section {
443	my $self = shift;
444	my (@headings) = @_;
445	local *myData = $self;
446
447	## Return true if no restrictions were explicitly specified
448	my $selections = (exists $myData{_SELECTED_SECTIONS})
449	? $myData{_SELECTED_SECTIONS} : undef;
450	return 1 unless ((defined $selections) && (@{$selections} > 0));
451
452	## Default any unspecified sections to the current one
453	my @current_headings = $self->curr_headings();
454	for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
455	(defined $headings[$i]) or $headings[$i] = $current_headings[$i];
456	}
457
458	## Look for a match against the specified section expressions
459	my ($section_spec, $regex, $negated, $match);
460	for $section_spec ( @{$selections} ) {
461	##------------------------------------------------------
462	## Each portion of this spec must match in order for
463	## the spec to be matched. So we will start with a
464	## match-value of 'true' and logically 'and' it with
465	## the results of matching a given element of the spec.
466	##------------------------------------------------------
467	$match = 1;
468	for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
469	$regex = $section_spec->[$i];
470	$negated = ($regex =~ s/^\!//);
471	$match &= ($negated ? ($headings[$i] !~ /${regex}/)
472	: ($headings[$i] =~ /${regex}/));
473	last unless ($match);
474	}
475	return 1 if ($match);
476	}
477	return 0; ## no match
478	}
479
480	##---------------------------------------------------------------------------
481
482	=head1 B<is_selected()>
483
484	$boolean = $parser->is_selected($paragraph);
485
486	This method is used to determine if the block of text given in
487	C<$paragraph> falls within the currently selected set of POD sections
488	and subsections to be printed or processed. This method is also
489	responsible for keeping track of the current input section and
490	subsections. It is assumed that C<$paragraph> is the most recently read
491	(but not yet processed) input paragraph.
492
493	The value returned will be true if the C<$paragraph> and the rest of the
494	text in the same section as C<$paragraph> should be selected (included)
495	for processing; otherwise a false value is returned.
496
497	=cut
498
499	sub is_selected {
500	my ($self, $paragraph) = @_;
501	local $_;
502	local *myData = $self;
503
504	$self->_init_headings() unless (defined $myData{_SECTION_HEADINGS});
505
506	## Keep track of current sections levels and headings
507	$_ = $paragraph;
508	if (/^=((?:sub))(?:head(?:ing)?\|sec(?:tion)?)(\d)\s+(.)\s$/)
509	{
510	## This is a section heading command
511	my ($level, $heading) = ($2, $3);
512	$level = 1 + (length($1) / 3) if ((! length $level) \|\| (length $1));
513	## Reset the current section heading at this level
514	$myData{_SECTION_HEADINGS}->[$level - 1] = $heading;
515	## Reset subsection headings of this one to empty
516	for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) {
517	$myData{_SECTION_HEADINGS}->[$i] = '';
518	}
519	}
520
521	return $self->match_section();
522	}
523
524	#############################################################################
525
526	=head1 EXPORTED FUNCTIONS
527
528	The following functions are exported by this module. Please note that
529	these are functions (not methods) and therefore C<do not> take an
530	implicit first argument.
531
532	=cut
533
534	##---------------------------------------------------------------------------
535
536	=head1 B<podselect()>
537
538	podselect(\%options,@filelist);
539
540	B<podselect> will print the raw (untranslated) POD paragraphs of all
541	POD sections in the given input files specified by C<@filelist>
542	according to the given options.
543
544	If any argument to B<podselect> is a reference to a hash
545	(associative array) then the values with the following keys are
546	processed as follows:
547
548	=over 4
549
550	=item B<-output>
551
552	A string corresponding to the desired output file (or ">&STDOUT"
553	or ">&STDERR"). The default is to use standard output.
554
555	=item B<-sections>
556
557	A reference to an array of sections specifications (as described in
558	L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD
559	sections and subsections to be selected from input. If no section
560	specifications are given, then all sections of the PODs are used.
561
562	=begin _NOT_IMPLEMENTED_
563
564	=item B<-ranges>
565
566	A reference to an array of range specifications (as described in
567	L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD
568	paragraphs to be selected from the desired input sections. If no range
569	specifications are given, then all paragraphs of the desired sections
570	are used.
571
572	=end _NOT_IMPLEMENTED_
573
574	=back
575
576	All other arguments should correspond to the names of input files
577	containing POD sections. A file name of "-" or "<&STDIN" will
578	be interpeted to mean standard input (which is the default if no
579	filenames are given).
580
581	=cut
582
583	sub podselect {
584	my(@argv) = @_;
585	my %defaults = ();
586	my $pod_parser = new Pod::Select(%defaults);
587	my $num_inputs = 0;
588	my $output = ">&STDOUT";
589	my %opts;
590	local $_;
591	for (@argv) {
592	if (ref($_)) {
593	next unless (ref($_) eq 'HASH');
594	%opts = (%defaults, %{$_});
595
596	##-------------------------------------------------------------
597	## Need this for backward compatibility since we formerly used
598	## options that were all uppercase words rather than ones that
599	## looked like Unix command-line options.
600	## to be uppercase keywords)
601	##-------------------------------------------------------------
602	%opts = map {
603	my ($key, $val) = (lc $_, $opts{$_});
604	$key =~ s/^(?=\w)/-/;
605	$key =~ /^-se[cl]/ and $key = '-sections';
606	#! $key eq '-range' and $key .= 's';
607	($key => $val);
608	} (keys %opts);
609
610	## Process the options
611	(exists $opts{'-output'}) and $output = $opts{'-output'};
612
613	## Select the desired sections
614	$pod_parser->select(@{ $opts{'-sections'} })
615	if ( (defined $opts{'-sections'})
616	&& ((ref $opts{'-sections'}) eq 'ARRAY') );
617
618	#! ## Select the desired paragraph ranges
619	#! $pod_parser->select(@{ $opts{'-ranges'} })
620	#! if ( (defined $opts{'-ranges'})
621	#! && ((ref $opts{'-ranges'}) eq 'ARRAY') );
622	}
623	else {
624	$pod_parser->parse_from_file($_, $output);
625	++$num_inputs;
626	}
627	}
628	$pod_parser->parse_from_file("-") unless ($num_inputs > 0);
629	}
630
631	#############################################################################
632
633	=head1 PRIVATE METHODS AND DATA
634
635	B<Pod::Select> makes uses a number of internal methods and data fields
636	which clients should not need to see or use. For the sake of avoiding
637	name collisions with client data and methods, these methods and fields
638	are briefly discussed here. Determined hackers may obtain further
639	information about them by reading the B<Pod::Select> source code.
640
641	Private data fields are stored in the hash-object whose reference is
642	returned by the B<new()> constructor for this class. The names of all
643	private methods and data-fields used by B<Pod::Select> begin with a
644	prefix of "_" and match the regular expression C</^_\w+$/>.
645
646	=cut
647
648	##---------------------------------------------------------------------------
649
650	=begin _PRIVATE_
651
652	=head1 B<_compile_section_spec()>
653
654	$listref = $parser->_compile_section_spec($section_spec);
655
656	This function (note it is a function and I<not> a method) takes a
657	section specification (as described in L<"SECTION SPECIFICATIONS">)
658	given in C<$section_sepc>, and compiles it into a list of regular
659	expressions. If C<$section_spec> has no syntax errors, then a reference
660	to the list (array) of corresponding regular expressions is returned;
661	otherwise C<undef> is returned and an error message is printed (using
662	B<carp>) for each invalid regex.
663
664	=end _PRIVATE_
665
666	=cut
667
668	sub _compile_section_spec {
669	my ($section_spec) = @_;
670	my (@regexs, $negated);
671
672	## Compile the spec into a list of regexs
673	local $_ = $section_spec;
674	s\|\\\\\|\001\|g; ## handle escaped backward slashes
675	s\|\\/\|\002\|g; ## handle escaped forward slashes
676
677	## Parse the regexs for the heading titles
678	@regexs = split('/', $_, $MAX_HEADING_LEVEL);
679
680	## Set default regex for ommitted levels
681	for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
682	$regexs[$i] = '.*' unless ((defined $regexs[$i])
683	&& (length $regexs[$i]));
684	}
685	## Modify the regexs as needed and validate their syntax
686	my $bad_regexs = 0;
687	for (@regexs) {
688	$_ .= '.+' if ($_ eq '!');
689	s\|\001\|\\\\\|g; ## restore escaped backward slashes
690	s\|\002\|\\/\|g; ## restore escaped forward slashes
691	$negated = s/^\!//; ## check for negation
692	eval "/$_/"; ## check regex syntax
693	if ($@) {
694	++$bad_regexs;
695	carp "Bad regular expression /$_/ in \"$section_spec\": $@\n";
696	}
697	else {
698	## Add the forward and rear anchors (and put the negator back)
699	$_ = '^' . $_ unless (/^\^/);
700	$_ = $_ . '$' unless (/\$$/);
701	$_ = '!' . $_ if ($negated);
702	}
703	}
704	return (! $bad_regexs) ? [ @regexs ] : undef;
705	}
706
707	##---------------------------------------------------------------------------
708
709	=begin _PRIVATE_
710
711	=head2 $self->{_SECTION_HEADINGS}
712
713	A reference to an array of the current section heading titles for each
714	heading level (note that the first heading level title is at index 0).
715
716	=end _PRIVATE_
717
718	=cut
719
720	##---------------------------------------------------------------------------
721
722	=begin _PRIVATE_
723
724	=head2 $self->{_SELECTED_SECTIONS}
725
726	A reference to an array of references to arrays. Each subarray is a list
727	of anchored regular expressions (preceded by a "!" if the expression is to
728	be negated). The index of the expression in the subarray should correspond
729	to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}>
730	that it is to be matched against.
731
732	=end _PRIVATE_
733
734	=cut
735
736	#############################################################################
737
738	=head1 SEE ALSO
739
740	L<Pod::Parser>
741
742	=head1 AUTHOR
743
744	Please report bugs using L<http://rt.cpan.org>.
745
746	Brad Appleton E<lt>[email protected]<gt>
747
748	Based on code for B<pod2text> written by
749	Tom Christiansen E<lt>[email protected]<gt>
750
751	=cut
752
753	1;
754	# vim: ts=4 sw=4 et

Note: See TracBrowser for help on using the repository browser.

Download in other formats: