source: for-distributions/trunk/bin/windows/perl/lib/Pod/ParseUtils.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago

upgrading to perl 5.8

File size: 20.3 KB
Line 
1#############################################################################
2# Pod/ParseUtils.pm -- helpers for POD parsing and conversion
3#
4# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::ParseUtils;
11
12use vars qw($VERSION);
13$VERSION = 1.33; ## Current version of this package
14require 5.005; ## requires this Perl version or later
15
16=head1 NAME
17
18Pod::ParseUtils - helpers for POD parsing and conversion
19
20=head1 SYNOPSIS
21
22 use Pod::ParseUtils;
23
24 my $list = new Pod::List;
25 my $link = Pod::Hyperlink->new('Pod::Parser');
26
27=head1 DESCRIPTION
28
29B<Pod::ParseUtils> contains a few object-oriented helper packages for
30POD parsing and processing (i.e. in POD formatters and translators).
31
32=cut
33
34#-----------------------------------------------------------------------------
35# Pod::List
36#
37# class to hold POD list info (=over, =item, =back)
38#-----------------------------------------------------------------------------
39
40package Pod::List;
41
42use Carp;
43
44=head2 Pod::List
45
46B<Pod::List> can be used to hold information about POD lists
47(written as =over ... =item ... =back) for further processing.
48The following methods are available:
49
50=over 4
51
52=item Pod::List-E<gt>new()
53
54Create a new list object. Properties may be specified through a hash
55reference like this:
56
57 my $list = Pod::List->new({ -start => $., -indent => 4 });
58
59See the individual methods/properties for details.
60
61=cut
62
63sub new {
64 my $this = shift;
65 my $class = ref($this) || $this;
66 my %params = @_;
67 my $self = {%params};
68 bless $self, $class;
69 $self->initialize();
70 return $self;
71}
72
73sub initialize {
74 my $self = shift;
75 $self->{-file} ||= 'unknown';
76 $self->{-start} ||= 'unknown';
77 $self->{-indent} ||= 4; # perlpod: "should be the default"
78 $self->{_items} = [];
79 $self->{-type} ||= '';
80}
81
82=item $list-E<gt>file()
83
84Without argument, retrieves the file name the list is in. This must
85have been set before by either specifying B<-file> in the B<new()>
86method or by calling the B<file()> method with a scalar argument.
87
88=cut
89
90# The POD file name the list appears in
91sub file {
92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
93}
94
95=item $list-E<gt>start()
96
97Without argument, retrieves the line number where the list started.
98This must have been set before by either specifying B<-start> in the
99B<new()> method or by calling the B<start()> method with a scalar
100argument.
101
102=cut
103
104# The line in the file the node appears
105sub start {
106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
107}
108
109=item $list-E<gt>indent()
110
111Without argument, retrieves the indent level of the list as specified
112in C<=over n>. This must have been set before by either specifying
113B<-indent> in the B<new()> method or by calling the B<indent()> method
114with a scalar argument.
115
116=cut
117
118# indent level
119sub indent {
120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
121}
122
123=item $list-E<gt>type()
124
125Without argument, retrieves the list type, which can be an arbitrary value,
126e.g. C<OL>, C<UL>, ... when thinking the HTML way.
127This must have been set before by either specifying
128B<-type> in the B<new()> method or by calling the B<type()> method
129with a scalar argument.
130
131=cut
132
133# The type of the list (UL, OL, ...)
134sub type {
135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
136}
137
138=item $list-E<gt>rx()
139
140Without argument, retrieves a regular expression for simplifying the
141individual item strings once the list type has been determined. Usage:
142E.g. when converting to HTML, one might strip the leading number in
143an ordered list as C<E<lt>OLE<gt>> already prints numbers itself.
144This must have been set before by either specifying
145B<-rx> in the B<new()> method or by calling the B<rx()> method
146with a scalar argument.
147
148=cut
149
150# The regular expression to simplify the items
151sub rx {
152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
153}
154
155=item $list-E<gt>item()
156
157Without argument, retrieves the array of the items in this list.
158The items may be represented by any scalar.
159If an argument has been given, it is pushed on the list of items.
160
161=cut
162
163# The individual =items of this list
164sub item {
165 my ($self,$item) = @_;
166 if(defined $item) {
167 push(@{$self->{_items}}, $item);
168 return $item;
169 }
170 else {
171 return @{$self->{_items}};
172 }
173}
174
175=item $list-E<gt>parent()
176
177Without argument, retrieves information about the parent holding this
178list, which is represented as an arbitrary scalar.
179This must have been set before by either specifying
180B<-parent> in the B<new()> method or by calling the B<parent()> method
181with a scalar argument.
182
183=cut
184
185# possibility for parsers/translators to store information about the
186# lists's parent object
187sub parent {
188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
189}
190
191=item $list-E<gt>tag()
192
193Without argument, retrieves information about the list tag, which can be
194any scalar.
195This must have been set before by either specifying
196B<-tag> in the B<new()> method or by calling the B<tag()> method
197with a scalar argument.
198
199=back
200
201=cut
202
203# possibility for parsers/translators to store information about the
204# list's object
205sub tag {
206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag};
207}
208
209#-----------------------------------------------------------------------------
210# Pod::Hyperlink
211#
212# class to manipulate POD hyperlinks (L<>)
213#-----------------------------------------------------------------------------
214
215package Pod::Hyperlink;
216
217=head2 Pod::Hyperlink
218
219B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage:
220
221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
222
223The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
224C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
225different parts of a POD hyperlink for further processing. It can also be
226used to construct hyperlinks.
227
228=over 4
229
230=item Pod::Hyperlink-E<gt>new()
231
232The B<new()> method can either be passed a set of key/value pairs or a single
233scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
234of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
235failure, the error message is stored in C<$@>.
236
237=cut
238
239use Carp;
240
241sub new {
242 my $this = shift;
243 my $class = ref($this) || $this;
244 my $self = +{};
245 bless $self, $class;
246 $self->initialize();
247 if(defined $_[0]) {
248 if(ref($_[0])) {
249 # called with a list of parameters
250 %$self = %{$_[0]};
251 $self->_construct_text();
252 }
253 else {
254 # called with L<> contents
255 return undef unless($self->parse($_[0]));
256 }
257 }
258 return $self;
259}
260
261sub initialize {
262 my $self = shift;
263 $self->{-line} ||= 'undef';
264 $self->{-file} ||= 'undef';
265 $self->{-page} ||= '';
266 $self->{-node} ||= '';
267 $self->{-alttext} ||= '';
268 $self->{-type} ||= 'undef';
269 $self->{_warnings} = [];
270}
271
272=item $link-E<gt>parse($string)
273
274This method can be used to (re)parse a (new) hyperlink, i.e. the contents
275of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
276Warnings are stored in the B<warnings> property.
277E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point
278to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
279section can simply be dropped.
280
281=cut
282
283sub parse {
284 my $self = shift;
285 local($_) = $_[0];
286 # syntax check the link and extract destination
287 my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0);
288
289 $self->{_warnings} = [];
290
291 # collapse newlines with whitespace
292 s/\s*\n+\s*/ /g;
293
294 # strip leading/trailing whitespace
295 if(s/^[\s\n]+//) {
296 $self->warning("ignoring leading whitespace in link");
297 }
298 if(s/[\s\n]+$//) {
299 $self->warning("ignoring trailing whitespace in link");
300 }
301 unless(length($_)) {
302 _invalid_link("empty link");
303 return undef;
304 }
305
306 ## Check for different possibilities. This is tedious and error-prone
307 # we match all possibilities (alttext, page, section/item)
308 #warn "DEBUG: link=$_\n";
309
310 # only page
311 # problem: a lot of people use (), or (1) or the like to indicate
312 # man page sections. But this collides with L<func()> that is supposed
313 # to point to an internal funtion...
314 my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)';
315 # page name only
316 if(m!^($page_rx)$!o) {
317 $page = $1;
318 $type = 'page';
319 }
320 # alttext, page and "section"
321 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
322 ($alttext, $page, $node) = ($1, $2, $3);
323 $type = 'section';
324 $quoted = 1; #... therefore | and / are allowed
325 }
326 # alttext and page
327 elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
328 ($alttext, $page) = ($1, $2);
329 $type = 'page';
330 }
331 # alttext and "section"
332 elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
333 ($alttext, $node) = ($1,$2);
334 $type = 'section';
335 $quoted = 1;
336 }
337 # page and "section"
338 elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
339 ($page, $node) = ($1, $2);
340 $type = 'section';
341 $quoted = 1;
342 }
343 # page and item
344 elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
345 ($page, $node) = ($1, $2);
346 $type = 'item';
347 }
348 # only "section"
349 elsif(m!^/?"(.+)"$!) {
350 $node = $1;
351 $type = 'section';
352 $quoted = 1;
353 }
354 # only item
355 elsif(m!^\s*/(.+)$!) {
356 $node = $1;
357 $type = 'item';
358 }
359 # non-standard: Hyperlink
360 elsif(m!^(\w+:[^:\s]\S*)$!i) {
361 $node = $1;
362 $type = 'hyperlink';
363 }
364 # alttext, page and item
365 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
366 ($alttext, $page, $node) = ($1, $2, $3);
367 $type = 'item';
368 }
369 # alttext and item
370 elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
371 ($alttext, $node) = ($1,$2);
372 }
373 # nonstandard: alttext and hyperlink
374 elsif(m!^(.*?)\s*[|]\s*(\w+:[^:\s]\S*)$!) {
375 ($alttext, $node) = ($1,$2);
376 $type = 'hyperlink';
377 }
378 # must be an item or a "malformed" section (without "")
379 else {
380 $node = $_;
381 $type = 'item';
382 }
383 # collapse whitespace in nodes
384 $node =~ s/\s+/ /gs;
385
386 # empty alternative text expands to node name
387 if(defined $alttext) {
388 if(!length($alttext)) {
389 $alttext = $node | $page;
390 }
391 }
392 else {
393 $alttext = '';
394 }
395
396 if($page =~ /[(]\w*[)]$/) {
397 $self->warning("(section) in '$page' deprecated");
398 }
399 if(!$quoted && $node =~ m:[|/]: && $type ne 'hyperlink') {
400 $self->warning("node '$node' contains non-escaped | or /");
401 }
402 if($alttext =~ m:[|/]:) {
403 $self->warning("alternative text '$node' contains non-escaped | or /");
404 }
405 $self->{-page} = $page;
406 $self->{-node} = $node;
407 $self->{-alttext} = $alttext;
408 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n";
409 $self->{-type} = $type;
410 $self->_construct_text();
411 1;
412}
413
414sub _construct_text {
415 my $self = shift;
416 my $alttext = $self->alttext();
417 my $type = $self->type();
418 my $section = $self->node();
419 my $page = $self->page();
420 my $page_ext = '';
421 $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
422 if($alttext) {
423 $self->{_text} = $alttext;
424 }
425 elsif($type eq 'hyperlink') {
426 $self->{_text} = $section;
427 }
428 else {
429 $self->{_text} = ($section || '') .
430 (($page && $section) ? ' in ' : '') .
431 "$page$page_ext";
432 }
433 # for being marked up later
434 # use the non-standard markers P<> and Q<>, so that the resulting
435 # text can be parsed by the translators. It's their job to put
436 # the correct hypertext around the linktext
437 if($alttext) {
438 $self->{_markup} = "Q<$alttext>";
439 }
440 elsif($type eq 'hyperlink') {
441 $self->{_markup} = "Q<$section>";
442 }
443 else {
444 $self->{_markup} = (!$section ? '' : "Q<$section>") .
445 ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : '');
446 }
447}
448
449=item $link-E<gt>markup($string)
450
451Set/retrieve the textual value of the link. This string contains special
452markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
453translator's interior sequence expansion engine to the
454formatter-specific code to highlight/activate the hyperlink. The details
455have to be implemented in the translator.
456
457=cut
458
459#' retrieve/set markuped text
460sub markup {
461 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
462}
463
464=item $link-E<gt>text()
465
466This method returns the textual representation of the hyperlink as above,
467but without markers (read only). Depending on the link type this is one of
468the following alternatives (the + and * denote the portions of the text
469that are marked up):
470
471 +perl+ L<perl>
472 *$|* in +perlvar+ L<perlvar/$|>
473 *OPTIONS* in +perldoc+ L<perldoc/"OPTIONS">
474 *DESCRIPTION* L<"DESCRIPTION">
475
476=cut
477
478# The complete link's text
479sub text {
480 $_[0]->{_text};
481}
482
483=item $link-E<gt>warning()
484
485After parsing, this method returns any warnings encountered during the
486parsing process.
487
488=cut
489
490# Set/retrieve warnings
491sub warning {
492 my $self = shift;
493 if(@_) {
494 push(@{$self->{_warnings}}, @_);
495 return @_;
496 }
497 return @{$self->{_warnings}};
498}
499
500=item $link-E<gt>file()
501
502=item $link-E<gt>line()
503
504Just simple slots for storing information about the line and the file
505the link was encountered in. Has to be filled in manually.
506
507=cut
508
509# The line in the file the link appears
510sub line {
511 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
512}
513
514# The POD file name the link appears in
515sub file {
516 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
517}
518
519=item $link-E<gt>page()
520
521This method sets or returns the POD page this link points to.
522
523=cut
524
525# The POD page the link appears on
526sub page {
527 if (@_ > 1) {
528 $_[0]->{-page} = $_[1];
529 $_[0]->_construct_text();
530 }
531 $_[0]->{-page};
532}
533
534=item $link-E<gt>node()
535
536As above, but the destination node text of the link.
537
538=cut
539
540# The link destination
541sub node {
542 if (@_ > 1) {
543 $_[0]->{-node} = $_[1];
544 $_[0]->_construct_text();
545 }
546 $_[0]->{-node};
547}
548
549=item $link-E<gt>alttext()
550
551Sets or returns an alternative text specified in the link.
552
553=cut
554
555# Potential alternative text
556sub alttext {
557 if (@_ > 1) {
558 $_[0]->{-alttext} = $_[1];
559 $_[0]->_construct_text();
560 }
561 $_[0]->{-alttext};
562}
563
564=item $link-E<gt>type()
565
566The node type, either C<section> or C<item>. As an unofficial type,
567there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
568
569=cut
570
571# The type: item or headn
572sub type {
573 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
574}
575
576=item $link-E<gt>link()
577
578Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
579
580=back
581
582=cut
583
584# The link itself
585sub link {
586 my $self = shift;
587 my $link = $self->page() || '';
588 if($self->node()) {
589 my $node = $self->node();
590 $text =~ s/\|/E<verbar>/g;
591 $text =~ s:/:E<sol>:g;
592 if($self->type() eq 'section') {
593 $link .= ($link ? '/' : '') . '"' . $node . '"';
594 }
595 elsif($self->type() eq 'hyperlink') {
596 $link = $self->node();
597 }
598 else { # item
599 $link .= '/' . $node;
600 }
601 }
602 if($self->alttext()) {
603 my $text = $self->alttext();
604 $text =~ s/\|/E<verbar>/g;
605 $text =~ s:/:E<sol>:g;
606 $link = "$text|$link";
607 }
608 $link;
609}
610
611sub _invalid_link {
612 my ($msg) = @_;
613 # this sets @_
614 #eval { die "$msg\n" };
615 #chomp $@;
616 $@ = $msg; # this seems to work, too!
617 undef;
618}
619
620#-----------------------------------------------------------------------------
621# Pod::Cache
622#
623# class to hold POD page details
624#-----------------------------------------------------------------------------
625
626package Pod::Cache;
627
628=head2 Pod::Cache
629
630B<Pod::Cache> holds information about a set of POD documents,
631especially the nodes for hyperlinks.
632The following methods are available:
633
634=over 4
635
636=item Pod::Cache-E<gt>new()
637
638Create a new cache object. This object can hold an arbitrary number of
639POD documents of class Pod::Cache::Item.
640
641=cut
642
643sub new {
644 my $this = shift;
645 my $class = ref($this) || $this;
646 my $self = [];
647 bless $self, $class;
648 return $self;
649}
650
651=item $cache-E<gt>item()
652
653Add a new item to the cache. Without arguments, this method returns a
654list of all cache elements.
655
656=cut
657
658sub item {
659 my ($self,%param) = @_;
660 if(%param) {
661 my $item = Pod::Cache::Item->new(%param);
662 push(@$self, $item);
663 return $item;
664 }
665 else {
666 return @{$self};
667 }
668}
669
670=item $cache-E<gt>find_page($name)
671
672Look for a POD document named C<$name> in the cache. Returns the
673reference to the corresponding Pod::Cache::Item object or undef if
674not found.
675
676=back
677
678=cut
679
680sub find_page {
681 my ($self,$page) = @_;
682 foreach(@$self) {
683 if($_->page() eq $page) {
684 return $_;
685 }
686 }
687 undef;
688}
689
690package Pod::Cache::Item;
691
692=head2 Pod::Cache::Item
693
694B<Pod::Cache::Item> holds information about individual POD documents,
695that can be grouped in a Pod::Cache object.
696It is intended to hold information about the hyperlink nodes of POD
697documents.
698The following methods are available:
699
700=over 4
701
702=item Pod::Cache::Item-E<gt>new()
703
704Create a new object.
705
706=cut
707
708sub new {
709 my $this = shift;
710 my $class = ref($this) || $this;
711 my %params = @_;
712 my $self = {%params};
713 bless $self, $class;
714 $self->initialize();
715 return $self;
716}
717
718sub initialize {
719 my $self = shift;
720 $self->{-nodes} = [] unless(defined $self->{-nodes});
721}
722
723=item $cacheitem-E<gt>page()
724
725Set/retrieve the POD document name (e.g. "Pod::Parser").
726
727=cut
728
729# The POD page
730sub page {
731 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
732}
733
734=item $cacheitem-E<gt>description()
735
736Set/retrieve the POD short description as found in the C<=head1 NAME>
737section.
738
739=cut
740
741# The POD description, taken out of NAME if present
742sub description {
743 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
744}
745
746=item $cacheitem-E<gt>path()
747
748Set/retrieve the POD file storage path.
749
750=cut
751
752# The file path
753sub path {
754 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
755}
756
757=item $cacheitem-E<gt>file()
758
759Set/retrieve the POD file name.
760
761=cut
762
763# The POD file name
764sub file {
765 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
766}
767
768=item $cacheitem-E<gt>nodes()
769
770Add a node (or a list of nodes) to the document's node list. Note that
771the order is kept, i.e. start with the first node and end with the last.
772If no argument is given, the current list of nodes is returned in the
773same order the nodes have been added.
774A node can be any scalar, but usually is a pair of node string and
775unique id for the C<find_node> method to work correctly.
776
777=cut
778
779# The POD nodes
780sub nodes {
781 my ($self,@nodes) = @_;
782 if(@nodes) {
783 push(@{$self->{-nodes}}, @nodes);
784 return @nodes;
785 }
786 else {
787 return @{$self->{-nodes}};
788 }
789}
790
791=item $cacheitem-E<gt>find_node($name)
792
793Look for a node or index entry named C<$name> in the object.
794Returns the unique id of the node (i.e. the second element of the array
795stored in the node arry) or undef if not found.
796
797=cut
798
799sub find_node {
800 my ($self,$node) = @_;
801 my @search;
802 push(@search, @{$self->{-nodes}}) if($self->{-nodes});
803 push(@search, @{$self->{-idx}}) if($self->{-idx});
804 foreach(@search) {
805 if($_->[0] eq $node) {
806 return $_->[1]; # id
807 }
808 }
809 undef;
810}
811
812=item $cacheitem-E<gt>idx()
813
814Add an index entry (or a list of them) to the document's index list. Note that
815the order is kept, i.e. start with the first node and end with the last.
816If no argument is given, the current list of index entries is returned in the
817same order the entries have been added.
818An index entry can be any scalar, but usually is a pair of string and
819unique id.
820
821=back
822
823=cut
824
825# The POD index entries
826sub idx {
827 my ($self,@idx) = @_;
828 if(@idx) {
829 push(@{$self->{-idx}}, @idx);
830 return @idx;
831 }
832 else {
833 return @{$self->{-idx}};
834 }
835}
836
837=head1 AUTHOR
838
839Please report bugs using L<http://rt.cpan.org>.
840
841Marek Rouchal E<lt>[email protected]<gt>, borrowing
842a lot of things from L<pod2man> and L<pod2roff> as well as other POD
843processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.
844
845=head1 SEE ALSO
846
847L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>,
848L<pod2html>
849
850=cut
851
8521;
Note: See TracBrowser for help on using the repository browser.