source: for-distributions/trunk/bin/windows/perl/lib/Pod/Checker.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago

upgrading to perl 5.8

File size: 39.7 KB
Line 
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
13$VERSION = 1.43; ## Current version of this package
14require 5.005; ## requires this Perl version or later
15
16use Pod::ParseUtils; ## for hyperlinks and lists
17
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24 use Pod::Checker;
25
26 $syntax_okay = podchecker($filepath, $outputpath, %options);
27
28 my $checker = new Pod::Checker %options;
29 $checker->parse_from_file($filepath, \*STDERR);
30
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
35indicating a file-path, or else a reference to an open filehandle.
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
39=head2 podchecker()
40
41This function can take a hash of options:
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
49
50=back
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
56Curious/ambitious users are welcome to propose additional features they wish
57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
58consistent with L<perlpod>.
59
60The following checks are currently performed:
61
62=over 4
63
64=item *
65
66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
82
83=item *
84
85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
86
87=item *
88
89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
91
92=item *
93
94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
97
98=back
99
100=head1 DIAGNOSTICS
101
102=head2 Errors
103
104=over 4
105
106=item * empty =headn
107
108A heading (C<=head1> or C<=head2>) without any text? That ain't no
109heading!
110
111=item * =over on line I<N> without closing =back
112
113The C<=over> command does not have a corresponding C<=back> before the
114next heading (C<=head1> or C<=head2>) or the end of the file.
115
116=item * =item without previous =over
117
118=item * =back without previous =over
119
120An C<=item> or C<=back> command has been found outside a
121C<=over>/C<=back> block.
122
123=item * No argument for =begin
124
125A C<=begin> command was found that is not followed by the formatter
126specification.
127
128=item * =end without =begin
129
130A standalone C<=end> command was found.
131
132=item * Nested =begin's
133
134There were at least two consecutive C<=begin> commands without
135the corresponding C<=end>. Only one C<=begin> may be active at
136a time.
137
138=item * =for without formatter specification
139
140There is no specification of the formatter after the C<=for> command.
141
142=item * unresolved internal link I<NAME>
143
144The given link to I<NAME> does not have a matching node in the current
145POD. This also happend when a single word node name is not enclosed in
146C<"">.
147
148=item * Unknown command "I<CMD>"
149
150An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
152C<=for>, C<=pod>, C<=cut>
153
154=item * Unknown interior-sequence "I<SEQ>"
155
156An invalid markup command has been encountered. Valid are:
157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
159C<ZE<lt>E<gt>>
160
161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
162
163Two nested identical markup commands have been found. Generally this
164does not make sense.
165
166=item * garbled entity I<STRING>
167
168The I<STRING> found cannot be interpreted as a character entity.
169
170=item * Entity number out of range
171
172An entity specified by number (dec, hex, oct) is out of range (1-255).
173
174=item * malformed link LE<lt>E<gt>
175
176The link found cannot be parsed because it does not conform to the
177syntax described in L<perlpod>.
178
179=item * nonempty ZE<lt>E<gt>
180
181The C<ZE<lt>E<gt>> sequence is supposed to be empty.
182
183=item * empty XE<lt>E<gt>
184
185The index entry specified contains nothing but whitespace.
186
187=item * Spurious text after =pod / =cut
188
189The commands C<=pod> and C<=cut> do not take any arguments.
190
191=item * Spurious character(s) after =back
192
193The C<=back> command does not take any arguments.
194
195=back
196
197=head2 Warnings
198
199These may not necessarily cause trouble, but indicate mediocre style.
200
201=over 4
202
203=item * multiple occurrence of link target I<name>
204
205The POD file has some C<=item> and/or C<=head> commands that have
206the same text. Potential hyperlinks to such a text cannot be unique then.
207This warning is printed only with warning level greater than one.
208
209=item * line containing nothing but whitespace in paragraph
210
211There is some whitespace on a seemingly empty line. POD is very sensitive
212to such things, so this is flagged. B<vi> users switch on the B<list>
213option to avoid this problem.
214
215=begin _disabled_
216
217=item * file does not start with =head
218
219The file starts with a different POD directive than head.
220This is most probably something you do not want.
221
222=end _disabled_
223
224=item * previous =item has no contents
225
226There is a list C<=item> right above the flagged line that has no
227text contents. You probably want to delete empty items.
228
229=item * preceding non-item paragraph(s)
230
231A list introduced by C<=over> starts with a text or verbatim paragraph,
232but continues with C<=item>s. Move the non-item paragraph out of the
233C<=over>/C<=back> block.
234
235=item * =item type mismatch (I<one> vs. I<two>)
236
237A list started with e.g. a bulletted C<=item> and continued with a
238numbered one. This is obviously inconsistent. For most translators the
239type of the I<first> C<=item> determines the type of the list.
240
241=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
242
243Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
244can potentially cause errors as they could be misinterpreted as
245markup commands. This is only printed when the -warnings level is
246greater than 1.
247
248=item * Unknown entity
249
250A character entity was found that does not belong to the standard
251ISO set or the POD specials C<verbar> and C<sol>.
252
253=item * No items in =over
254
255The list opened with C<=over> does not contain any items.
256
257=item * No argument for =item
258
259C<=item> without any parameters is deprecated. It should either be followed
260by C<*> to indicate an unordered list, by a number (optionally followed
261by a dot) to indicate an ordered (numbered) list or simple text for a
262definition list.
263
264=item * empty section in previous paragraph
265
266The previous section (introduced by a C<=head> command) does not contain
267any text. This usually indicates that something is missing. Note: A
268C<=head1> followed immediately by C<=head2> does not trigger this warning.
269
270=item * Verbatim paragraph in NAME section
271
272The NAME section (C<=head1 NAME>) should consist of a single paragraph
273with the script/module name, followed by a dash `-' and a very short
274description of what the thing is good for.
275
276=item * =headI<n> without preceding higher level
277
278For example if there is a C<=head2> in the POD file prior to a
279C<=head1>.
280
281=back
282
283=head2 Hyperlinks
284
285There are some warnings wrt. malformed hyperlinks.
286
287=over 4
288
289=item * ignoring leading/trailing whitespace in link
290
291There is whitespace at the beginning or the end of the contents of
292LE<lt>...E<gt>.
293
294=item * (section) in '$page' deprecated
295
296There is a section detected in the page name of LE<lt>...E<gt>, e.g.
297C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
298Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
299to expand this to appropriate code. For links to (builtin) functions,
300please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
301
302=item * alternative text/node '%s' contains non-escaped | or /
303
304The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
305Although the hyperlink parser does its best to determine which "/" is
306text and which is a delimiter in case of doubt, one ought to escape
307these literal characters like this:
308
309 / E<sol>
310 | E<verbar>
311
312=back
313
314=head1 RETURN VALUE
315
316B<podchecker> returns the number of POD syntax errors found or -1 if
317there were no POD commands at all found in the file.
318
319=head1 EXAMPLES
320
321See L</SYNOPSIS>
322
323=head1 INTERFACE
324
325While checking, this module collects document properties, e.g. the nodes
326for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
327POD translators can use this feature to syntax-check and get the nodes in
328a first pass before actually starting to convert. This is expensive in terms
329of execution time, but allows for very robust conversions.
330
331Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
332method to print errors and warnings. The summary output (e.g.
333"Pod syntax OK") has been dropped from the module and has been included in
334B<podchecker> (the script). This allows users of B<Pod::Checker> to
335control completely the output behaviour. Users of B<podchecker> (the script)
336get the well-known behaviour.
337
338=cut
339
340#############################################################################
341
342use strict;
343#use diagnostics;
344use Carp;
345use Exporter;
346use Pod::Parser;
347
348use vars qw(@ISA @EXPORT);
349@ISA = qw(Pod::Parser);
350@EXPORT = qw(&podchecker);
351
352use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
353
354my %VALID_COMMANDS = (
355 'pod' => 1,
356 'cut' => 1,
357 'head1' => 1,
358 'head2' => 1,
359 'head3' => 1,
360 'head4' => 1,
361 'over' => 1,
362 'back' => 1,
363 'item' => 1,
364 'for' => 1,
365 'begin' => 1,
366 'end' => 1,
367);
368
369my %VALID_SEQUENCES = (
370 'I' => 1,
371 'B' => 1,
372 'S' => 1,
373 'C' => 1,
374 'L' => 1,
375 'F' => 1,
376 'X' => 1,
377 'Z' => 1,
378 'E' => 1,
379);
380
381# stolen from HTML::Entities
382my %ENTITIES = (
383 # Some normal chars that have special meaning in SGML context
384 amp => '&', # ampersand
385'gt' => '>', # greater than
386'lt' => '<', # less than
387 quot => '"', # double quote
388
389 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
390 AElig => 'Æ', # capital AE diphthong (ligature)
391 Aacute => 'Á', # capital A, acute accent
392 Acirc => 'Â', # capital A, circumflex accent
393 Agrave => 'À', # capital A, grave accent
394 Aring => 'Å', # capital A, ring
395 Atilde => 'Ã', # capital A, tilde
396 Auml => 'Ä', # capital A, dieresis or umlaut mark
397 Ccedil => 'Ç', # capital C, cedilla
398 ETH => 'Ð', # capital Eth, Icelandic
399 Eacute => 'É', # capital E, acute accent
400 Ecirc => 'Ê', # capital E, circumflex accent
401 Egrave => 'È', # capital E, grave accent
402 Euml => 'Ë', # capital E, dieresis or umlaut mark
403 Iacute => 'Í', # capital I, acute accent
404 Icirc => 'Î', # capital I, circumflex accent
405 Igrave => 'Ì', # capital I, grave accent
406 Iuml => 'Ï', # capital I, dieresis or umlaut mark
407 Ntilde => 'Ñ', # capital N, tilde
408 Oacute => 'Ó', # capital O, acute accent
409 Ocirc => 'Ô', # capital O, circumflex accent
410 Ograve => 'Ò', # capital O, grave accent
411 Oslash => 'Ø', # capital O, slash
412 Otilde => 'Õ', # capital O, tilde
413 Ouml => 'Ö', # capital O, dieresis or umlaut mark
414 THORN => 'Þ', # capital THORN, Icelandic
415 Uacute => 'Ú', # capital U, acute accent
416 Ucirc => 'Û', # capital U, circumflex accent
417 Ugrave => 'Ù', # capital U, grave accent
418 Uuml => 'Ü', # capital U, dieresis or umlaut mark
419 Yacute => 'Ý', # capital Y, acute accent
420 aacute => 'á', # small a, acute accent
421 acirc => 'â', # small a, circumflex accent
422 aelig => 'æ', # small ae diphthong (ligature)
423 agrave => 'à', # small a, grave accent
424 aring => 'å', # small a, ring
425 atilde => 'ã', # small a, tilde
426 auml => 'ä', # small a, dieresis or umlaut mark
427 ccedil => 'ç', # small c, cedilla
428 eacute => 'é', # small e, acute accent
429 ecirc => 'ê', # small e, circumflex accent
430 egrave => 'è', # small e, grave accent
431 eth => 'ð', # small eth, Icelandic
432 euml => 'ë', # small e, dieresis or umlaut mark
433 iacute => 'í', # small i, acute accent
434 icirc => 'î', # small i, circumflex accent
435 igrave => 'ì', # small i, grave accent
436 iuml => 'ï', # small i, dieresis or umlaut mark
437 ntilde => 'ñ', # small n, tilde
438 oacute => 'ó', # small o, acute accent
439 ocirc => 'ô', # small o, circumflex accent
440 ograve => 'ò', # small o, grave accent
441 oslash => 'ø', # small o, slash
442 otilde => 'õ', # small o, tilde
443 ouml => 'ö', # small o, dieresis or umlaut mark
444 szlig => 'ß', # small sharp s, German (sz ligature)
445 thorn => 'þ', # small thorn, Icelandic
446 uacute => 'ú', # small u, acute accent
447 ucirc => 'û', # small u, circumflex accent
448 ugrave => 'ù', # small u, grave accent
449 uuml => 'ü', # small u, dieresis or umlaut mark
450 yacute => 'ý', # small y, acute accent
451 yuml => 'ÿ', # small y, dieresis or umlaut mark
452
453 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
454 copy => '©', # copyright sign
455 reg => '®', # registered sign
456 nbsp => "\240", # non breaking space
457
458 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
459 iexcl => '¡',
460 cent => '¢',
461 pound => '£',
462 curren => '€',
463 yen => '¥',
464 brvbar => 'Š',
465 sect => '§',
466 uml => 'š',
467 ordf => 'ª',
468 laquo => '«',
469'not' => '¬', # not is a keyword in perl
470 shy => '­',
471 macr => '¯',
472 deg => '°',
473 plusmn => '±',
474 sup1 => '¹',
475 sup2 => '²',
476 sup3 => '³',
477 acute => 'Ž',
478 micro => 'µ',
479 para => '¶',
480 middot => '·',
481 cedil => 'ž',
482 ordm => 'º',
483 raquo => '»',
484 frac14 => 'Œ',
485 frac12 => 'œ',
486 frac34 => 'Ÿ',
487 iquest => '¿',
488'times' => '×', # times is a keyword in perl
489 divide => '÷',
490
491# some POD special entities
492 verbar => '|',
493 sol => '/'
494);
495
496##---------------------------------------------------------------------------
497
498##---------------------------------
499## Function definitions begin here
500##---------------------------------
501
502sub podchecker( $ ; $ % ) {
503 my ($infile, $outfile, %options) = @_;
504 local $_;
505
506 ## Set defaults
507 $infile ||= \*STDIN;
508 $outfile ||= \*STDERR;
509
510 ## Now create a pod checker
511 my $checker = new Pod::Checker(%options);
512
513 ## Now check the pod document for errors
514 $checker->parse_from_file($infile, $outfile);
515
516 ## Return the number of errors found
517 return $checker->num_errors();
518}
519
520##---------------------------------------------------------------------------
521
522##-------------------------------
523## Method definitions begin here
524##-------------------------------
525
526##################################
527
528=over 4
529
530=item C<Pod::Checker-E<gt>new( %options )>
531
532Return a reference to a new Pod::Checker object that inherits from
533Pod::Parser and is used for calling the required methods later. The
534following options are recognized:
535
536C<-warnings =E<gt> num>
537 Print warnings if C<num> is true. The higher the value of C<num>,
538the more warnings are printed. Currently there are only levels 1 and 2.
539
540C<-quiet =E<gt> num>
541 If C<num> is true, do not print any errors/warnings. This is useful
542when Pod::Checker is used to munge POD code into plain text from within
543POD formatters.
544
545=cut
546
547## sub new {
548## my $this = shift;
549## my $class = ref($this) || $this;
550## my %params = @_;
551## my $self = {%params};
552## bless $self, $class;
553## $self->initialize();
554## return $self;
555## }
556
557sub initialize {
558 my $self = shift;
559 ## Initialize number of errors, and setup an error function to
560 ## increment this number and then print to the designated output.
561 $self->{_NUM_ERRORS} = 0;
562 $self->{_NUM_WARNINGS} = 0;
563 $self->{-quiet} ||= 0;
564 # set the error handling subroutine
565 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
566 $self->{_commands} = 0; # total number of POD commands encountered
567 $self->{_list_stack} = []; # stack for nested lists
568 $self->{_have_begin} = ''; # stores =begin
569 $self->{_links} = []; # stack for internal hyperlinks
570 $self->{_nodes} = []; # stack for =head/=item nodes
571 $self->{_index} = []; # text in X<>
572 # print warnings?
573 $self->{-warnings} = 1 unless(defined $self->{-warnings});
574 $self->{_current_head1} = ''; # the current =head1 block
575 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
576}
577
578##################################
579
580=item C<$checker-E<gt>poderror( @args )>
581
582=item C<$checker-E<gt>poderror( {%opts}, @args )>
583
584Internal method for printing errors and warnings. If no options are
585given, simply prints "@_". The following options are recognized and used
586to form the output:
587
588 -msg
589
590A message to print prior to C<@args>.
591
592 -line
593
594The line number the error occurred in.
595
596 -file
597
598The file (name) the error occurred in.
599
600 -severity
601
602The error level, should be 'WARNING' or 'ERROR'.
603
604=cut
605
606# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
607sub poderror {
608 my $self = shift;
609 my %opts = (ref $_[0]) ? %{shift()} : ();
610
611 ## Retrieve options
612 chomp( my $msg = ($opts{-msg} || "")."@_" );
613 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
614 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
615 unless (exists $opts{-severity}) {
616 ## See if can find severity in message prefix
617 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
618 }
619 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
620
621 ## Increment error count and print message "
622 ++($self->{_NUM_ERRORS})
623 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
624 ++($self->{_NUM_WARNINGS})
625 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
626 unless($self->{-quiet}) {
627 my $out_fh = $self->output_handle() || \*STDERR;
628 print $out_fh ($severity, $msg, $line, $file, "\n")
629 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
630 }
631}
632
633##################################
634
635=item C<$checker-E<gt>num_errors()>
636
637Set (if argument specified) and retrieve the number of errors found.
638
639=cut
640
641sub num_errors {
642 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
643}
644
645##################################
646
647=item C<$checker-E<gt>num_warnings()>
648
649Set (if argument specified) and retrieve the number of warnings found.
650
651=cut
652
653sub num_warnings {
654 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
655}
656
657##################################
658
659=item C<$checker-E<gt>name()>
660
661Set (if argument specified) and retrieve the canonical name of POD as
662found in the C<=head1 NAME> section.
663
664=cut
665
666sub name {
667 return (@_ > 1 && $_[1]) ?
668 ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
669}
670
671##################################
672
673=item C<$checker-E<gt>node()>
674
675Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
676and C<=item>) of the current POD. The nodes are returned in the order of
677their occurrence. They consist of plain text, each piece of whitespace is
678collapsed to a single blank.
679
680=cut
681
682sub node {
683 my ($self,$text) = @_;
684 if(defined $text) {
685 $text =~ s/\s+$//s; # strip trailing whitespace
686 $text =~ s/\s+/ /gs; # collapse whitespace
687 # add node, order important!
688 push(@{$self->{_nodes}}, $text);
689 # keep also a uniqueness counter
690 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
691 return $text;
692 }
693 @{$self->{_nodes}};
694}
695
696##################################
697
698=item C<$checker-E<gt>idx()>
699
700Add (if argument specified) and retrieve the index entries (as defined by
701C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
702of whitespace is collapsed to a single blank.
703
704=cut
705
706# set/return index entries of current POD
707sub idx {
708 my ($self,$text) = @_;
709 if(defined $text) {
710 $text =~ s/\s+$//s; # strip trailing whitespace
711 $text =~ s/\s+/ /gs; # collapse whitespace
712 # add node, order important!
713 push(@{$self->{_index}}, $text);
714 # keep also a uniqueness counter
715 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
716 return $text;
717 }
718 @{$self->{_index}};
719}
720
721##################################
722
723=item C<$checker-E<gt>hyperlink()>
724
725Add (if argument specified) and retrieve the hyperlinks (as defined by
726C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
727number and C<Pod::Hyperlink> object.
728
729=back
730
731=cut
732
733# set/return hyperlinks of the current POD
734sub hyperlink {
735 my $self = shift;
736 if($_[0]) {
737 push(@{$self->{_links}}, $_[0]);
738 return $_[0];
739 }
740 @{$self->{_links}};
741}
742
743## overrides for Pod::Parser
744
745sub end_pod {
746 ## Do some final checks and
747 ## print the number of errors found
748 my $self = shift;
749 my $infile = $self->input_file();
750
751 if(@{$self->{_list_stack}}) {
752 my $list;
753 while(($list = $self->_close_list('EOF',$infile)) &&
754 $list->indent() ne 'auto') {
755 $self->poderror({ -line => 'EOF', -file => $infile,
756 -severity => 'ERROR', -msg => "=over on line " .
757 $list->start() . " without closing =back" }); #"
758 }
759 }
760
761 # check validity of document internal hyperlinks
762 # first build the node names from the paragraph text
763 my %nodes;
764 foreach($self->node()) {
765 $nodes{$_} = 1;
766 if(/^(\S+)\s+\S/) {
767 # we have more than one word. Use the first as a node, too.
768 # This is used heavily in perlfunc.pod
769 $nodes{$1} ||= 2; # derived node
770 }
771 }
772 foreach($self->idx()) {
773 $nodes{$_} = 3; # index node
774 }
775 foreach($self->hyperlink()) {
776 my ($line,$link) = @$_;
777 # _TODO_ what if there is a link to the page itself by the name,
778 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
779 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
780 my $node = $self->_check_ptree($self->parse_text($link->node(),
781 $line), $line, $infile, 'L');
782 if($node && !$nodes{$node}) {
783 $self->poderror({ -line => $line || '', -file => $infile,
784 -severity => 'ERROR',
785 -msg => "unresolved internal link '$node'"});
786 }
787 }
788 }
789
790 # check the internal nodes for uniqueness. This pertains to
791 # =headX, =item and X<...>
792 if($self->{-warnings} && $self->{-warnings}>1) {
793 foreach(grep($self->{_unique_nodes}->{$_} > 1,
794 keys %{$self->{_unique_nodes}})) {
795 $self->poderror({ -line => '-', -file => $infile,
796 -severity => 'WARNING',
797 -msg => "multiple occurrence of link target '$_'"});
798 }
799 }
800
801 # no POD found here
802 $self->num_errors(-1) if($self->{_commands} == 0);
803}
804
805# check a POD command directive
806sub command {
807 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
808 my ($file, $line) = $pod_para->file_line;
809 ## Check the command syntax
810 my $arg; # this will hold the command argument
811 if (! $VALID_COMMANDS{$cmd}) {
812 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
813 -msg => "Unknown command '$cmd'" });
814 }
815 else { # found a valid command
816 $self->{_commands}++; # delete this line if below is enabled again
817
818 ##### following check disabled due to strong request
819 #if(!$self->{_commands}++ && $cmd !~ /^head/) {
820 # $self->poderror({ -line => $line, -file => $file,
821 # -severity => 'WARNING',
822 # -msg => "file does not start with =head" });
823 #}
824
825 # check syntax of particular command
826 if($cmd eq 'over') {
827 # check for argument
828 $arg = $self->interpolate_and_check($paragraph, $line,$file);
829 my $indent = 4; # default
830 if($arg && $arg =~ /^\s*(\d+)\s*$/) {
831 $indent = $1;
832 }
833 # start a new list
834 $self->_open_list($indent,$line,$file);
835 }
836 elsif($cmd eq 'item') {
837 # are we in a list?
838 unless(@{$self->{_list_stack}}) {
839 $self->poderror({ -line => $line, -file => $file,
840 -severity => 'ERROR',
841 -msg => "=item without previous =over" });
842 # auto-open in case we encounter many more
843 $self->_open_list('auto',$line,$file);
844 }
845 my $list = $self->{_list_stack}->[0];
846 # check whether the previous item had some contents
847 if(defined $self->{_list_item_contents} &&
848 $self->{_list_item_contents} == 0) {
849 $self->poderror({ -line => $line, -file => $file,
850 -severity => 'WARNING',
851 -msg => "previous =item has no contents" });
852 }
853 if($list->{_has_par}) {
854 $self->poderror({ -line => $line, -file => $file,
855 -severity => 'WARNING',
856 -msg => "preceding non-item paragraph(s)" });
857 delete $list->{_has_par};
858 }
859 # check for argument
860 $arg = $self->interpolate_and_check($paragraph, $line, $file);
861 if($arg && $arg =~ /(\S+)/) {
862 $arg =~ s/[\s\n]+$//;
863 my $type;
864 if($arg =~ /^[*]\s*(\S*.*)/) {
865 $type = 'bullet';
866 $self->{_list_item_contents} = $1 ? 1 : 0;
867 $arg = $1;
868 }
869 elsif($arg =~ /^\d+\.?\s*(\S*)/) {
870 $type = 'number';
871 $self->{_list_item_contents} = $1 ? 1 : 0;
872 $arg = $1;
873 }
874 else {
875 $type = 'definition';
876 $self->{_list_item_contents} = 1;
877 }
878 my $first = $list->type();
879 if($first && $first ne $type) {
880 $self->poderror({ -line => $line, -file => $file,
881 -severity => 'WARNING',
882 -msg => "=item type mismatch ('$first' vs. '$type')"});
883 }
884 else { # first item
885 $list->type($type);
886 }
887 }
888 else {
889 $self->poderror({ -line => $line, -file => $file,
890 -severity => 'WARNING',
891 -msg => "No argument for =item" });
892 $arg = ' '; # empty
893 $self->{_list_item_contents} = 0;
894 }
895 # add this item
896 $list->item($arg);
897 # remember this node
898 $self->node($arg);
899 }
900 elsif($cmd eq 'back') {
901 # check if we have an open list
902 unless(@{$self->{_list_stack}}) {
903 $self->poderror({ -line => $line, -file => $file,
904 -severity => 'ERROR',
905 -msg => "=back without previous =over" });
906 }
907 else {
908 # check for spurious characters
909 $arg = $self->interpolate_and_check($paragraph, $line,$file);
910 if($arg && $arg =~ /\S/) {
911 $self->poderror({ -line => $line, -file => $file,
912 -severity => 'ERROR',
913 -msg => "Spurious character(s) after =back" });
914 }
915 # close list
916 my $list = $self->_close_list($line,$file);
917 # check for empty lists
918 if(!$list->item() && $self->{-warnings}) {
919 $self->poderror({ -line => $line, -file => $file,
920 -severity => 'WARNING',
921 -msg => "No items in =over (at line " .
922 $list->start() . ") / =back list"}); #"
923 }
924 }
925 }
926 elsif($cmd =~ /^head(\d+)/) {
927 my $hnum = $1;
928 $self->{"_have_head_$hnum"}++; # count head types
929 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
930 $self->poderror({ -line => $line, -file => $file,
931 -severity => 'WARNING',
932 -msg => "=head$hnum without preceding higher level"});
933 }
934 # check whether the previous =head section had some contents
935 if(defined $self->{_commands_in_head} &&
936 $self->{_commands_in_head} == 0 &&
937 defined $self->{_last_head} &&
938 $self->{_last_head} >= $hnum) {
939 $self->poderror({ -line => $line, -file => $file,
940 -severity => 'WARNING',
941 -msg => "empty section in previous paragraph"});
942 }
943 $self->{_commands_in_head} = -1;
944 $self->{_last_head} = $hnum;
945 # check if there is an open list
946 if(@{$self->{_list_stack}}) {
947 my $list;
948 while(($list = $self->_close_list($line,$file)) &&
949 $list->indent() ne 'auto') {
950 $self->poderror({ -line => $line, -file => $file,
951 -severity => 'ERROR',
952 -msg => "=over on line ". $list->start() .
953 " without closing =back (at $cmd)" });
954 }
955 }
956 # remember this node
957 $arg = $self->interpolate_and_check($paragraph, $line,$file);
958 $arg =~ s/[\s\n]+$//s;
959 $self->node($arg);
960 unless(length($arg)) {
961 $self->poderror({ -line => $line, -file => $file,
962 -severity => 'ERROR',
963 -msg => "empty =$cmd"});
964 }
965 if($cmd eq 'head1') {
966 $self->{_current_head1} = $arg;
967 } else {
968 $self->{_current_head1} = '';
969 }
970 }
971 elsif($cmd eq 'begin') {
972 if($self->{_have_begin}) {
973 # already have a begin
974 $self->poderror({ -line => $line, -file => $file,
975 -severity => 'ERROR',
976 -msg => "Nested =begin's (first at line " .
977 $self->{_have_begin} . ")"});
978 }
979 else {
980 # check for argument
981 $arg = $self->interpolate_and_check($paragraph, $line,$file);
982 unless($arg && $arg =~ /(\S+)/) {
983 $self->poderror({ -line => $line, -file => $file,
984 -severity => 'ERROR',
985 -msg => "No argument for =begin"});
986 }
987 # remember the =begin
988 $self->{_have_begin} = "$line:$1";
989 }
990 }
991 elsif($cmd eq 'end') {
992 if($self->{_have_begin}) {
993 # close the existing =begin
994 $self->{_have_begin} = '';
995 # check for spurious characters
996 $arg = $self->interpolate_and_check($paragraph, $line,$file);
997 # the closing argument is optional
998 #if($arg && $arg =~ /\S/) {
999 # $self->poderror({ -line => $line, -file => $file,
1000 # -severity => 'WARNING',
1001 # -msg => "Spurious character(s) after =end" });
1002 #}
1003 }
1004 else {
1005 # don't have a matching =begin
1006 $self->poderror({ -line => $line, -file => $file,
1007 -severity => 'ERROR',
1008 -msg => "=end without =begin" });
1009 }
1010 }
1011 elsif($cmd eq 'for') {
1012 unless($paragraph =~ /\s*(\S+)\s*/) {
1013 $self->poderror({ -line => $line, -file => $file,
1014 -severity => 'ERROR',
1015 -msg => "=for without formatter specification" });
1016 }
1017 $arg = ''; # do not expand paragraph below
1018 }
1019 elsif($cmd =~ /^(pod|cut)$/) {
1020 # check for argument
1021 $arg = $self->interpolate_and_check($paragraph, $line,$file);
1022 if($arg && $arg =~ /(\S+)/) {
1023 $self->poderror({ -line => $line, -file => $file,
1024 -severity => 'ERROR',
1025 -msg => "Spurious text after =$cmd"});
1026 }
1027 }
1028 $self->{_commands_in_head}++;
1029 ## Check the interior sequences in the command-text
1030 $self->interpolate_and_check($paragraph, $line,$file)
1031 unless(defined $arg);
1032 }
1033}
1034
1035sub _open_list
1036{
1037 my ($self,$indent,$line,$file) = @_;
1038 my $list = Pod::List->new(
1039 -indent => $indent,
1040 -start => $line,
1041 -file => $file);
1042 unshift(@{$self->{_list_stack}}, $list);
1043 undef $self->{_list_item_contents};
1044 $list;
1045}
1046
1047sub _close_list
1048{
1049 my ($self,$line,$file) = @_;
1050 my $list = shift(@{$self->{_list_stack}});
1051 if(defined $self->{_list_item_contents} &&
1052 $self->{_list_item_contents} == 0) {
1053 $self->poderror({ -line => $line, -file => $file,
1054 -severity => 'WARNING',
1055 -msg => "previous =item has no contents" });
1056 }
1057 undef $self->{_list_item_contents};
1058 $list;
1059}
1060
1061# process a block of some text
1062sub interpolate_and_check {
1063 my ($self, $paragraph, $line, $file) = @_;
1064 ## Check the interior sequences in the command-text
1065 # and return the text
1066 $self->_check_ptree(
1067 $self->parse_text($paragraph,$line), $line, $file, '');
1068}
1069
1070sub _check_ptree {
1071 my ($self,$ptree,$line,$file,$nestlist) = @_;
1072 local($_);
1073 my $text = '';
1074 # process each node in the parse tree
1075 foreach(@$ptree) {
1076 # regular text chunk
1077 unless(ref) {
1078 # count the unescaped angle brackets
1079 # complain only when warning level is greater than 1
1080 if($self->{-warnings} && $self->{-warnings}>1) {
1081 my $count;
1082 if($count = tr/<>/<>/) {
1083 $self->poderror({ -line => $line, -file => $file,
1084 -severity => 'WARNING',
1085 -msg => "$count unescaped <> in paragraph" });
1086 }
1087 }
1088 $text .= $_;
1089 next;
1090 }
1091 # have an interior sequence
1092 my $cmd = $_->cmd_name();
1093 my $contents = $_->parse_tree();
1094 ($file,$line) = $_->file_line();
1095 # check for valid tag
1096 if (! $VALID_SEQUENCES{$cmd}) {
1097 $self->poderror({ -line => $line, -file => $file,
1098 -severity => 'ERROR',
1099 -msg => qq(Unknown interior-sequence '$cmd')});
1100 # expand it anyway
1101 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1102 next;
1103 }
1104 if($nestlist =~ /$cmd/) {
1105 $self->poderror({ -line => $line, -file => $file,
1106 -severity => 'WARNING',
1107 -msg => "nested commands $cmd<...$cmd<...>...>"});
1108 # _TODO_ should we add the contents anyway?
1109 # expand it anyway, see below
1110 }
1111 if($cmd eq 'E') {
1112 # preserve entities
1113 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1114 $self->poderror({ -line => $line, -file => $file,
1115 -severity => 'ERROR',
1116 -msg => "garbled entity " . $_->raw_text()});
1117 next;
1118 }
1119 my $ent = $$contents[0];
1120 my $val;
1121 if($ent =~ /^0x[0-9a-f]+$/i) {
1122 # hexadec entity
1123 $val = hex($ent);
1124 }
1125 elsif($ent =~ /^0\d+$/) {
1126 # octal
1127 $val = oct($ent);
1128 }
1129 elsif($ent =~ /^\d+$/) {
1130 # numeric entity
1131 $val = $ent;
1132 }
1133 if(defined $val) {
1134 if($val>0 && $val<256) {
1135 $text .= chr($val);
1136 }
1137 else {
1138 $self->poderror({ -line => $line, -file => $file,
1139 -severity => 'ERROR',
1140 -msg => "Entity number out of range " . $_->raw_text()});
1141 }
1142 }
1143 elsif($ENTITIES{$ent}) {
1144 # known ISO entity
1145 $text .= $ENTITIES{$ent};
1146 }
1147 else {
1148 $self->poderror({ -line => $line, -file => $file,
1149 -severity => 'WARNING',
1150 -msg => "Unknown entity " . $_->raw_text()});
1151 $text .= "E<$ent>";
1152 }
1153 }
1154 elsif($cmd eq 'L') {
1155 # try to parse the hyperlink
1156 my $link = Pod::Hyperlink->new($contents->raw_text());
1157 unless(defined $link) {
1158 $self->poderror({ -line => $line, -file => $file,
1159 -severity => 'ERROR',
1160 -msg => "malformed link " . $_->raw_text() ." : $@"});
1161 next;
1162 }
1163 $link->line($line); # remember line
1164 if($self->{-warnings}) {
1165 foreach my $w ($link->warning()) {
1166 $self->poderror({ -line => $line, -file => $file,
1167 -severity => 'WARNING',
1168 -msg => $w });
1169 }
1170 }
1171 # check the link text
1172 $text .= $self->_check_ptree($self->parse_text($link->text(),
1173 $line), $line, $file, "$nestlist$cmd");
1174 # remember link
1175 $self->hyperlink([$line,$link]);
1176 }
1177 elsif($cmd =~ /[BCFIS]/) {
1178 # add the guts
1179 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1180 }
1181 elsif($cmd eq 'Z') {
1182 if(length($contents->raw_text())) {
1183 $self->poderror({ -line => $line, -file => $file,
1184 -severity => 'ERROR',
1185 -msg => "Nonempty Z<>"});
1186 }
1187 }
1188 elsif($cmd eq 'X') {
1189 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1190 if($idx =~ /^\s*$/s) {
1191 $self->poderror({ -line => $line, -file => $file,
1192 -severity => 'ERROR',
1193 -msg => "Empty X<>"});
1194 }
1195 else {
1196 # remember this node
1197 $self->idx($idx);
1198 }
1199 }
1200 else {
1201 # not reached
1202 die "internal error";
1203 }
1204 }
1205 $text;
1206}
1207
1208# process a block of verbatim text
1209sub verbatim {
1210 ## Nothing particular to check
1211 my ($self, $paragraph, $line_num, $pod_para) = @_;
1212
1213 $self->_preproc_par($paragraph);
1214
1215 if($self->{_current_head1} eq 'NAME') {
1216 my ($file, $line) = $pod_para->file_line;
1217 $self->poderror({ -line => $line, -file => $file,
1218 -severity => 'WARNING',
1219 -msg => 'Verbatim paragraph in NAME section' });
1220 }
1221}
1222
1223# process a block of regular text
1224sub textblock {
1225 my ($self, $paragraph, $line_num, $pod_para) = @_;
1226 my ($file, $line) = $pod_para->file_line;
1227
1228 $self->_preproc_par($paragraph);
1229
1230 # skip this paragraph if in a =begin block
1231 unless($self->{_have_begin}) {
1232 my $block = $self->interpolate_and_check($paragraph, $line,$file);
1233 if($self->{_current_head1} eq 'NAME') {
1234 if($block =~ /^\s*(\S+?)\s*[,-]/) {
1235 # this is the canonical name
1236 $self->{-name} = $1 unless(defined $self->{-name});
1237 }
1238 }
1239 }
1240}
1241
1242sub _preproc_par
1243{
1244 my $self = shift;
1245 $_[0] =~ s/[\s\n]+$//;
1246 if($_[0]) {
1247 $self->{_commands_in_head}++;
1248 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1249 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1250 $self->{_list_stack}->[0]->{_has_par} = 1;
1251 }
1252 }
1253}
1254
12551;
1256
1257__END__
1258
1259=head1 AUTHOR
1260
1261Please report bugs using L<http://rt.cpan.org>.
1262
1263Brad Appleton E<lt>[email protected]<gt> (initial version),
1264Marek Rouchal E<lt>[email protected]<gt>
1265
1266Based on code for B<Pod::Text::pod2text()> written by
1267Tom Christiansen E<lt>[email protected]<gt>
1268
1269=cut
1270
Note: See TracBrowser for help on using the repository browser.