Last change
on this file since 9156 was 1972, checked in by jmt14, 23 years ago |
* empty log message *
|
-
Property svn:executable
set to
*
-
Property svn:keywords
set to
Author Date Id Revision
|
File size:
953 bytes
|
Rev | Line | |
---|
#!/usr/bin/perl -w

# convert an html file to a text file
#
# Usage: pass the path of the html file as the only argument; the plain
# text rendering produced by lynx is written to standard output.

# Version
# 1   1999 Aug 24  First version. Compensates for files Lynx cannot parse.
# 1.1 1999 Aug 24  Instead of special cases, put a time limit on lynx.

use strict;
use warnings;

my $filename = $ARGV[0];
die "Usage: $0 <html-file>\n" unless defined $filename;
die "$0: '$filename' does not exist\n" unless -e $filename;

# Lynx can hang on some inputs (the earlier special cases for framesets and
# for files with no body were replaced by this limit), so cap its CPU time
# at 300 seconds. ulimit is a shell builtin, hence the "sh -c" wrapper.
# The filename is handed to the shell as the positional parameter "$1"
# rather than being interpolated into the command text, so names containing
# spaces or shell metacharacters are passed through to lynx untouched.
my $lynx_command = 'ulimit -t 300; exec lynx -force_html -nolist -dump "$1"';

# Read the lynx output straight from a pipe: no temporary file is created
# next to the input, so nothing is left behind if we are interrupted and
# the input directory does not have to be writable.
open(my $lynx_out, '-|', 'sh', '-c', $lynx_command, 'sh', $filename)
    or die "$0: cannot run lynx on '$filename': $!\n";

while (my $line = <$lynx_out>) {

    # replace the [INLINE], [IMAGE], and [LINK] markers with ". "
    $line =~ s/\[(?:INLINE|IMAGE|LINK)\]/. /g;

    print $line;
}

# A failing or timed-out lynx is not fatal: whatever text it produced has
# already been printed. Just report the problem on standard error.
close($lynx_out)
    or warn "$0: lynx did not finish cleanly on '$filename' (wait status $?)\n";
Note:
See
TracBrowser
for help on using the repository browser.