source: tags/gsdl-2_30d-distribution/gsdl/perllib/Kea-1.1.4/convert-html-to-text.pl@ 2308

Last change on this file since 2308 was 1972, checked in by jmt14, 23 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 953 bytes
Line 
#!/usr/bin/perl -w

# convert an html file to a text file
#
# Usage: convert-html-to-text.pl FILE
#
# Renders FILE with lynx, replaces the [INLINE], [IMAGE] and [LINK]
# placeholders in the rendered text with ". ", and prints the result
# on standard output.  Dies if FILE is missing or does not exist.

# Version
# 1   1999 Aug 24  First version. Compensates for files Lynx cannot parse.
# 1.1 1999 Aug 24  Instead of special cases, put a time limit on lynx.

use strict;
use warnings;

die "usage: $0 html-file\n" unless @ARGV;
my $filename = $ARGV[0];
die "$0: no such file: $filename\n" unless -e $filename;

# Lynx can't handle framesets. Sorry.
#$frameset = `grep "<FRAMESET" $filename`;
#exit if ($frameset =~ /./);

# Lynx can't handle files with no body. Sorry.
#$size = `wc $filename`;
#($lines, $words, $chars) = $size =~ /^\s+(\d+)\s+(\d+)\s+(\d+)/;
#exit unless ($lines && $words && $chars);
#exit if ($lines < 10);
#exit if ($words < 10);
#exit if ($chars < 10);

# A name starting with "-" would be read by lynx as an option, so anchor
# such names to the current directory.
my $target = ($filename =~ /^-/) ? "./$filename" : $filename;

# convert the html file to text with lynx
#
# The shell is still needed for "ulimit -t 300" (the time limit introduced
# in version 1.1), but the file name is handed over as the positional
# parameter $1 rather than being pasted into the command text, so spaces
# and shell metacharacters in the name are harmless.  Reading lynx's output
# through a pipe also removes the need for a temporary file next to the
# input.
my $shell_cmd = 'ulimit -t 300; exec lynx -force_html -nolist -dump "$1"';
open(my $lynx, '-|', 'sh', '-c', $shell_cmd, 'sh', $target)
    or die "$0: cannot run lynx on $filename: $!\n";

while (my $line = <$lynx>) {

    # remove the [IMAGE], [LINK], and [INLINE] markers
    $line =~ s/\[(?:INLINE|IMAGE|LINK)\]/. /g;

    print $line;
}

# Closing a pipe handle waits for the child and reports its status.  A
# failing or timed-out lynx only produces a warning: whatever text was
# rendered has already been printed, and the exit status stays 0.
close($lynx)
    or warn "$0: lynx did not finish cleanly on $filename"
          . ($! ? ": $!" : " (wait status $?)") . "\n";
Note: See TracBrowser for help on using the repository browser.