source: trunk/gsdl/perllib/Kea-1.1.4/cstr-to-text.pl@ 3161

Last change on this file since 3161 was 1972, checked in by jmt14, 23 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.0 KB
Line 
#! /usr/bin/perl -w

# cstr-to-text.pl: change a cstr html file to a text file.
# Gordon Paynter ([email protected])

# On the lucy/rose/borg change first line to: #! /usr/local/bin/perl -w

# Version 1 1998 Oct 23 First distributed, give or take a version.
# Version 1.01 1998 Nov 17 New page number form: "<p><!--Page No-->..."
# Version 1.02 1998 Nov 17 Bug: <p> at end of line.

# Overview:
#   1. Copy the input HTML to a private temporary file, dropping the
#      page-number / end-of-page lines and removing the paragraph marker
#      (<p>) that would otherwise split a paragraph across a page break or
#      a hyphenated line end.
#   2. Render that temporary file to plain text with "lynx -dump" and write
#      the result to the output file.
#
# Usage: cstr-to-text.pl <input-cstr-file> <output-text-file>
# Exits non-zero (via die) if a file cannot be opened/written or lynx fails.

use strict;
use warnings;

use File::Temp qw(tempfile);

# Both file names are required.  Test for defined/non-empty rather than
# truthiness so that a file literally named "0" is accepted.
my ($infile, $outfile) = @ARGV;
if (   !defined $infile
    || !length $infile
    || !defined $outfile
    || !length $outfile)
{
    die "Usage: cstr-to-text.pl <input-cstr-file> <output-text-file>\n";
}

# print STDERR "Preparing text file $infile in $outfile\n";

open(my $in_fh, '<', $infile)
    or die "cstr-to-text.pl: cannot read '$infile': $!\n";

# A securely created, uniquely named scratch file; UNLINK => 1 removes it
# when the program exits, including when it exits through die.
my ($tmp_fh, $tmpfile) = tempfile('c2tXXXXXX', TMPDIR => 1, UNLINK => 1);

# True while the next leading <p> should be removed because it only exists
# due to a page break or a hyphenated line end just before it.
my $ignore_next_paragraph_marker = 0;

LINE:
while (my $line = <$in_fh>) {

    # Page number generated by prescript before 2.1.
    if ($line =~ /^<!--Page No-->/) {
        $ignore_next_paragraph_marker = 1;
        next LINE;
    }

    # Page number generated by prescript 2.1, or an End Of Page marker
    # (prescript up to and including 2.1).  If the marker line itself ends
    # with <p>, the following paragraph marker is genuine and must be kept.
    if (   $line =~ /^<p><!--Page No-->/
        || $line =~ /^<!--End Of Page-->/)
    {
        $ignore_next_paragraph_marker = ($line =~ /<p>\s*$/) ? 0 : 1;
        next LINE;
    }

    # Page number as a paragraph mark and a single number ("<p>12"), or in
    # the "<p>- 12 -" form.
    if ($line =~ /^<p>\d+$/ || $line =~ /^<p>- \d+ -$/) {
        $ignore_next_paragraph_marker = 1;
        next LINE;
    }

    # Drop the paragraph marker that follows a removed page marker or a
    # hyphenated line; s/// returns true only if a leading <p> was removed.
    if ($ignore_next_paragraph_marker && $line =~ s/^<p>//) {
        $ignore_next_paragraph_marker = 0;
    }

    # Hyphenation: a word broken at the end of this line continues on the
    # next one, so a <p> at the start of the next line is spurious.
    if ($line =~ /[A-Za-z0-9]-$/) {
        $ignore_next_paragraph_marker = 1;
    }

    print {$tmp_fh} $line;
}

close($in_fh);
close($tmp_fh)
    or die "cstr-to-text.pl: cannot write '$tmpfile': $!\n";

# Run lynx without a shell (list-form pipe open) so that file names with
# spaces or shell metacharacters are passed through untouched, and copy its
# output to the destination ourselves instead of using a shell redirect.
open(my $out_fh, '>', $outfile)
    or die "cstr-to-text.pl: cannot write '$outfile': $!\n";
open(my $lynx_fh, '-|', 'lynx', '-force_html', '-dump', $tmpfile)
    or die "cstr-to-text.pl: cannot run lynx: $!\n";

while (my $text = <$lynx_fh>) {
    print {$out_fh} $text;
}

# close on a pipe waits for the child; it fails if lynx exited non-zero.
close($lynx_fh)
    or die "cstr-to-text.pl: lynx failed"
    . ($! ? ": $!" : ' (exit status ' . ($? >> 8) . ')') . "\n";
close($out_fh)
    or die "cstr-to-text.pl: cannot write '$outfile': $!\n";

Note: See TracBrowser for help on using the repository browser.