1 | #! /usr/bin/perl -w
|
---|
2 |
|
---|
3 | # convert_toc.pl converts old <<TOC>> marked up files to use the new
|
---|
4 | # <Section> syntax (suitable for processing by HTMLPlug -description_tags
|
---|
5 |
|
---|
6 | use File::Basename;
|
---|
7 |
|
---|
8 | my $level = 0;
|
---|
9 |
|
---|
10 | sub main {
|
---|
11 | open STDIN, "<$ARGV[0]" or die "$ARGV[0]: $!\n";
|
---|
12 | open STDOUT, ">$ARGV[1]" or die "$ARGV[1]: $!\n";
|
---|
13 |
|
---|
14 | my $content = "";
|
---|
15 |
|
---|
16 | while (<STDIN>) {
|
---|
17 | $_ =~ s/[\cJ\cM]+$//;
|
---|
18 | $content .= $_;
|
---|
19 | }
|
---|
20 |
|
---|
21 | # fix images
|
---|
22 | $content =~ s/<<I>>\s*(\w+\.(?:png|jpe?g|gif))\s*(.*?)<\/p>/<center><img src=\"$1\"><br>$2<\/center><\/p>\n/igs;
|
---|
23 |
|
---|
24 | # may need to rename image files if case isn't consistent
|
---|
25 | # (i.e. collections prepared on windows may have images named
|
---|
26 | # AAA.GIF which are linked into the HTML as aaa.gif)
|
---|
27 | my $image_file = $1;
|
---|
28 | my $dirname = &File::Basename::dirname($ARGV[0]);
|
---|
29 | if (!-e "$dirname/$image_file") {
|
---|
30 | # try all lower case
|
---|
31 | my $image_file_new = lc($image_file);
|
---|
32 | if (-e "$dirname/$image_file_new") {
|
---|
33 | print STDERR "renaming $dirname/$image_file --> $dirname/$image_file_new\n";
|
---|
34 | rename ("$dirname/$image_file", "$dirname/$image_file_new");
|
---|
35 | } else {
|
---|
36 | print STDERR "ERROR**** $dirname/$image_file does not exist\n";
|
---|
37 | }
|
---|
38 | }
|
---|
39 |
|
---|
40 | # process section title
|
---|
41 | while ($content =~ s/(?:<P[^>]*>|<BR>)(?:\s*<\/?[^>]*>)*?<<TOC(\d+)>>(.*?)(?:\s*<\/?[^>]*>)*?\s*(<\/P[^>]*>|<BR>)/process_toc ($1, $2)/sige) {}
|
---|
42 |
|
---|
43 | # close the remaining sections
|
---|
44 | my $section_ending = "<!--\n";
|
---|
45 | for (my $j = 0; $j < $level; $j++) {
|
---|
46 | $section_ending .= "</Section>\n";
|
---|
47 | }
|
---|
48 | $section_ending .= "-->\n";
|
---|
49 | $content =~ s/(<\/body>)/$section_ending.$1/ie;
|
---|
50 |
|
---|
51 | print STDOUT $content;
|
---|
52 | close STDOUT; close STDIN;
|
---|
53 | }
|
---|
54 |
|
---|
55 | sub process_toc {
|
---|
56 | my ($thislevel, $title) = @_;
|
---|
57 | my $toc = '';
|
---|
58 |
|
---|
59 | $title =~ s/<[^>]+>//g;
|
---|
60 | $title =~ s/^\s+//;
|
---|
61 | $title =~ s/\s+$//;
|
---|
62 | $title =~ s/\n/ /g;
|
---|
63 |
|
---|
64 | $toc .= "\n<!--\n";
|
---|
65 |
|
---|
66 | if ($thislevel <= $level) {
|
---|
67 | for (my $i = 0; $i < ($level-$thislevel+1); $i++) {
|
---|
68 | $toc .= "</Section>\n";
|
---|
69 | }
|
---|
70 | }
|
---|
71 | $toc .= "<Section>\n".
|
---|
72 | " <Description>\n".
|
---|
73 | " <Metadata name=\"Title\">$title</Metadata>\n".
|
---|
74 | " </Description>\n".
|
---|
75 | "-->\n";
|
---|
76 |
|
---|
77 | $level = $thislevel;
|
---|
78 |
|
---|
79 | return $toc;
|
---|
80 | }
|
---|
81 |
|
---|
82 | &main;
|
---|