source: main/trunk/greenstone2/bin/script/xlstohtml.pl@ 24375

Last change on this file since 24375 was 3022, checked in by jrm21, 22 years ago

New scripts won't append $GSDLHOME to the exec name as badness happens
on windows if there are spaces in it. This will happen on unix too, but
unix people don't normally put spaces in directory names...

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 1.6 KB
Line 
eval 'exec perl -x -- "$0" ${1+"$@"}'
    if 0;
#! perl
# line 5

# xlstohtml.pl <input.xls> <output.html>
#
# Converts an Excel spreadsheet to HTML by running the external "xlhtml"
# program, then tidies the generated file in place:
#   * strips the "Created with ..." footer that xlhtml appends,
#   * blanks the <title> (xlhtml puts the .xls filename there),
#   * fails if the document body contains no text at all.
#
# Exit status: 0 on success, 1 on any failure (bad arguments, unreadable
# input, xlhtml failing, or an empty conversion result).
#
# About the first two lines: when a Bourne-style shell interprets this file,
# the eval re-executes it under "perl -x", which skips everything before the
# "#! perl" line.  Perl itself never runs the eval because of the "if 0".
# "$0" and ${1+"$@"} are quoted so that paths and arguments containing
# spaces survive the re-exec intact.

use strict;
use warnings;

# Print a one-line synopsis to STDOUT.
sub usage {
    print "$0 <input.xls> <output.html>\n";
}

if (@ARGV != 2) {
    usage();
    exit(1);
}
my $input_xls   = shift;
my $output_html = shift;

# Default to '' so the pattern matches and string interpolation below do not
# emit "uninitialized" warnings when the Greenstone environment is not set.
my $GSDLOS   = defined $ENV{'GSDLOS'}   ? $ENV{'GSDLOS'}   : '';
my $GSDLHOME = defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : '';

# try to find xlhtml binary in GSDLHOME
my $xlhtml_binary = "xlhtml";
if ($GSDLOS =~ /^windows/i) {
    $xlhtml_binary .= ".exe";
}

# On windows the program is assumed to be found in the path (historically
# because spaces in GSDLHOME caused trouble); elsewhere prefer the copy
# shipped under GSDLHOME when it exists and is executable.
if ($GSDLOS !~ /windows/i && -x "$GSDLHOME/bin/$GSDLOS/$xlhtml_binary") {
    $xlhtml_binary = "$GSDLHOME/bin/$GSDLOS/$xlhtml_binary";
}

if (! -r $input_xls) {
    print STDERR "Unable to read file `$input_xls'\n";
    exit (1);
}

# Run xlhtml with its standard output sent to the output file.  STDOUT is
# redirected here and system() is called in list form, so no shell is
# involved: filenames containing spaces, quotes, '$' or other shell
# metacharacters are passed through verbatim instead of being interpreted.
open(my $saved_stdout, '>&', \*STDOUT) or die "Can't save STDOUT:$!";
if (!open(STDOUT, '>', $output_html)) {
    print STDERR "Unable to create file `$output_html': $!\n";
    exit (1);
}
my $return_value = system($xlhtml_binary, $input_xls);
my $launch_error = $!;    # capture before the next open() can overwrite $!
open(STDOUT, '>&', $saved_stdout) or die "Can't restore STDOUT:$!";
close $saved_stdout;

if ($return_value != 0) {
    # -1 means the program could not be started at all; any other non-zero
    # value is the child's wait status.
    if ($return_value == -1) {
        print STDERR "Unable to run `$xlhtml_binary': $launch_error\n";
    }
    exit (1);
}

# Ok, we made an html file. Check to see if it has any content, and remove
# the little nag link at the bottom.
open(my $html_in, '<', $output_html) or die "Can't read file:$!";
my $html = join('', <$html_in>);
close $html_in;

$html =~ s@<hr><FONT SIZE=-1>Created with.*\n</BODY></HTML>$@</BODY></HTML>@s;

# xlHtml uses the filename as the title.
# HTMLPlug will use the first 100 chars instead if there's no title.
# Don't know if that's a good idea with a spreadsheet, though...
$html =~ s@<title>.*?\.xls</title>@<title></title>@i;

# Work on a copy: drop everything up to <BODY>, then all whitespace and
# &nbsp; entities.  If no text remains between any pair of tags, the
# spreadsheet produced no usable content.
my $tmp = $html;
$tmp =~ s/^.*?<BODY>//ms;
$tmp =~ s/(&nbsp;)|\s//gims;
if ($tmp !~ m/(>[^<]+<)/) {
    print STDERR "No text found in extracted html file!\n";
    exit(1);
}

# Write the cleaned-up html back over the file xlhtml produced.  print and
# close are checked because buffered write errors (e.g. a full disk) may
# only surface at close time.
open(my $html_out, '>', $output_html) or die "Can't create file:$!";
print {$html_out} $html or die "Can't write file:$!";
close($html_out) or die "Can't write file:$!";
exit (0);
Note: See TracBrowser for help on using the repository browser.