source: trunk/gsdl/bin/script/gs2html.pl@ 1417

Last change on this file since 1417 was 1417, checked in by davidb, 24 years ago

Additions so ConvertPlug etc. can handle filenames with spaces in them.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.2 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# gs2html.pl -- convert various documents to HTML
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# Convert Microsoft Word and Adobe PDF documents to HTML using the
29# appropriate specialist conversion utility
30
31
BEGIN {
    # The Greenstone perl modules are located relative to GSDLHOME, so
    # nothing below can be loaded unless that variable is set.
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }

    # Put the Greenstone perllib directory at the front of the module
    # search path.
    unshift @INC, "$ENV{'GSDLHOME'}/perllib";
}
37
38use parsargv;
39use util;
40use Cwd;
41use File::Basename;
42
43
# print_usage -- write a one-line usage summary to STDERR and terminate
# the script with exit status 1.  Never returns.
sub print_usage
{
    my $usage_text = "Usage: $0 [-t doc|pdf] filename\n";

    print STDERR $usage_text;
    exit(1);
}
50
51
# _shell_quote -- wrap a string in single quotes for /bin/sh so that
# spaces and shell metacharacters ($, `, ", \ ...) reach the called
# program literally.  An embedded single quote is emitted as '\''.
sub _shell_quote
{
    my ($text) = @_;

    $text =~ s/'/'\\''/g;
    return "'$text'";
}


# main -- convert one document to HTML with an external conversion tool.
#
# Arguments: the command line words (normally @ARGV): optional
# -type (doc|pdf) and -verbose <number> options followed by exactly one
# filename.
#
# The HTML is written next to the input file as <name>.html.  "doc" input
# is converted with wvHtml and "pdf" input with pdftohtml.  When no type is
# given it is taken from the filename extension.
#
# Exits with status 1 (after printing a message to STDERR) when the
# arguments are invalid, the input file is unreadable, the type cannot be
# determined or is unsupported, or the conversion command fails.  Dies if
# the working directory cannot be changed.
sub main
{
    my (@args) = @_;

    my ($input_type, $verbose);

    # parsargv::parse is handed a reference to @args; the check below
    # expects only the filename to be left in @args afterwards.
    if (!parsargv::parse(\@args,
                         'type/(doc|pdf)/', \$input_type,
                         'verbose/\d+/0', \$verbose))
    {
        print_usage();
    }

    if (scalar(@args) != 1)
    {
        print_usage();
    }

    my $input_filename = $args[0];
    if (!-r $input_filename)
    {
        print STDERR "Error: unable to open $input_filename for reading\n";
        exit(1);
    }

    # The conversion runs after a chdir into the file's directory, so a
    # relative path would no longer resolve there.  Make it absolute first;
    # the output filename derived from it is then absolute as well.
    require File::Spec;
    $input_filename = File::Spec->rel2abs($input_filename);

    # The suffix pattern matches from the first dot of the base name, so
    # $suffix is either empty or a dot followed by at least one character.
    my ($tailname, $dirname, $suffix)
        = File::Basename::fileparse($input_filename, '\..+');
    my $output_filename = util::filename_cat($dirname, "$tailname.html");

    # No explicit type: fall back to the extension without its leading dot.
    # A file without an extension leaves the type undefined, which is
    # reported below.
    if (!defined $input_type || $input_type eq "")
    {
        $input_type = ($suffix ne "") ? substr($suffix, 1) : undef;
    }

    # Change to temporary working directory
    my $stored_dir = cwd();
    chdir($dirname) or die "Unable to change to directory $dirname";

    my $quoted_input = _shell_quote($input_filename);

    # Select convert utility
    my $cmd = "";
    if (!defined $input_type)
    {
        print STDERR "Error: No filename extension or input type defined\n";
        exit(1);
    }
    elsif ($input_type eq "doc")
    {
        my $wv_cfgfile
            = util::filename_cat($ENV{'GSDLHOME'}, "etc", "wvhtml.xml");

        # Redirection order matters: "2>&1" first points wvHtml's stderr at
        # the current stdout (the pipe below, when there is one), then
        # "> file" sends the generated HTML to the output file.  Only the
        # diagnostics therefore pass through the fgrep filters.
        # NOTE(review): "echo Processing" presumably guarantees the filters
        # always have a line to pass, so that a fully filtered stream does
        # not make the pipeline exit non-zero -- confirm.
        $cmd = "( echo Processing ; wvHtml --config "
             . _shell_quote($wv_cfgfile)
             . " $quoted_input 2>&1 > "
             . _shell_quote($output_filename)
             . " )";
        $cmd .= " | fgrep -v wvWarning" if ($verbose < 3);
        $cmd .= " | fgrep -v wvError" if ($verbose < 5);
    }
    elsif ($input_type eq "pdf")
    {
        $cmd = "pdftohtml -F -d " . _shell_quote($dirname)
             . " -o " . _shell_quote("$tailname.html")
             . " $quoted_input";
        $cmd .= " >/dev/null" if ($verbose < 3);
    }
    else
    {
        print STDERR "Error: Unable to convert to type '$input_type'\n";
        exit(1);
    }

    # system() returns -1 when the command could not be started at all (only
    # then does $! describe the problem) and the non-zero wait status when
    # the command itself failed.
    my $status = system($cmd);
    if ($status == -1)
    {
        print STDERR "Error: failed to execute $cmd: $!\n";
        exit(1);
    }
    elsif ($status != 0)
    {
        print STDERR "Error: failed to execute $cmd: wait status $status\n";
        exit(1);
    }

    # restore to original working directory
    chdir($stored_dir) or die "Unable to return to directory $stored_dir";

}
126
127
128
# Script entry point: hand the command line arguments to main.
main(@ARGV);
130
Note: See TracBrowser for help on using the repository browser.