1 | #!/usr/bin/perl -w
|
---|
2 |
|
---|
3 |
|
---|
# Compile-time setup: refuse to run without GSDLHOME, then put the Greenstone
# perl library directories at the front of the module search path so that the
# "use" statements below can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";

    my $perllib_dir = "$ENV{'GSDLHOME'}/perllib";

    # Two separate unshifts, so the plugins directory ends up ahead of perllib.
    unshift @INC, $perllib_dir;
    unshift @INC, "$perllib_dir/plugins";
}
|
---|
9 |
|
---|
10 | use strict;
|
---|
11 | no strict 'subs'; # allow barewords (eg STDERR) as function arguments
|
---|
12 | no strict 'refs'; # allow filehandles to be variables and vice versa
|
---|
13 |
|
---|
14 | use encodings;
|
---|
15 | use printusage;
|
---|
16 | use parse2;
|
---|
17 | use FileHandle;
|
---|
18 |
|
---|
# Table of command-line options accepted by this script. main() hands it to
# parse2::parse() and, wrapped in $options, to the PrintUsage routines.
my $arguments = [
    {
        name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'plugin',
        desc      => '{srcreplace.plugin}',
        type      => 'string',
        reqd      => 'yes',
        hiddengli => 'yes',
    },
    {
        name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        deft    => '1',
        reqd    => 'no',
        modegli => '3',
    },

    # Do not remove the following option: it is the flag for generating the
    # XML description of the options, and it WILL be used.
    # Run with -xml, the output generated should be valid XML. Used from GLI.
    {
        name      => 'xml',
        desc      => '',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];

# Script-level description used when printing usage information.
my $options = {
    name => 'replace_srcdoc_with_html.pl',
    desc => '{srcreplace.desc}',
    args => $arguments,
};
|
---|
50 |
|
---|
51 |
|
---|
# main
#
# Converts one source document to HTML with the plugin named by -plugin and
# replaces the source document, in its own directory, with the generated HTML
# file (plus its "<name>_files" folder of associated files, if one was made).
#
# Input:  read from @ARGV (the arguments passed in @_ are not used):
#           -plugin <name>  required; "<name>.pm" is loaded and instantiated
#           -language <x>   optional; selects the resource bundle
#           -xml            optional; print the option description as XML
#           <filename>      the document to convert and replace
# Output: on success prints the new file's name without extension to STDOUT.
# Dies:   on bad usage, a missing input file, a failed conversion, a name
#         collision in the destination directory, or an I/O error. Errors are
#         reported with die (not return) so that GLI sees the failure.
sub main
{
    my ($language, $plugin);
    my $xml = 0;

    my $hashParsingResult = {};

    # parse the options
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        die "\n";
    }

    # Copy the options this script reads out of the parse result. This is a
    # plain lookup rather than a string eval of "\$<name> = ...", so an
    # unexpected key can neither be executed as code nor overwrite another
    # variable in scope. (-verbosity is still accepted by the parser, but
    # nothing in this script reads it.)
    my %option_target = (
        'language' => \$language,
        'plugin'   => \$plugin,
        'xml'      => \$xml,
    );
    foreach my $option_name (keys %option_target) {
        next unless exists $hashParsingResult->{$option_name};
        ${ $option_target{$option_name} } = $hashParsingResult->{$option_name};
    }

    # If $language has been specified, load the appropriate resource bundle
    # (otherwise the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    # There should be exactly one argument left after parsing (the filename),
    # or the user may have specified -h, in which case we output the usage.
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        die "\n";
    }

    # The filename of the document to be replaced is the first value that
    # remains after the options have been parsed out.
    my $filename = $ARGV[0];
    if (!defined $filename || $filename !~ /\w/) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        print STDERR "You need to specify a filename\n";
        die "\n";
    }

    # check that file exists
    if (!-e $filename) {
        print STDERR "File $filename doesn't exist...\n";
        die "\n";
    }

    # check required options
    if (!defined $plugin || $plugin !~ /\w/) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        print STDERR "You need to specify a plugin";
        die "\n";
    }

    # Load the plugin module and create the plugin object. A block eval with
    # an ordinary method call is used, so the plugin name (which comes from
    # the command line) is never compiled as Perl source.
    my $plugobj;
    require "$plugin.pm";
    eval { $plugobj = $plugin->new(); };
    die "$@" if $@;

    # ...and initialize it
    $plugobj->init(1, "STDERR", "STDERR");

    # The directory holding the input file is where the html is to end up.
    # File::Basename is loaded here because this file does not load it itself.
    require File::Basename;
    my (undef, $import_dir) = &File::Basename::fileparse($filename, "\\.[^\\.]+\$");

    # Use the plugin's tmp_area_convert_file function to avoid duplicating
    # code. It returns the name of the output file. For Word documents a
    # "filename_files" folder of associated files may have been created next
    # to the output file, whether wvware or windows_scripting did the
    # conversion.
    my $output_filename = $plugobj->tmp_area_convert_file("html", $filename);

    # If something went wrong tmp_area_convert_file returns "". Test for an
    # undefined/empty name first so that -e is only applied to a real name.
    if (!defined $output_filename || $output_filename eq "" || !-e $output_filename) {
        # Must die (not return) here, else the error is not transmitted to
        # GLI and it thinks instead that execution was fine.
        print STDERR "***replace_srcdoc_with_html.pl: no output file created for $filename ***\n";
        die "No html file created for $filename. Replacement did not take place\n";
    }

    # Now find out what to move: a single file, or, for a Word document, also
    # an image folder named "filename-without-extension_files".
    my ($tmp_name, $tmp_dir, $ext) = &File::Basename::fileparse($output_filename, "\\.[^\\.]+\$");

    # the folder of associated files (which may or may not exist) in the tmp dir
    my $assoc_folder = &util::filename_cat($tmp_dir, $tmp_name."_files");

    # Full paths that the associated folder and the html file would have once
    # placed in the import directory; needed to detect naming collisions.
    my $new_assoc_folder = &util::filename_cat($import_dir, $tmp_name."_files");
    my $new_file         = &util::filename_cat($import_dir, $tmp_name.$ext);

    # If there is an image folder, renaming it to dodge a collision would
    # break the html file's links into it. So in that case give up, after
    # deleting both the generated file and the generated folder.
    if (-e $assoc_folder && -e $new_assoc_folder) {
        &util::rm($output_filename);
        &util::rm_r($assoc_folder); # we know directory exists, so remove dir
        die "Image folder $new_assoc_folder already exists.\nDeleting generated file and folder, else links to images will break.\n";
    }

    # Finally, check that no file already exists with the same name as the
    # generated stand-alone file. This comes *after* the folder check because
    # that branch also removes the generated file.
    if (-e $new_file) {
        &util::rm($output_filename);
        die "File $new_file already exists. Deleting generated file.\n";
    }

    # No name collisions. 'Move' the html file by copying its contents over
    # while ensuring they are utf8. If we don't do this, PDFs replaced by html
    # may fail, whereas those converted with PDFPlug will have succeeded.
    # Three-argument open: the filename is never interpreted for mode
    # characters or leading/trailing whitespace.
    open(my $html_in, '<', $output_filename)
        or die "replace_srcdoc_with_html.pl: Unable to open $output_filename to ensure utf8...ERROR: $!\n";
    my $html_contents;
    {
        local $/ = undef;               # Read entire file at once
        $html_contents = <$html_in>;    # Now file is read in as one single 'line'
        &unicode::ensure_utf8(\$html_contents); # turn any high bytes that aren't valid utf-8 into utf-8.
    }
    close($html_in);

    # Write the utf8 contents to the new file. The write must be verified
    # before anything is deleted: the generated file and the source document
    # are both removed below, so a silent write failure would lose the data.
    open(my $html_out, '>', $new_file)
        or die "replace_srcdoc_with_html.pl: Unable to open $new_file for writing out utf8 html...ERROR: $!\n";
    my $wrote  = print {$html_out} $html_contents;
    my $closed = close($html_out);      # buffered write errors surface here
    unless ($wrote && $closed) {
        my $reason = $!;
        unlink($new_file);              # do not leave a truncated html file behind
        die "replace_srcdoc_with_html.pl: Unable to write utf8 html to $new_file...ERROR: $reason\n";
    }
    &util::rm($output_filename);

    # move any associated folder containing associated files too
    if (-e $assoc_folder) {
        # copy-then-delete is used instead of util::mv
        &util::cp_r($assoc_folder, $import_dir);
        &util::rm_r($assoc_folder);
    }

    # Now we can remove the source doc permanently (there are no assocdirs for source doc)
    &util::rm($filename);

    # This output statement is needed: GShell.java's runRemote() sets status
    # to CANCELLED if there is no output (so the adverse effect only showed
    # when running GSDL remotely). Make it useful: print the new filename
    # without extension, which is used by the remote GS server.
    print STDOUT "$tmp_name\n";
}
|
---|
# Script entry point: run main with the command-line arguments.
main(@ARGV);
|
---|