source: gsdl/trunk/bin/script/replace_srcdoc_with_html.pl@ 18394

Last change on this file since 18394 was 18394, checked in by ak19, 12 years ago

Instead of returning the string Success, the main method now returns the new tailname of the generated html file. This is required when using the remote greenstone server, since the uploaded files once replaced need not have the same names anymore (can be url encoded or base 64 encoded, and spaces replaced with underscores), which means the client will need to know the new names of the files to download.

  • Property svn:executable set to *
File size: 8.8 KB
Line 
1#!/usr/bin/perl -w
2
3
# Compile-time setup: make the Greenstone perl libraries loadable before any
# of the 'use' statements below are processed. Dies if GSDLHOME is not set.
BEGIN {
    my $gsdl_home = $ENV{'GSDLHOME'};
    if (!defined $gsdl_home) {
        die "GSDLHOME not set\n";
    }

    # A single unshift prepends the list as a unit, so the plugins directory
    # ends up ahead of perllib in @INC.
    unshift @INC, "$gsdl_home/perllib/plugins", "$gsdl_home/perllib";
}
9
10use strict;
11no strict 'subs'; # allow barewords (eg STDERR) as function arguments
12no strict 'refs'; # allow filehandles to be variables and vice versa
13
14use encodings;
15use printusage;
16use parse2;
17use FileHandle;
18
# Descriptors for the command-line options this script accepts. The list is
# passed to parse2::parse() and, wrapped in $options, to the PrintUsage routines.
my $arguments = [
    {
        'name'      => 'language',
        'desc'      => '{scripts.language}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'plugin',
        'desc'      => '{srcreplace.plugin}',
        'type'      => 'string',
        'reqd'      => 'yes',
        'hiddengli' => 'yes',
    },
    {
        'name'    => 'verbosity',
        'desc'    => '{import.verbosity}',
        'type'    => 'int',
        'range'   => '0,',
        'deft'    => '1',
        'reqd'    => 'no',
        'modegli' => '4',
    },

    # Keep the following option: it is the flag that asks for the option
    # descriptions to be emitted as XML, and it IS used.
    # When run with -xml the generated output should be valid XML (used from GLI).
    {
        'name'      => 'xml',
        'desc'      => '',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
];

# Top-level description of the script: its name, description key and options.
my $options = {
    'name' => 'replace_srcdoc_with_html.pl',
    'desc' => '{srcreplace.desc}',
    'args' => $arguments,
};
50
51
# Replaces a source document with an html version of itself.
#
# Reads its input from the global @ARGV: the options described in $arguments
# plus exactly one remaining argument, the file to be replaced. The named
# plugin converts the file to html in a tmp area; the html (and any
# "<name>_files" folder of associated files) is then placed next to the
# original file, and the original file is removed.
#
# Output: prints the tailname (no extension) of the generated html file to
#         STDOUT on success.
# Dies:   on bad usage, a missing input file, a failed conversion, a name
#         collision in the destination directory, or a failed read/write.
#         Nothing is deleted from the destination directory when it dies.
sub main
{
    # Note: -verbosity is accepted on the command line (see $arguments) but
    # this script never reads it.
    my ($language, $plugin);

    my $xml = 0;

    my $hashParsingResult = {};

    # parse the options
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        die "\n";
    }

    # Copy across only the options this script acts on. This is done by name
    # rather than by string-eval'ing "$<optionname> = ...", so that an option
    # name can never overwrite another variable that happens to be in scope.
    $language = $hashParsingResult->{'language'} if exists $hashParsingResult->{'language'};
    $plugin   = $hashParsingResult->{'plugin'}   if exists $hashParsingResult->{'plugin'};
    $xml      = $hashParsingResult->{'xml'}      if exists $hashParsingResult->{'xml'};

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    # There should be one arg left after parsing (the filename)
    # Or the user may have specified -h, in which case we output the usage
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        die "\n";
    }

    # The filename of the document to be replaced is the first value
    # that remains after the options have been parsed out
    my $filename = $ARGV[0];
    if (!defined $filename || $filename !~ /\w/) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        print STDERR "You need to specify a filename\n";
        die "\n";
    }
    # check that file exists
    if (!-e $filename) {
        print STDERR "File $filename doesn't exist...\n";
        die "\n";
    }
    # check required options
    if (!defined $plugin || $plugin !~ /\w/) {
        &PrintUsage::print_txt_usage($options, "{srcreplace.params}");
        print STDERR "You need to specify a plugin\n";
        die "\n";
    }

    # ConvertToPlug.pm's subclasses should be available here through GLI,
    # but in cmdline version, these should be supplied
    require "$plugin.pm";

    # Construct the plugin with an ordinary class-method call inside a block
    # eval: the user-supplied plugin name is never compiled as perl source.
    my $plugobj = eval { $plugin->new() };
    die "$@" if $@;

    # ...and initialize it
    $plugobj->init(1, "STDERR", "STDERR");

    # find the import directory, where we want to create it in. This is where the file
    # passed as parameter by GLI is located.

    # fileparse is called by its full name below; load its module here rather
    # than relying on some other module having pulled it in already.
    require File::Basename;

    # derive tmp filename from input filename
    my ($tailname, $import_dir, $suffix)
        = &File::Basename::fileparse($filename, "\\.[^\\.]+\$");

    # Use the plugin's tmp_area_convert_file function to avoid duplicating code.
    # This method returns the name of the output file. In the case of Word docs,
    # if converted with windows_scripting a "filename_files" folder might have been
    # created for associated files. Same situation when using wvware with gsConvert.pl.
    # (When old gsConvert.pl was used, wvware created no separate directory, instead files
    # associated with the html generated would be at the same level in the tmp folder
    # where the output file was created.) Now it's the same no matter whether wvware
    # or windows_scripting did the conversion of the Word doc to html.
    my $output_filename = $plugobj->tmp_area_convert_file("html", $filename);

    # if something went wrong, then tmp_area_convert_file returns "", but can also check
    # for whether the output file exists or not. The cheap string checks come first so
    # that -e is never applied to an undefined value.
    if (!defined $output_filename || $output_filename eq "" || !-e $output_filename) {
        # if no output html file was created, then die so that GLI displays error message
        print STDERR "***replace_srcdoc_with_html.pl: no output file created for $filename ***\n";
        # Program NEEDS to die here (not return), else the error that occurred is not
        # transmitted to GLI and it thinks instead that execution was fine
        die "No html file created for $filename. Replacement did not take place\n";
    }

    # now, find out what to move:
    # it may be a single file, or, if it is a word doc, it may also have an image folder
    # which has the name "filename-without-extension_files"
    my ($tmp_name, $tmp_dir, $ext) = &File::Basename::fileparse($output_filename, "\\.[^\\.]+\$");

    # the name of the folder of associated files (which may or may not exist) in the tmp dir
    my $assoc_folder = &util::filename_cat($tmp_dir, $tmp_name."_files");

    # Need to check for naming collisions: in case there is already a file or folder
    # in the import directory by the name of those we want to move there from the tmp folder
    # First need to work out the full paths to any assoc folder if it were copied into the
    # import directory, and the main html file if it were copied into the import folder:
    my $new_assoc_folder = &util::filename_cat($import_dir, $tmp_name."_files");
    my $new_file = &util::filename_cat($import_dir, $tmp_name.$ext);

    # If there is an image folder, any naming collisions now would mean that the links of
    # the html file to the image folder would break if we changed the assoc_folder's name.
    # Therefore, in such a case this process dies after deleting both the file and assoc_folder.
    if (-e $assoc_folder && -e $new_assoc_folder) {
        # so an associated folder was generated, AND a folder by that name already exists
        # in the import folder where we want to copy the generated folder to.
        &util::rm($output_filename);
        &util::rm_r($assoc_folder); # we know directory exists, so remove dir
        die "Image folder $new_assoc_folder already exists.\nDeleting generated file and folder, else links to images will break.\n";
    }
    # Finally, check that no file already exists with the same name as the generated stand-alone
    # file. Have to do this *after* checking for name collisions with any assoc_folder, because
    # that also tries to remove any output files.
    if (-e $new_file) { # a file by that name already exists, delete the generated file
        &util::rm($output_filename);
        die "File $new_file already exists. Deleting generated file.\n";
    }

    # Now we know we have no file name collisions. We 'move' the html file by copying its
    # contents over and ensuring that these contents are utf8. If we don't do this, PDFs
    # replaced by html may fail, whereas those converted with PDFPlug will have succeeded.
    # Three-argument open is used so that whitespace or mode characters in a
    # filename are never interpreted as part of the open mode.
    open(my $fin, '<', $output_filename) or die "replace_srcdoc_with_html.pl: Unable to open $output_filename to ensure utf8...ERROR: $!\n";
    my $html_contents;
    {
        local $/ = undef; # Read entire file at once
        $html_contents = <$fin>; # Now file is read in as one single 'line'
        &unicode::ensure_utf8(\$html_contents); # turn any high bytes that aren't valid utf-8 into utf-8.
    }
    close($fin);

    # write the utf8 contents to the new file
    open(my $fout, '>', $new_file) or die "replace_srcdoc_with_html.pl: Unable to open $new_file for writing out utf8 html...ERROR: $!\n";

    # Buffered write errors (e.g. a full disk) may only surface at close, so
    # both print and close are checked before anything gets deleted.
    my $write_error;
    if (!(print {$fout} $html_contents)) {
        $write_error = "$!";
        close($fout);
    }
    elsif (!close($fout)) {
        $write_error = "$!";
    }
    if (defined $write_error) {
        # Remove the partial file (it did not exist before this run) and stop
        # here: the generated html and the source document are left in place.
        unlink($new_file);
        die "replace_srcdoc_with_html.pl: Unable to write utf8 html to $new_file...ERROR: $write_error\n";
    }

    # the new file is safely written, so the generated original can go
    &util::rm($output_filename);

    # move any associated folders containing associated files too
    if (-e $assoc_folder) {
        #print STDERR "****Folder for associated files is $assoc_folder\n";
        #&util::mv($assoc_folder, $import_dir); # doesn't work for me
        &util::cp_r($assoc_folder, $import_dir);
        &util::rm_r($assoc_folder);
    }

    # Now we can remove the source doc permanently (there are no assocdirs for source doc)
    &util::rm($filename);

    # need this output statement here, as GShell.java's runRemote() sets status to CANCELLED
    # if there is no output! (Therefore, it only had this adverse affect when running GSDL remotely)
    # Do something useful with it: return the new filename without extension, used by remote GS server
    print STDOUT "$tmp_name\n";
}
# Script entry point. main() takes its input from the global @ARGV; it does
# not read the argument list passed to it here.
223&main(@ARGV);
Note: See TracBrowser for help on using the repository browser.