source: main/trunk/greenstone2/perllib/gsprintf.pm@ 32303

Last change on this file since 32303 was 31898, checked in by ak19, 7 years ago

With Kathy's commit 31896 doing away with the need for the recently introduced gsprintf_multiline() by moving the regex replacements of backslash-n (and backslash-t) with newline and tab into gsprint::lookup_string, can now shift WebDownload calls to gsprintf_multine to use regular gsprintf. Putting back the original gsprintf method as gsprintf_multiline is no longer needed.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.5 KB
RevLine 
[5613]1###########################################################################
2#
3# gsprintf.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
[7828]25use strict;
26no strict 'refs';
[5613]27
28package gsprintf;
[7828]29require Exporter;
30@gsprintf::ISA=qw(Exporter);
[5613]31
[31419]32use Encode;
33
[6934]34use unicode;
[5613]35use util;
[27306]36use FileUtils;
[5613]37
[15007]38@gsprintf::EXPORT_OK = qw(gsprintf); # functions we can export into namespace
[5613]39
[7828]40
[6945]41# Language-specific resource bundle
42my %specialresourcebundle = ();
[6993]43our $specialoutputencoding; # our, so that it can be changed outside.
[6925]44
[6945]45# Default resource bundle
46my %defaultresourcebundle;
47my $defaultoutputencoding;
[6925]48
[6945]49# English resource bundle
50my %englishresourcebundle;
51my $englishoutputencoding;
52
53# Ignore the OutputEncoding strings in the resource bundles and output all text in UTF-8
54my $outputstringsinUTF8 = 0;
[25499]55my $freetext_xml_mode = 0;
[6945]56
57
[25499]58sub make_freetext_xml_safe
59{
60 my ($text) = @_;
61
62 $text =~ s/\&/&/g;
63 $text =~ s/\"/"/g;
64 $text =~ s/\</&lt;/g;
65 $text =~ s/\>/&gt;/g;
66
67 return $text;
68}
69
70
[5613]71sub gsprintf
72{
73 my ($handle, $text_string, @text_arguments) = @_;
74
75 # Return unless the required arguments were supplied
76 return unless (defined($handle) && defined($text_string));
[18981]77
[5613]78 # Look up all the strings in the dictionary
[6925]79 $text_string =~ s/(\{[^\}]+\})/&lookup_string($1)/eg;
[5613]80
81 # Resolve the string arguments using sprintf, then write out to the handle
[25499]82 my $text_string_resolved = sprintf($text_string, @text_arguments);
[31860]83
[25499]84 if ($freetext_xml_mode) {
85 $text_string_resolved = make_freetext_xml_safe($text_string_resolved);
86 }
[31860]87
[25499]88 print $handle $text_string_resolved;
[5613]89}
90
91
[31860]92
[6920]93sub lookup_string
[5613]94{
[31419]95 my ($stringkey, $native_perl) = @_;
96
97 if (!defined $native_perl || $native_perl != 1) {
98 $native_perl = 0;
99 }
[10228]100 return "" unless defined $stringkey;
[6945]101 # Try the language-specific resource bundle first
102 my $utf8string = $specialresourcebundle{$stringkey};
103 my $outputencoding = $specialoutputencoding;
104
105 # Try the default resource bundle next
106 if (!defined($utf8string)) {
[18981]107 # Load the default resource bundle if it is not already loaded
108 &load_default_resource_bundle() if (!%defaultresourcebundle);
109
110 $utf8string = $defaultresourcebundle{$stringkey};
111 $outputencoding = $defaultoutputencoding;
[6925]112 }
[18981]113
[6945]114 # Try the English resource bundle last
115 if (!defined($utf8string)) {
[18981]116 # Load the English resource bundle if it is not already loaded
117 &load_english_resource_bundle() if (!%englishresourcebundle);
118
119 $utf8string = $englishresourcebundle{$stringkey};
120 $outputencoding = $englishoutputencoding;
[6945]121 }
[18981]122
[6945]123 # No matching string was found, so just return the key
124 if (!defined($utf8string)) {
[18981]125 return $stringkey;
[6945]126 }
[31896]127 # we allow \n and \t as newlines and tabs.
128 $utf8string =~ s@([^\\])\\n@$1\n@g;
129 $utf8string =~ s@([^\\])\\t@$1\t@g;
130 $utf8string =~ s@\\\\@\\@g;
[18981]131
[31419]132 if ($native_perl ==1) {
133 # decode the utf8 string to perl internal format
134 return decode("utf8", $utf8string);
135 }
[18981]136
[31419]137 # Return the utf8 string if our output encoding is utf8
138 if (!defined($outputencoding) || $outputstringsinUTF8
139 || $outputencoding eq "utf8") {
140 return $utf8string;
141 }
142
[6934]143 # If an 8-bit output encoding has been defined, encode the string appropriately
[9969]144 my $encoded=unicode::unicode2singlebyte(&unicode::utf82unicode($utf8string), $outputencoding);
145
146 # If we successfully encoded it, return it
147 if ($encoded) { return $encoded }
[18981]148
[9969]149 # Otherwise, we can't convert to the requested encoding. return the utf8?
150 $specialoutputencoding='utf8';
151 return $utf8string;
[5613]152}
153
154
[6945]155sub load_language_specific_resource_bundle
[5613]156{
[6934]157 my $language = shift(@_);
[18981]158
[6945]159 # Read the specified resource bundle
[11632]160 my $resourcebundlename = "strings_" . $language . ".properties";
[18981]161
162 %specialresourcebundle
163 = &read_resource_bundle_and_extensions($ENV{'GSDLHOME'},"perllib",$resourcebundlename);
[6945]164 return if (!%specialresourcebundle);
[18981]165
[6945]166 # Read the output encoding to use from the resource bundle
167 if ($ENV{'GSDLOS'} =~ /windows/) {
[18981]168 $specialoutputencoding = $specialresourcebundle{"{OutputEncoding.windows}"};
[5613]169 }
[6945]170 else {
[18981]171 # see if there is an encoding set in the appropriate locale env var
172
173 foreach my $envvar ('LC_ALL', 'LANG') {
174 if (!exists $ENV{$envvar}) { next }
175 my $locale=$ENV{$envvar};
176 if ($locale !~ /^\w+\.(.+)$/) { next }
177 my $enc=lc($1);
178 $enc =~ s/-/_/g;
179 if ($enc eq 'utf_8') { $enc='utf8' } # normalise to this name
180 $specialoutputencoding = $enc;
181 return;
182 }
183 $specialoutputencoding = $specialresourcebundle{"{OutputEncoding.unix}"};
[6945]184 }
185}
[5613]186
[6945]187
188sub load_default_resource_bundle
189{
190 # Read the default resource bundle
[11632]191 my $resourcebundlename = "strings.properties";
[5613]192
[18981]193 %defaultresourcebundle
194 = &read_resource_bundle_and_extensions($ENV{'GSDLHOME'},"perllib",$resourcebundlename);
[10982]195 if (!%defaultresourcebundle) {
196 # $! will still have the error value for the last failed syscall
[25499]197
198 my $error_message = "$! $resourcebundlename\n";
199
200 if ($freetext_xml_mode) {
201 $error_message = make_freetext_xml_safe($error_message);
202 }
203
204 print STDERR $error_message;
205
[18981]206 # set something so we don't bother trying to load it again
207 $defaultresourcebundle{0}=undef;
[10982]208 return;
209 }
[18981]210
[6945]211 # Read the output encoding to use from the resource bundle
212 if ($ENV{'GSDLOS'} =~ /windows/) {
[18981]213 $defaultoutputencoding = $defaultresourcebundle{"{OutputEncoding.windows}"};
[5613]214 }
[6945]215 else {
[18981]216 $defaultoutputencoding = $defaultresourcebundle{"{OutputEncoding.unix}"};
[6945]217 }
218}
[5613]219
[6945]220
221sub load_english_resource_bundle
222{
223 # Ensure the English resource bundle hasn't already been loaded
[9362]224 if (%specialresourcebundle && $specialresourcebundle{"{Language.code}"} eq "en") {
[18981]225 %englishresourcebundle = %specialresourcebundle;
226 $englishoutputencoding = $specialoutputencoding;
[6945]227 }
[10982]228
229 if ($defaultresourcebundle{"{Language.code}"} &&
[17533]230 $defaultresourcebundle{"{Language.code}"} eq "en") {
[18981]231 %englishresourcebundle = %defaultresourcebundle;
232 $englishoutputencoding = $defaultoutputencoding;
[6945]233 }
[18981]234
[6945]235 # Read the English resource bundle
[11632]236 my $resourcebundlename = "strings_en.properties";
[6945]237
[18981]238 %englishresourcebundle
239 = &read_resource_bundle_and_extensions($ENV{'GSDLHOME'},"perllib",$resourcebundlename);
[6945]240 return if (!%englishresourcebundle);
[18981]241
[6945]242 # Read the output encoding to use from the resource bundle
243 if ($ENV{'GSDLOS'} =~ /windows/) {
[18981]244 $englishoutputencoding = $englishresourcebundle{"{OutputEncoding.windows}"};
[6945]245 }
246 else {
[18981]247 $englishoutputencoding = $englishresourcebundle{"{OutputEncoding.unix}"};
[6945]248 }
249}
250
251
[18981]252sub read_resource_bundle_and_extensions
253{
254 my ($bundle_base,$primary_dir,$resourcename) = @_;
255
256 my $primary_resourcebundlefile
[27306]257 = &FileUtils::filenameConcatenate($bundle_base,$primary_dir,$resourcename);
[18981]258
259 my $resourcebundle = read_resource_bundle($primary_resourcebundlefile);
260 return if (!defined $resourcebundle);
261
[19026]262 if (defined $ENV{'GSDLEXTS'}) {
263 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
264 foreach my $e (@extensions) {
265 my $ext_base
[27306]266 = &FileUtils::filenameConcatenate($bundle_base,"ext",$e);
[19026]267
268 my $ext_resourcebundlefile
[27306]269 = &FileUtils::filenameConcatenate($ext_base,$primary_dir,$resourcename);
[19026]270
271 # can ignore return value (will be same reference to $resourcebundle)
272 read_resource_bundle($ext_resourcebundlefile,$resourcebundle);
273 }
[18981]274 }
[21289]275 if (defined $ENV{'GSDL3EXTS'}) {
276 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
277 foreach my $e (@extensions) {
278 my $ext_base
[27306]279 = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"ext",$e);
[21289]280
281 my $ext_resourcebundlefile
[27306]282 = &FileUtils::filenameConcatenate($ext_base,$primary_dir,$resourcename);
[21289]283
284 # can ignore return value (will be same reference to $resourcebundle)
285 read_resource_bundle($ext_resourcebundlefile,$resourcebundle);
286 }
287 }
[18981]288
289 return %$resourcebundle;
290}
291
292
[6945]293sub read_resource_bundle
294{
[18981]295 my ($resourcebundlefilepath,$resourcebundle) = @_;
[6945]296
[18981]297 if (!open(RESOURCE_BUNDLE, "<$resourcebundlefilepath")) {
298 # When called for the first time (primary resource), $resourcebundle
299 # is not defined (=undef). If the file does not exist, then we return
300 # this 'undef' to signal it was not found
301 # For an extension resource bundle, if it does not exist this
302 # is not so serious (in fact quite likely) => return what we
303 # have built up so far
304
305 return $resourcebundle;
306 }
[6945]307
[18981]308 if (!defined $resourcebundle) {
309 # resource files exists, so exect some content to be stored
310 $resourcebundle = {};
311 }
312
[6945]313 # Load this resource bundle
[6934]314 my @resourcebundlelines = <RESOURCE_BUNDLE>;
[5613]315 close(RESOURCE_BUNDLE);
316
[6945]317 # Parse the resource bundle
[18981]318
[7828]319 foreach my $line (@resourcebundlelines) {
[17533]320 # Remove any trailing whitespace
321 $line =~ s/(\s*)$//;
[5613]322
[17533]323 # Ignore comments and empty lines
324 if ($line !~ /^\#/ && $line ne "") {
325 # Parse key (everything up to the first colon)
326 if ($line =~ m/^([^:]+):(.+)$/) {
327 my $linekey = "{" . $1 . "}";
328 my $linetext = $2;
329 $linetext =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d).*$//i;
[5613]330
[17533]331 # Map key to text
[18981]332 $resourcebundle->{$linekey} = $linetext;
[17533]333 }
334 }
[5613]335 }
[6934]336
[18981]337 return $resourcebundle;
[5613]338}
339
340
[25499]341sub set_print_freetext_for_xml
342{
343 $freetext_xml_mode = 1;
344}
345
346sub set_print_xml_tags
347{
348 $freetext_xml_mode = 0;
349}
350
[6945]351sub output_strings_in_UTF8
[6934]352{
[6945]353 $outputstringsinUTF8 = 1;
[6934]354}
355
356
[15007]357sub debug_unicode_string
358{
359 join("",
[17533]360 map { $_ > 255 ? # if wide character...
361 sprintf("\\x{%04X}", $_) : # \x{...}
362 chr($_)
363 } unpack("U*", $_[0])); # unpack Unicode characters
[15007]364}
365
366
[5613]3671;
Note: See TracBrowser for help on using the repository browser.