root/gsdl/trunk/perllib/gsprintf.pm @ 15007

Revision 15007, 7.9 KB (checked in by davidb, 12 years ago)

"debug_unicode_string" function added to help figure out uncoding problems. Prints out any character above 255 in \x{HEXCODE} form.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# gsprintf.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25use strict;
26no strict 'refs';
27
28package gsprintf;
29require Exporter;
30@gsprintf::ISA=qw(Exporter);
31
32use unicode;
33use util;
34
35@gsprintf::EXPORT_OK = qw(gsprintf); # functions we can export into namespace
36
37
38# Language-specific resource bundle
39my %specialresourcebundle = ();
40our $specialoutputencoding; # our, so that it can be changed outside.
41
42# Default resource bundle
43my %defaultresourcebundle;
44my $defaultoutputencoding;
45
46# English resource bundle
47my %englishresourcebundle;
48my $englishoutputencoding;
49
50# Ignore the OutputEncoding strings in the resource bundles and output all text in UTF-8
51my $outputstringsinUTF8 = 0;
52
53
54sub gsprintf
55{
56    my ($handle, $text_string, @text_arguments) = @_;
57
58    # Return unless the required arguments were supplied
59    return unless (defined($handle) && defined($text_string));
60
61    # Look up all the strings in the dictionary
62    $text_string =~ s/(\{[^\}]+\})/&lookup_string($1)/eg;
63
64    # Resolve the string arguments using sprintf, then write out to the handle
65    print $handle sprintf($text_string, @text_arguments);
66}
67
68
69sub lookup_string
70{
71    my ($stringkey) = @_;
72    return "" unless defined $stringkey;
73    # Try the language-specific resource bundle first
74    my $utf8string = $specialresourcebundle{$stringkey};
75    my $outputencoding = $specialoutputencoding;
76
77    # Try the default resource bundle next
78    if (!defined($utf8string)) {
79    # Load the default resource bundle if it is not already loaded
80    &load_default_resource_bundle() if (!%defaultresourcebundle);
81
82    $utf8string = $defaultresourcebundle{$stringkey};
83    $outputencoding = $defaultoutputencoding;
84    }
85
86    # Try the English resource bundle last
87    if (!defined($utf8string)) {
88    # Load the English resource bundle if it is not already loaded
89    &load_english_resource_bundle() if (!%englishresourcebundle);
90
91    $utf8string = $englishresourcebundle{$stringkey};
92    $outputencoding = $englishoutputencoding;
93    }
94
95    # No matching string was found, so just return the key
96    if (!defined($utf8string)) {
97    return $stringkey;
98    }
99
100    # Return the string matching the key
101    return $utf8string if (!defined($outputencoding) || $outputstringsinUTF8
102               || $outputencoding eq "utf8");
103
104    # If an 8-bit output encoding has been defined, encode the string appropriately
105    my $encoded=unicode::unicode2singlebyte(&unicode::utf82unicode($utf8string), $outputencoding);
106   
107    # If we successfully encoded it, return it
108    if ($encoded) { return $encoded }
109
110    # Otherwise, we can't convert to the requested encoding. return the utf8?
111    $specialoutputencoding='utf8';
112    return $utf8string;
113}
114
115
116sub load_language_specific_resource_bundle
117{
118    my $language = shift(@_);
119
120    # Read the specified resource bundle
121    my $resourcebundlehome = &util::filename_cat("$ENV{'GSDLHOME'}", "perllib");
122    my $resourcebundlename = "strings_" . $language . ".properties";
123    my $resourcebundlefile = &util::filename_cat($resourcebundlehome, $resourcebundlename);
124
125    %specialresourcebundle = &read_resource_bundle($resourcebundlefile);
126    return if (!%specialresourcebundle);
127
128    # Read the output encoding to use from the resource bundle
129    if ($ENV{'GSDLOS'} =~ /windows/) {
130    $specialoutputencoding = $specialresourcebundle{"{OutputEncoding.windows}"};
131    }
132    else {
133    # see if there is an encoding set in the appropriate locale env var
134
135    foreach my $envvar ('LC_ALL', 'LANG') {
136        if (!exists $ENV{$envvar}) { next }
137        my $locale=$ENV{$envvar};
138        if ($locale !~ /^\w+\.(.+)$/) { next }
139        my $enc=lc($1);
140        $enc =~ s/-/_/g;
141        if ($enc eq 'utf_8') { $enc='utf8' } # normalise to this name
142        $specialoutputencoding = $enc;
143        return;
144    }
145    $specialoutputencoding = $specialresourcebundle{"{OutputEncoding.unix}"};
146    }
147}
148
149
150sub load_default_resource_bundle
151{
152    # Read the default resource bundle
153    my $resourcebundlehome = &util::filename_cat("$ENV{'GSDLHOME'}", "perllib");
154    my $resourcebundlename = "strings.properties";
155    my $resourcebundlefile = &util::filename_cat($resourcebundlehome, $resourcebundlename);
156
157    %defaultresourcebundle = &read_resource_bundle($resourcebundlefile);
158    if (!%defaultresourcebundle) {
159        # $! will still have the error value for the last failed syscall
160        print STDERR "$! $resourcebundlefile\n";
161    # set something so we don't bother trying to load it again
162    $defaultresourcebundle{0}=undef;
163        return;
164    }
165
166    # Read the output encoding to use from the resource bundle
167    if ($ENV{'GSDLOS'} =~ /windows/) {
168    $defaultoutputencoding = $defaultresourcebundle{"{OutputEncoding.windows}"};
169    }
170    else {
171    $defaultoutputencoding = $defaultresourcebundle{"{OutputEncoding.unix}"};
172    }
173}
174
175
176sub load_english_resource_bundle
177{
178    # Ensure the English resource bundle hasn't already been loaded
179    if (%specialresourcebundle && $specialresourcebundle{"{Language.code}"} eq "en") {
180    %englishresourcebundle = %specialresourcebundle;
181    $englishoutputencoding = $specialoutputencoding;
182    }
183   
184    if ($defaultresourcebundle{"{Language.code}"} &&
185        $defaultresourcebundle{"{Language.code}"} eq "en") {
186    %englishresourcebundle = %defaultresourcebundle;
187    $englishoutputencoding = $defaultoutputencoding;
188    }
189
190    # Read the English resource bundle
191    my $resourcebundlehome = &util::filename_cat("$ENV{'GSDLHOME'}", "perllib");
192    my $resourcebundlename = "strings_en.properties";
193    my $resourcebundlefile = &util::filename_cat($resourcebundlehome, $resourcebundlename);
194
195    %englishresourcebundle = &read_resource_bundle($resourcebundlefile);
196    return if (!%englishresourcebundle);
197
198    # Read the output encoding to use from the resource bundle
199    if ($ENV{'GSDLOS'} =~ /windows/) {
200    $englishoutputencoding = $englishresourcebundle{"{OutputEncoding.windows}"};
201    }
202    else {
203    $englishoutputencoding = $englishresourcebundle{"{OutputEncoding.unix}"};
204    }
205}
206
207
208sub read_resource_bundle
209{
210    my ($resourcebundlefilepath) = shift(@_);
211
212    # Return an empty hash if the specified resource bundle could not be read
213    return () if (!open(RESOURCE_BUNDLE, "<$resourcebundlefilepath"));
214
215    # Load this resource bundle
216    my @resourcebundlelines = <RESOURCE_BUNDLE>;
217    close(RESOURCE_BUNDLE);
218
219    # Parse the resource bundle
220    my %resourcebundle = ();
221    foreach my $line (@resourcebundlelines) {
222    # Remove any trailing whitespace
223    $line =~ s/(\s*)$//;
224
225    # Ignore comments and empty lines
226    if ($line !~ /^\#/ && $line ne "") {
227        # Parse key (everything up to the first colon)
228        $line =~ /^([^:]+):(.+)$/;
229        my $linekey = "{" . $1 . "}";
230        my $linetext = $2;
231        $linetext =~ s/(\s*)\#\s+Updated\s+(\d?\d-\D\D\D-\d\d\d\d)\s*$//i;
232
233        # Map key to text
234        $resourcebundle{$linekey} = $linetext;
235    }
236    }
237
238    return %resourcebundle;
239}
240
241
242sub output_strings_in_UTF8
243{
244    $outputstringsinUTF8 = 1;
245}
246
247
248sub debug_unicode_string
249{
250    join("",
251     map { $_ > 255 ?                      # if wide character...
252           sprintf("\\x{%04X}", $_) :  # \x{...}
253           chr($_)         
254           } unpack("U*", $_[0]));         # unpack Unicode characters
255}
256
257
2581;
Note: See TracBrowser for help on using the browser.