source: main/trunk/greenstone2/perllib/cfgread.pm@ 24586

Last change on this file since 24586 was 24586, checked in by ak19, 13 years ago

Dr Bainbridge fixed the unicode bug that Diego had found when using an hfile to specify the Hierarchy of a classifier. The hfile was so far being read as a regular cfg file read, but the read operation needed to read in UTF8 mode to preserve the encodings of characters in the file.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.8 KB
Line 
1###########################################################################
2#
3# cfgread.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files
27
28package cfgread;
29
30use strict; no strict 'refs';
31
32
33sub read_cfg_line {
34 my ($handle) = @_;
35 my $line = "";
36 my @line = ();
37 my $linecontinues = 0;
38
39 while (defined($line = <$handle>)) {
40 $line =~ s/^\#.*$//; # remove comments
41 $line =~ s/\cM|\cJ//g; # remove end-of-line characters
42 $line =~ s/^\s+//; # remove initial white space
43 # Merge with following line if a quoted phrase is left un-closed.
44 if ($line =~ m/^([\"\'])/ || $line =~ m/[^\\]([\"\'])/) {
45 my $quote=$1;
46
47 # Improve speed substantially by not doing the regular expression on $line in the while loop
48 # (since $line gets longer each iteration, the regular expression gets slower and slower)
49 # Instead we just check each new line to see if it finishes the quoted multi-line value
50 if ($line !~ m/$quote(.*?[^\\])?(\\\\)*$quote/)
51 {
52 my $nextline;
53 while (defined($nextline = <$handle>))
54 {
55 $nextline =~ s/\r?\n//; # remove end-of-line
56 $line .= " " . $nextline;
57
58 # Break out of the while loop if we've found the end of the multi-line value
59 last if ($nextline =~ m/^(.*?[^\\])?(\\\\)*$quote/);
60 }
61 }
62 }
63 $linecontinues = $line =~ s/\\$//;
64
65 while ($line =~ s/\s*(\".*?[^\\](\\\\)*\"|\'.*?[^\\](\\\\)*\'|\S+)\s*//) {
66 if (defined $1) {
67 # remove any enclosing quotes
68 my $entry = $1;
69 $entry =~ s/^([\"\'])(.*)\1$/$2/;
70
71 # substitute an environment variables
72## $entry =~ s/\$(\w+)/$ENV{$1}/g;
73 $entry =~ s/\$\{(\w+)\}/$ENV{$1}/g;
74 push (@line, $entry);
75 } else {
76 push (@line, "");
77 }
78 }
79
80 if (scalar(@line) > 0 && !$linecontinues) {
81# print STDERR "line: \"" . join ("\" \"", @line) . "\"\n";
82 return \@line;
83 }
84 }
85
86 return undef;
87}
88
89sub write_cfg_line {
90 my ($handle, $line) = @_;
91 print $handle join ("\t", @$line), "\n";
92}
93
94
95# stringexp, arrayexp, hashexp,arrayarrayexp and hashhashexp
96# should be something like '^(this|that)$'
97sub read_cfg_file {
98 my ($filename, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
99 $hashhashexp) = @_;
100 my ($line);
101 my $data = {};
102
103 if (open (COLCFG, $filename)) {
104 while (defined ($line = &read_cfg_line('COLCFG'))) {
105 if (scalar(@$line) >= 2) {
106 my $key = shift (@$line);
107 if (defined $stringexp && $key =~ /$stringexp/) {
108 $data->{$key} = shift (@$line);
109
110 } elsif (defined $arrayexp && $key =~ /$arrayexp/) {
111 push (@{$data->{$key}}, @$line);
112
113 } elsif (defined $hashexp && $key =~ /$hashexp/) {
114 my $k = shift @$line;
115 my $v = shift @$line;
116 $data->{$key}->{$k} = $v;
117 } elsif (defined $arrayarrayexp && $key =~ /$arrayarrayexp/) {
118 if (!defined $data->{$key}) {
119 $data->{$key} = [];
120 }
121 push (@{$data->{$key}}, $line);
122 }
123 elsif (defined $hashhashexp && $key =~ /$hashhashexp/) {
124 my $k = shift @$line;
125 my $p = shift @$line;
126 my $v = shift @$line;
127 if (!defined $v) {
128 $v = $p;
129 $p = 'default';
130 }
131 $data->{$key}->{$k}->{$p} = $v;
132 }
133 }
134 }
135 close (COLCFG);
136
137 } else {
138 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
139 }
140
141 return $data;
142}
143
144# If the cfg file contains unicode characters, use this method to read from it
145# Used by HFileHierarchy classifier, since an HFile is read as a cfg file, but
146# can contain unicode characters.
147sub read_cfg_file_unicode {
148 my ($filename, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
149 $hashhashexp) = @_;
150 my ($line);
151 my $data = {};
152
153 if (open (COLCFG, $filename)) {
154 binmode(COLCFG,":utf8");
155 while (defined ($line = &read_cfg_line('COLCFG'))) {
156 if (scalar(@$line) >= 2) {
157
158 #map { decode("utf8",$_) } @$line; #use Encode;
159
160 my $key = shift (@$line);
161 if (defined $stringexp && $key =~ /$stringexp/) {
162 $data->{$key} = shift (@$line);
163
164 } elsif (defined $arrayexp && $key =~ /$arrayexp/) {
165 push (@{$data->{$key}}, @$line);
166
167 } elsif (defined $hashexp && $key =~ /$hashexp/) {
168 my $k = shift @$line;
169 my $v = shift @$line;
170 $data->{$key}->{$k} = $v;
171 } elsif (defined $arrayarrayexp && $key =~ /$arrayarrayexp/) {
172 if (!defined $data->{$key}) {
173 $data->{$key} = [];
174 }
175 push (@{$data->{$key}}, $line);
176 }
177 elsif (defined $hashhashexp && $key =~ /$hashhashexp/) {
178 my $k = shift @$line;
179 my $p = shift @$line;
180 my $v = shift @$line;
181 if (!defined $v) {
182 $v = $p;
183 $p = 'default';
184 }
185 $data->{$key}->{$k}->{$p} = $v;
186 }
187 }
188 }
189 close (COLCFG);
190
191 } else {
192 print STDERR "cfgread::read_cfg_file_unicode couldn't read the cfg file $filename\n";
193 }
194
195 return $data;
196}
197
198
199# stringexp, arrayexp, hashexp and arrayarrayexp
200# should be something like '^(this|that)$'
201sub write_cfg_file {
202 my ($filename, $data, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
203 $hashhashexp) = @_;
204
205 if (open (COLCFG, ">$filename")) {
206 foreach my $key (sort(keys(%$data))) {
207 if ($key =~ /$stringexp/) {
208 &write_cfg_line ('COLCFG', [$key, $data->{$key}]);
209 } elsif ($key =~ /$arrayexp/) {
210 &write_cfg_line ('COLCFG', [$key, @{$data->{$key}}]);
211 } elsif ($key =~ /$hashexp/) {
212 foreach my $k (keys (%{$data->{$key}})) {
213 &write_cfg_line ('COLCFG', [$key, $k, $data->{$key}->{$k}]);
214 }
215 } elsif ($key =~ /$arrayarrayexp/) {
216 foreach my $k (@{$data->{$key}}) {
217 &write_cfg_line ('COLCFG', [$key, @$k]);
218 }
219 } elsif ($key =~ /$hashhashexp/) {
220 foreach my $k (keys (%{$data->{$key}})) {
221 foreach my $p (keys (%{$data->{$key}->{$k}})) {
222 if ($p =~ /default/) {
223 &write_cfg_line ('COLCFG',
224 [$key, $k, $data->{$key}->{$k}]);
225 }
226 else {
227 &write_cfg_line ('COLCFG',
228 [$key, $k, $p, $data->{$key}->{$k}->{$p}]);
229 }
230 }
231 }
232 }
233 }
234 close (COLCFG);
235 } else {
236 print STDERR "cfgread::write_cfg_file couldn't write the cfg file $filename\n";
237 }
238}
239
240
2411;
Note: See TracBrowser for help on using the repository browser.