source: main/trunk/greenstone2/perllib/cfgread.pm@ 22431

Last change on this file since 22431 was 20390, checked in by mdewsnip, 15 years ago

Improved cfgread::read_cfg_line() so it doesn't take forever to parse collect.cfg files containing large multi-line values (e.g. format statements, collectionextra values etc.). It was very slow previously because it was doing a fairly complex regular expression on the full value each time it read a line for the multi-line value. Since the value was getting longer and longer with each line read, the regular expression was taking longer and longer.

By Michael Dewsnip, DL Consulting Ltd.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
Line 
1###########################################################################
2#
3# cfgread.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files
27
28package cfgread;
29
30use strict; no strict 'refs';
31
32
33sub read_cfg_line {
34 my ($handle) = @_;
35 my $line = "";
36 my @line = ();
37 my $linecontinues = 0;
38
39 while (defined($line = <$handle>)) {
40 $line =~ s/^\#.*$//; # remove comments
41 $line =~ s/\cM|\cJ//g; # remove end-of-line characters
42 $line =~ s/^\s+//; # remove initial white space
43 # Merge with following line if a quoted phrase is left un-closed.
44 if ($line =~ m/^([\"\'])/ || $line =~ m/[^\\]([\"\'])/) {
45 my $quote=$1;
46
47 # Improve speed substantially by not doing the regular expression on $line in the while loop
48 # (since $line gets longer each iteration, the regular expression gets slower and slower)
49 # Instead we just check each new line to see if it finishes the quoted multi-line value
50 if ($line !~ m/$quote(.*?[^\\])?(\\\\)*$quote/)
51 {
52 my $nextline;
53 while (defined($nextline = <$handle>))
54 {
55 $nextline =~ s/\r?\n//; # remove end-of-line
56 $line .= " " . $nextline;
57
58 # Break out of the while loop if we've found the end of the multi-line value
59 last if ($nextline =~ m/^(.*?[^\\])?(\\\\)*$quote/);
60 }
61 }
62 }
63 $linecontinues = $line =~ s/\\$//;
64
65 while ($line =~ s/\s*(\".*?[^\\](\\\\)*\"|\'.*?[^\\](\\\\)*\'|\S+)\s*//) {
66 if (defined $1) {
67 # remove any enclosing quotes
68 my $entry = $1;
69 $entry =~ s/^([\"\'])(.*)\1$/$2/;
70
71 # substitute an environment variables
72## $entry =~ s/\$(\w+)/$ENV{$1}/g;
73 $entry =~ s/\$\{(\w+)\}/$ENV{$1}/g;
74 push (@line, $entry);
75 } else {
76 push (@line, "");
77 }
78 }
79
80 if (scalar(@line) > 0 && !$linecontinues) {
81# print STDERR "line: \"" . join ("\" \"", @line) . "\"\n";
82 return \@line;
83 }
84 }
85
86 return undef;
87}
88
89sub write_cfg_line {
90 my ($handle, $line) = @_;
91 print $handle join ("\t", @$line), "\n";
92}
93
94
95# stringexp, arrayexp, hashexp,arrayarrayexp and hashhashexp
96# should be something like '^(this|that)$'
97sub read_cfg_file {
98 my ($filename, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
99 $hashhashexp) = @_;
100 my ($line);
101 my $data = {};
102
103 if (open (COLCFG, $filename)) {
104 while (defined ($line = &read_cfg_line('COLCFG'))) {
105 if (scalar(@$line) >= 2) {
106 my $key = shift (@$line);
107 if (defined $stringexp && $key =~ /$stringexp/) {
108 $data->{$key} = shift (@$line);
109
110 } elsif (defined $arrayexp && $key =~ /$arrayexp/) {
111 push (@{$data->{$key}}, @$line);
112
113 } elsif (defined $hashexp && $key =~ /$hashexp/) {
114 my $k = shift @$line;
115 my $v = shift @$line;
116 $data->{$key}->{$k} = $v;
117 } elsif (defined $arrayarrayexp && $key =~ /$arrayarrayexp/) {
118 if (!defined $data->{$key}) {
119 $data->{$key} = [];
120 }
121 push (@{$data->{$key}}, $line);
122 }
123 elsif (defined $hashhashexp && $key =~ /$hashhashexp/) {
124 my $k = shift @$line;
125 my $p = shift @$line;
126 my $v = shift @$line;
127 if (!defined $v) {
128 $v = $p;
129 $p = 'default';
130 }
131 $data->{$key}->{$k}->{$p} = $v;
132 }
133 }
134 }
135 close (COLCFG);
136
137 } else {
138 print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
139 }
140
141 return $data;
142}
143
144
145# stringexp, arrayexp, hashexp and arrayarrayexp
146# should be something like '^(this|that)$'
147sub write_cfg_file {
148 my ($filename, $data, $stringexp, $arrayexp, $hashexp, $arrayarrayexp,
149 $hashhashexp) = @_;
150
151 if (open (COLCFG, ">$filename")) {
152 foreach my $key (sort(keys(%$data))) {
153 if ($key =~ /$stringexp/) {
154 &write_cfg_line ('COLCFG', [$key, $data->{$key}]);
155 } elsif ($key =~ /$arrayexp/) {
156 &write_cfg_line ('COLCFG', [$key, @{$data->{$key}}]);
157 } elsif ($key =~ /$hashexp/) {
158 foreach my $k (keys (%{$data->{$key}})) {
159 &write_cfg_line ('COLCFG', [$key, $k, $data->{$key}->{$k}]);
160 }
161 } elsif ($key =~ /$arrayarrayexp/) {
162 foreach my $k (@{$data->{$key}}) {
163 &write_cfg_line ('COLCFG', [$key, @$k]);
164 }
165 } elsif ($key =~ /$hashhashexp/) {
166 foreach my $k (keys (%{$data->{$key}})) {
167 foreach my $p (keys (%{$data->{$key}->{$k}})) {
168 if ($p =~ /default/) {
169 &write_cfg_line ('COLCFG',
170 [$key, $k, $data->{$key}->{$k}]);
171 }
172 else {
173 &write_cfg_line ('COLCFG',
174 [$key, $k, $p, $data->{$key}->{$k}->{$p}]);
175 }
176 }
177 }
178 }
179 }
180 close (COLCFG);
181 } else {
182 print STDERR "cfgread::write_cfg_file couldn't write the cfg file $filename\n";
183 }
184}
185
186
1871;
Note: See TracBrowser for help on using the repository browser.