source: main/tags/3.01-src/mgpp/text/UCArray.cpp@ 29148

Last change on this file since 29148 was 8692, checked in by kjdon, 20 years ago

Added the changes from Emanuel Dejanu (Simple Words) - mostly efficiency changes. For example, changing i++ to ++i, delete xxx to delete []xxx, some stuff to do with UCArrays...

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.8 KB
Line 
1/**************************************************************************
2 *
3 * UCArray.cpp -- vector based string class
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#include "UCArray.h"
23#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
24
25
26void SetCStr (UCArray &text, const char *cStr) {
27 text.erase(text.begin(), text.end());
28
29 while (*cStr != '\0') {
30 text.push_back (*cStr);
31 ++cStr;
32 }
33}
34
35void SetCStr (UCArray &text, const char *cStr, size_t nSizeHint) {
36 text.erase(text.begin(), text.end());
37
38 // reserve the needed space in advance
39 if (text.capacity() < nSizeHint + 1) {
40 text.reserve(nSizeHint + 1);
41 }
42 while (*cStr != '\0') {
43 text.push_back (*cStr);
44 ++cStr;
45 }
46}
47
48char * GetCStr(const UCArray& text) {
49
50 char *cstr = new char[text.size()+1];
51 UCArray::const_iterator here = text.begin();
52 UCArray::const_iterator end = text.end();
53
54 int i = 0;
55 while (here != end) {
56 cstr[i] = text[i];
57 ++here; ++i;
58 }
59 cstr[i]='\0';
60 return cstr;
61}
62
63bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr)
64{
65 if ((cStr == NULL || *cStr == '\0') && text.empty()) return true;
66 UCArray::const_iterator thisUC = text.begin();
67 UCArray::const_iterator endUC = text.end();
68 while (thisUC != endUC && *cStr != '\0') {
69 if (*thisUC != *cStr) return false;
70 ++cStr; ++thisUC;
71 }
72 if (thisUC == endUC && *cStr == '\0') return true;
73 return false;
74}
75
76ostream &operator<<(ostream &s, const UCArray &a) {
77 UCArray::const_iterator here = a.begin();
78 UCArray::const_iterator end = a.end();
79 while (here != end) {
80 s << *here;
81 ++here;
82 }
83
84 return s;
85}
86
87
// Reads a variable-length unsigned integer from f into n.
//
// The value is stored seven bits per byte, least significant group first;
// a set high bit means another byte follows.  At most five bytes are
// consumed (decoding stops once 32 or more bit positions have been filled).
//
// Returns false, leaving n untouched, if the end of file or a read error
// is hit before the value is complete.
bool ReadVarLenUL (FILE *f, unsigned long &n) {
  unsigned long value = 0;
  unsigned int bitPos = 0;
  int c = 0;

  do {
    // keep the int result so EOF (end of file or error) can be told apart
    // from a genuine 0xff data byte
    c = fgetc (f);
    if (c == EOF) return false;

    // widen before shifting: a shift by 28 would overflow a plain int
    value |= static_cast<unsigned long> (c & 0x7f) << bitPos;
    bitPos += 7;
  } while (c >= 0x80 && bitPos < 32);

  n = value;

  return true;
}
104
// Writes n to f as a variable-length unsigned integer: seven bits per
// byte, least significant group first, with the high bit set on every
// byte except the last.  Zero is written as a single 0x00 byte.
//
// Returns false as soon as a byte cannot be written or the stream reports
// an error.
bool WriteVarLenUL (FILE *f, unsigned long n) {
  unsigned long remaining = n;

  do {
    unsigned char b = static_cast<unsigned char> (remaining & 0x7f);
    remaining >>= 7;
    if (remaining != 0) b |= 0x80; // more groups follow

    if (fputc (b, f) == EOF || ferror (f) != 0) return false;
  } while (remaining != 0);

  return true;
}
117
118
119bool ReadUL (FILE *f, unsigned long &n) {
120 if (fread (&n, sizeof (unsigned long), 1, f) <= 0) return false;
121 NTOHUL (n);
122 return true;
123}
124
125
126bool WriteUL (FILE *f, unsigned long n) {
127 HTONUL (n);
128 return (fwrite (&n, sizeof (unsigned long), 1, f) > 0);
129}
130
131bool ReadF (FILE *f, float &n) {
132 if (fread (&n, sizeof (float), 1, f) <= 0) return false;
133 NTOHF(n);
134 return true;
135}
136
137bool WriteF (FILE *f, float n) {
138 HTONF(n);
139 return (fwrite (&n, sizeof (float), 1, f) > 0);
140}
141
142bool ReadD (FILE *f, double &n) {
143 if (fread (&n, sizeof (double), 1, f) <= 0) return false;
144 NTOHD(n);
145 return true;
146}
147
148bool WriteD (FILE *f, double n) {
149 HTOND(n);
150 return (fwrite (&n, sizeof (double), 1, f) > 0);
151}
152
153bool ReadUCArray (FILE *f, UCArray &a) {
154 // clear the array in preparation
155 a.erase (a.begin(), a.end());
156
157 // read in the array size
158 unsigned long arraySize = 0;
159 if (!ReadVarLenUL (f, arraySize)) return false;
160
161 // reserve the needed space in advance
162 if (a.capacity() < arraySize + 1) {
163 a.reserve(arraySize + 1);
164 }
165
166 // read in the array
167 unsigned char b = 0;
168 while (arraySize > 0) {
169 b = fgetc (f);
170 if (feof(f)) return false;
171 a.push_back (b);
172
173 --arraySize;
174 }
175
176 return true;
177}
178
179bool WriteUCArray (FILE *f, const UCArray &a) {
180 // write out the array size
181 if (!WriteVarLenUL (f, a.size())) return false;
182
183 UCArray::const_iterator here = a.begin();
184 UCArray::const_iterator end = a.end();
185 while (here != end) {
186 fputc (*here, f);
187 if (ferror (f) != 0) return false;
188
189 ++here;
190 }
191
192 return true;
193}
194
/*
 * Lookup table that folds upper and lower case letters together for
 * case-independent comparison.  It is indexed by byte value and is based
 * on the ASCII character sequence: 0x41-0x5a ('A'-'Z') map to 0x61-0x7a
 * ('a'-'z').  The upper half repeats the same pattern with the high bit
 * set (0xc1-0xda map to 0xe1-0xfa).  Every other byte maps to itself.
 *
 * The entries are written as integer constants rather than character
 * literals: a literal such as '\200' is negative where plain char is
 * signed, and may not be used to brace-initialise an unsigned char.
 */
static const unsigned char casecharmap[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
  0xf8, 0xf9, 0xfa, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
234
235
236int DictCompare (const UCArray &a1, const UCArray &a2) {
237 unsigned int l1 = a1.size();
238 unsigned int l2 = a2.size();
239 unsigned int l = (l1 < l2) ? l1 : l2;
240 int pos = 0;
241 register int diff = 0;
242
243 UCArray::const_iterator a1Here = a1.begin();
244 UCArray::const_iterator a2Here = a2.begin();
245
246 while (l--) {
247 if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) != 0)
248 return diff;
249 if (pos == 0 && (diff = *a1Here - *a2Here) != 0)
250 pos = diff;
251
252 ++a1Here;
253 ++a2Here;
254 }
255
256 return ((l1 - l2) ? (l1 - l2) : (pos));
257}
258
259// does the first string start with the second?
260bool StartsWith (const UCArray &a1, const UCArray &a2) {
261 unsigned int l1 = a1.size();
262 unsigned int l2 = a2.size();
263 if (l2 > l1) {
264 // if the prefix is longer than the string, it can't start with it
265 return false;
266 }
267 unsigned int l =l2;
268 UCArray::const_iterator a1Here = a1.begin();
269 UCArray::const_iterator a2Here = a2.begin();
270
271 while (l--) {
272 if ((*a1Here != *a2Here))
273 return false;
274 ++a1Here;
275 ++a2Here;
276 }
277 return true; // we have successfully matched the whole way
278
279}
280
281// does the first string start with the second, ignoring case?
282bool StartsWithCasefold(const UCArray &a1, const UCArray &a2) {
283 unsigned int l1 = a1.size();
284 unsigned int l2 = a2.size();
285 if (l2 > l1) {
286 // if the prefix is longer than the string, it can't start with it
287 return false;
288 }
289 unsigned int l =l2;
290 UCArray::const_iterator a1Here = a1.begin();
291 UCArray::const_iterator a2Here = a2.begin();
292
293 while (l--) {
294 if (casecharmap[*a1Here] != casecharmap[*a2Here])
295 return false;
296 ++a1Here;
297 ++a2Here;
298 }
299 return true; // we have successfully matched the whole way
300
301}
302
303
304unsigned long PrefixLen (const UCArray &a1, const UCArray &a2) {
305 unsigned long l = (a1.size() < a2.size()) ? a1.size() : a2.size();
306 unsigned long i = 0;
307
308 UCArray::const_iterator a1Here = a1.begin();
309 UCArray::const_iterator a2Here = a2.begin();
310
311 while (i < l && *a1Here == *a2Here) {
312 ++i; ++a1Here; ++a2Here;
313 }
314
315 return i;
316}
317
318bool WritePreSufStr (FILE *f, const UCArray *prev, const UCArray &a) {
319 unsigned char preLen;
320 unsigned char sufLen;
321
322 if (prev != NULL) preLen = PrefixLen (*prev, a);
323 else preLen = 0;
324 sufLen = a.size() - preLen;
325
326 // output the prefix length, suffix length, and the suffix
327 fputc (preLen, f);
328 if (ferror(f) != 0) return false;
329 fputc (sufLen, f);
330 if (ferror(f) != 0) return false;
331 char* tmp=GetCStr(a);
332 int ret=(fwrite (tmp+preLen, sizeof (char), sufLen, f) == sufLen);
333 delete []tmp;
334 return (ret != 0);
335}
336
337// a also used for prev
338bool ReadPreSufStr (FILE *f, UCArray &a) {
339 unsigned char preLen = 0;
340 unsigned char sufLen = 0;
341
342 preLen = fgetc(f);
343 sufLen = fgetc(f);
344
345 if (a.size() > preLen) a.erase (a.begin()+preLen, a.end());
346
347 // reserve the needed space in advance
348 if (a.capacity() < a.size() + sufLen + 1) {
349 a.reserve(a.size() + sufLen + 1);
350 }
351
352 while (sufLen > 0) {
353 unsigned char c = fgetc (f);
354 a.push_back (c);
355 --sufLen;
356 }
357
358 return true;
359}
360
Note: See TracBrowser for help on using the repository browser.