source: trunk/gsdl/src/mgpp/text/UCArray.cpp@ 1125

Last change on this file since 1125 was 1125, checked in by kjm18, 24 years ago

added GetCStr and BrowseCompare

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.2 KB
Line 
1/**************************************************************************
2 *
3 * UCArray.cpp -- vector based string class
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: UCArray.cpp 1125 2000-04-18 04:06:12Z kjm18 $
21 *
22 **************************************************************************/
23
24#include "UCArray.h"
25#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
26
27
28void SetCStr (UCArray &text, const char *cStr) {
29 text.erase(text.begin(), text.end());
30
31 while (*cStr != '\0') {
32 text.push_back (*cStr);
33 cStr++;
34 }
35}
36
37char * GetCStr(UCArray text) {
38
39 char *cstr = new char[text.size()+1];
40 UCArray::const_iterator here = text.begin();
41 UCArray::const_iterator end = text.end();
42
43 int i = 0;
44 while (here != end) {
45 cstr[i] = (char)*here;
46 here++; i++;
47 }
48 cstr[i]='\0';
49 return cstr;
50}
51
52ostream &operator<<(ostream &s, const UCArray &a) {
53 UCArray::const_iterator here = a.begin();
54 UCArray::const_iterator end = a.end();
55 while (here != end) {
56 s << *here;
57 here++;
58 }
59
60 return s;
61}
62
63
/*
 * ReadVarLenUL -- read a variable-length encoded unsigned long from f.
 *
 * The value is stored seven bits per byte, least-significant group
 * first; a byte with its top bit (0x80) set means another byte follows.
 * This is the format produced by WriteVarLenUL.
 *
 * Returns true and stores the decoded value in n on success.  Returns
 * false, leaving n untouched, if the end of file or a read error is
 * reached before the last byte of the value has been read.
 */
bool ReadVarLenUL (FILE *f, unsigned long &n) {
  // number of value bits an unsigned long holds on this platform; the
  // writer emits groups until the whole value has been shifted out, so
  // the reader has to accept the same number of groups
  const unsigned int maxBits = sizeof (unsigned long) * 8;

  unsigned long value = 0;
  unsigned int bitPos = 0;
  int c = 0;

  do {
    // keep fgetc's int result so that EOF (end of file or read error)
    // can be told apart from a genuine 0xff data byte
    c = fgetc (f);
    if (c == EOF) return false;

    // widen before shifting: shifting an int left by 28 bits or more
    // would overflow it
    value |= static_cast<unsigned long> (c & 0x7f) << bitPos;
    bitPos += 7;
  } while (c >= 0x80 && bitPos < maxBits);

  n = value;

  return true;
}
80
/*
 * WriteVarLenUL -- write n to f in a variable-length encoding.
 *
 * The value is emitted seven bits per byte, least-significant group
 * first.  Every byte except the last has its top bit (0x80) set to show
 * that more bytes follow.  At least one byte is always written, so 0 is
 * encoded as a single 0x00 byte.
 *
 * Returns false as soon as the stream's error indicator is found set
 * after a write, true otherwise.
 */
bool WriteVarLenUL (FILE *f, unsigned long n) {
  unsigned long remaining = n;

  do {
    unsigned char b = static_cast<unsigned char> (remaining & 0x7f);
    remaining >>= 7;
    if (remaining > 0) b |= 0x80;   // flag that another group follows

    fputc (b, f);
    if (ferror (f) != 0) return false;
  } while (remaining > 0);

  return true;
}
93
94
95bool ReadUL (FILE *f, unsigned long &n) {
96 if (fread (&n, sizeof (unsigned long), 1, f) <= 0) return false;
97 NTOHUL (n);
98 return true;
99}
100
101
102bool WriteUL (FILE *f, unsigned long n) {
103 HTONUL (n);
104 return (fwrite (&n, sizeof (unsigned long), 1, f) > 0);
105}
106
107bool ReadF (FILE *f, float &n) {
108 if (fread (&n, sizeof (float), 1, f) <= 0) return false;
109 NTOHF(n);
110 return true;
111}
112
113bool WriteF (FILE *f, float n) {
114 HTONF(n);
115 return (fwrite (&n, sizeof (float), 1, f) > 0);
116}
117
118bool ReadD (FILE *f, double &n) {
119 if (fread (&n, sizeof (double), 1, f) <= 0) return false;
120 NTOHD(n);
121 return true;
122}
123
124bool WriteD (FILE *f, double n) {
125 HTOND(n);
126 return (fwrite (&n, sizeof (double), 1, f) > 0);
127}
128
129bool ReadUCArray (FILE *f, UCArray &a) {
130 // clear the array in preparation
131 a.erase (a.begin(), a.end());
132
133 // read in the array size
134 unsigned long arraySize = 0;
135 if (!ReadVarLenUL (f, arraySize)) return false;
136
137 // read in the array
138 unsigned char b = 0;
139 while (arraySize > 0) {
140 b = fgetc (f);
141 if (feof(f)) return false;
142 a.push_back (b);
143
144 arraySize--;
145 }
146
147 return true;
148}
149
150bool WriteUCArray (FILE *f, const UCArray &a) {
151 // write out the array size
152 if (!WriteVarLenUL (f, a.size())) return false;
153
154 UCArray::const_iterator here = a.begin();
155 UCArray::const_iterator end = a.end();
156 while (here != end) {
157 fputc (*here, f);
158 if (ferror (f) != 0) return false;
159
160 here++;
161 }
162
163 return true;
164}
165
/*
 * This array is designed for mapping upper and lower case letters
 * together for a case independent comparison.  The mappings are
 * based upon ascii character sequences.
 *
 * Entry i holds the folded value of byte i.  Bytes 0x41-0x5a ('A'-'Z')
 * map to 0x61-0x7a ('a'-'z').  The same pattern is repeated with the
 * top bit set: bytes 0xc1-0xda map to 0xe1-0xfa.  Every other byte maps
 * to itself.
 *
 * The values are written as integer constants rather than character
 * literals: a literal such as '\300' is negative where plain char is
 * signed, and converting it to unsigned char inside a braced
 * initialiser is a narrowing conversion.
 */
static const unsigned char casecharmap[] = {
  /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
             0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
             0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
             0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
             0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
             0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
             0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
             0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
  /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
             0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
             0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
             0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
             0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
             0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  /* 0xc0 */ 0xc0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
             0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  /* 0xd0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
             0xf8, 0xf9, 0xfa, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
             0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
             0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
205
206
207int DictCompare (const UCArray &a1, const UCArray &a2) {
208 unsigned int l1 = a1.size();
209 unsigned int l2 = a2.size();
210 unsigned int l = (l1 < l2) ? l1 : l2;
211 int pos = 0;
212 register int diff = 0;
213
214 UCArray::const_iterator a1Here = a1.begin();
215 UCArray::const_iterator a2Here = a2.begin();
216
217 while (l--) {
218 if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) != 0)
219 return diff;
220 if (pos == 0 && (diff = *a1Here - *a2Here) != 0)
221 pos = diff;
222
223 a1Here++;
224 a2Here++;
225 }
226
227 return ((l1 - l2) ? (l1 - l2) : (pos));
228}
229
230/* comparison for browse index - items match if the smaller word
231 is a prefix of the larger word, case independent
232*/
233int BrowseCompare (const UCArray &a1, const UCArray &a2) {
234 unsigned int l1 = a1.size();
235 unsigned int l2 = a2.size();
236 unsigned int l = (l1 < l2) ? l1 : l2; // l is the shorter of the two
237 int diff = 0;
238
239 UCArray::const_iterator a1Here = a1.begin();
240 UCArray::const_iterator a2Here = a2.begin();
241
242 while(l--) {
243 if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) !=0)
244 return diff;
245 a1Here++;
246 a2Here++;
247 }
248 return 0;
249
250}
251
252unsigned long PrefixLen (const UCArray &a1, const UCArray &a2) {
253 unsigned long l = (a1.size() < a2.size()) ? a1.size() : a2.size();
254 unsigned long i = 0;
255
256 UCArray::const_iterator a1Here = a1.begin();
257 UCArray::const_iterator a2Here = a2.begin();
258
259 while (i < l && *a1Here == *a2Here) {
260 i++; a1Here++; a2Here++;
261 }
262
263 return i;
264}
265
266bool WritePreSufStr (FILE *f, const UCArray *prev, const UCArray &a) {
267 unsigned char preLen;
268 unsigned char sufLen;
269
270 if (prev != NULL) preLen = PrefixLen (*prev, a);
271 else preLen = 0;
272 sufLen = a.size() - preLen;
273
274 // output the prefix length, suffix length, and the suffix
275 fputc (preLen, f);
276 if (ferror(f) != 0) return false;
277 fputc (sufLen, f);
278 if (ferror(f) != 0) return false;
279 return (fwrite ((char *)a.begin()+preLen, sizeof (char), sufLen, f) == sufLen);
280}
281
282// a also used for prev
283bool ReadPreSufStr (FILE *f, UCArray &a) {
284 unsigned char preLen = 0;
285 unsigned char sufLen = 0;
286
287 preLen = fgetc(f);
288 sufLen = fgetc(f);
289
290 if (a.size () > preLen) a.erase (a.begin()+preLen, a.end());
291 while (sufLen > 0) {
292 unsigned char c = fgetc (f);
293 a.push_back (c);
294 sufLen--;
295 }
296
297 return true;
298}
299
Note: See TracBrowser for help on using the repository browser.