/**************************************************************************
 *
 * UCArray.cpp -- vector based string class
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: UCArray.cpp 1279 2000-07-12 22:21:53Z sjboddie $
 *
 **************************************************************************/

#include "UCArray.h"
#include "netorder.h"  /* [RPAP - Jan 97: Endian Ordering] */


/*
 * Replace the contents of `text` with the bytes of the NUL-terminated
 * string `cStr` (the terminator itself is not stored).
 * A NULL `cStr` is treated as the empty string.
 */
void SetCStr (UCArray &text, const char *cStr) {
  text.erase (text.begin(), text.end());
  if (cStr == NULL) return;

  while (*cStr != '\0') {
    text.push_back (*cStr);
    cStr++;
  }
}

/*
 * Return a freshly allocated, NUL-terminated copy of `text`.
 * The buffer is allocated with new[]; the caller owns it and must
 * release it with delete[].
 */
char *GetCStr (UCArray text) {
  char *cstr = new char[text.size() + 1];

  UCArray::const_iterator here = text.begin();
  UCArray::const_iterator end = text.end();
  unsigned long i = 0;
  while (here != end) {
    cstr[i] = static_cast<char>(*here);
    ++here;
    ++i;
  }
  cstr[i] = '\0';

  return cstr;
}

/*
 * Stream every element of `a` to `s`, one element at a time.
 */
ostream &operator<< (ostream &s, const UCArray &a) {
  UCArray::const_iterator here = a.begin();
  UCArray::const_iterator end = a.end();
  while (here != end) {
    s << *here;
    ++here;
  }
  return s;
}


/*
 * Read a variable-length unsigned integer from `f` into `n`.
 *
 * Encoding: 7 payload bits per byte, least significant group first;
 * the top bit of a byte is set when another byte follows.  At most
 * five bytes (bit positions 0..28) are consumed.
 *
 * Returns false, leaving `n` untouched, if the stream ends or a read
 * error occurs before the value is complete.
 */
bool ReadVarLenUL (FILE *f, unsigned long &n) {
  unsigned long temp = 0;
  unsigned int bitPos = 0;
  int c = 0;

  do {
    // keep the int result so that EOF / read errors can be told apart
    // from a genuine 0xff data byte
    c = fgetc (f);
    if (c == EOF) return false;

    // widen before shifting: shifting the promoted int left by 28 bits
    // would overflow a signed 32-bit int
    temp |= static_cast<unsigned long>(c & 0x7f) << bitPos;
    bitPos += 7;
  } while (c >= 0x80 && bitPos < 32);

  n = temp;
  return true;
}

/*
 * Write `n` to `f` using the variable-length encoding understood by
 * ReadVarLenUL (7 bits per byte, low group first, top bit = "more").
 *
 * NOTE(review): ReadVarLenUL stops after 32 bits, so values that need
 * more than five bytes will not read back intact -- confirm callers
 * never write such values where unsigned long is wider than 32 bits.
 *
 * Returns false if the stream reports an error.
 */
bool WriteVarLenUL (FILE *f, unsigned long n) {
  unsigned long temp = n;

  do {
    unsigned char b = static_cast<unsigned char>(temp & 0x7f);
    if (temp >= 0x80) b |= 0x80;   // more groups follow
    fputc (b, f);
    if (ferror (f) != 0) return false;
    temp >>= 7;
  } while (temp > 0);

  return true;
}


/*
 * Read one raw unsigned long from `f` and pass it through NTOHUL.
 * Returns false if the item could not be read.
 */
bool ReadUL (FILE *f, unsigned long &n) {
  if (fread (&n, sizeof (unsigned long), 1, f) != 1) return false;
  NTOHUL (n);
  return true;
}

/*
 * Pass `n` through HTONUL and write it to `f` as one raw unsigned long.
 * Returns false if the item could not be written.
 */
bool WriteUL (FILE *f, unsigned long n) {
  HTONUL (n);
  return (fwrite (&n, sizeof (unsigned long), 1, f) == 1);
}

/*
 * Read one raw float from `f` and pass it through NTOHF.
 * Returns false if the item could not be read.
 */
bool ReadF (FILE *f, float &n) {
  if (fread (&n, sizeof (float), 1, f) != 1) return false;
  NTOHF(n);
  return true;
}

/*
 * Pass `n` through HTONF and write it to `f` as one raw float.
 * Returns false if the item could not be written.
 */
bool WriteF (FILE *f, float n) {
  HTONF(n);
  return (fwrite (&n, sizeof (float), 1, f) == 1);
}

/*
 * Read one raw double from `f` and pass it through NTOHD.
 * Returns false if the item could not be read.
 */
bool ReadD (FILE *f, double &n) {
  if (fread (&n, sizeof (double), 1, f) != 1) return false;
  NTOHD(n);
  return true;
}

/*
 * Pass `n` through HTOND and write it to `f` as one raw double.
 * Returns false if the item could not be written.
 */
bool WriteD (FILE *f, double n) {
  HTOND(n);
  return (fwrite (&n, sizeof (double), 1, f) == 1);
}


/*
 * Read an array written by WriteUCArray: a variable-length element
 * count followed by that many bytes.
 *
 * `a` is always cleared first.  Returns false if the stream ends or
 * fails part-way through; in that case `a` holds the bytes that were
 * read before the failure.
 */
bool ReadUCArray (FILE *f, UCArray &a) {
  // clear the array in preparation
  a.erase (a.begin(), a.end());

  // read in the array size
  unsigned long arraySize = 0;
  if (!ReadVarLenUL (f, arraySize)) return false;

  // read in the array
  while (arraySize > 0) {
    int c = fgetc (f);
    if (c == EOF) return false;   // end of file or read error
    a.push_back (static_cast<unsigned char>(c));
    arraySize--;
  }

  return true;
}

/*
 * Write `a` to `f` as a variable-length element count followed by the
 * elements themselves, one byte each.
 * Returns false if the stream reports an error.
 */
bool WriteUCArray (FILE *f, const UCArray &a) {
  // write out the array size
  if (!WriteVarLenUL (f, a.size())) return false;

  UCArray::const_iterator here = a.begin();
  UCArray::const_iterator end = a.end();
  while (here != end) {
    fputc (*here, f);
    if (ferror (f) != 0) return false;
    ++here;
  }

  return true;
}


/*
 * This array maps upper and lower case letters together for a case
 * independent comparison.  The mappings are based upon ASCII character
 * sequences: entries 0101-0132 ('A'-'Z') map to 0141-0172 ('a'-'z'),
 * and the same offset is applied to entries 0301-0332; every other
 * entry maps to itself.
 *
 * The values are written as octal integers rather than character
 * literals because a literal such as '\200' is negative where plain
 * char is signed, and converting it to unsigned char inside a braced
 * initializer is a narrowing conversion.
 */
static const unsigned char casecharmap[] = {
  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
  0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
  0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0300, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0372, 0333, 0334, 0335, 0336, 0337,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
};


/*
 * Dictionary-order comparison of `a1` and `a2`.
 *
 * Over the common length the elements are compared through
 * casecharmap; the first case-independent difference decides the
 * result.  If there is none, the length difference decides.  If the
 * lengths are equal too, the first raw (case-sensitive) difference
 * seen along the way is returned, so strings differing only in case
 * still get a stable order.  Returns 0 only for identical arrays.
 *
 * Assumes the elements of UCArray are in the range 0..255 (they index
 * the 256-entry casecharmap) -- TODO confirm against UCArray.h.
 */
int DictCompare (const UCArray &a1, const UCArray &a2) {
  unsigned int l1 = a1.size();
  unsigned int l2 = a2.size();
  unsigned int l = (l1 < l2) ? l1 : l2;
  int pos = 0;    // first case-sensitive difference, 0 while none seen
  int diff = 0;

  UCArray::const_iterator a1Here = a1.begin();
  UCArray::const_iterator a2Here = a2.begin();

  while (l--) {
    if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) != 0)
      return diff;
    if (pos == 0 && (diff = *a1Here - *a2Here) != 0)
      pos = diff;
    ++a1Here;
    ++a2Here;
  }

  // the unsigned difference is converted back to int exactly as the
  // original implicit conversion on return did
  if (l1 != l2) return static_cast<int>(l1 - l2);
  return pos;
}

/* comparison for browse index - items match if the smaller word
   is a prefix of the larger word, case independent.  Returns the
   first case-independent difference, or 0 when the shorter array is
   a prefix of the longer one. */
int BrowseCompare (const UCArray &a1, const UCArray &a2) {
  unsigned int l1 = a1.size();
  unsigned int l2 = a2.size();
  unsigned int l = (l1 < l2) ? l1 : l2;   // l is the shorter of the two
  int diff = 0;

  UCArray::const_iterator a1Here = a1.begin();
  UCArray::const_iterator a2Here = a2.begin();

  while (l--) {
    if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) != 0)
      return diff;
    ++a1Here;
    ++a2Here;
  }

  return 0;
}

/*
 * Return the number of leading elements that `a1` and `a2` have in
 * common (exact, case-sensitive comparison).
 */
unsigned long PrefixLen (const UCArray &a1, const UCArray &a2) {
  unsigned long l = (a1.size() < a2.size()) ? a1.size() : a2.size();
  unsigned long i = 0;

  UCArray::const_iterator a1Here = a1.begin();
  UCArray::const_iterator a2Here = a2.begin();

  while (i < l && *a1Here == *a2Here) {
    ++i;
    ++a1Here;
    ++a2Here;
  }

  return i;
}


/*
 * Write `a` to `f` front-coded against `*prev`: one byte holding the
 * length of the prefix shared with `*prev`, one byte holding the
 * length of the remaining suffix, then the suffix bytes.  A NULL
 * `prev` means no prefix is shared.
 *
 * Both lengths are stored in a single byte each, so the shared prefix
 * is capped at 255 and the call fails (returns false, nothing written)
 * when the remaining suffix would be longer than 255 bytes.  Also
 * returns false on a stream error.
 */
bool WritePreSufStr (FILE *f, const UCArray *prev, const UCArray &a) {
  unsigned long prefix = 0;
  if (prev != NULL) prefix = PrefixLen (*prev, a);
  if (prefix > 255) prefix = 255;   // any shorter shared prefix is still valid

  unsigned long suffix = a.size() - prefix;
  if (suffix > 255) return false;   // not representable in this format

  unsigned char preLen = static_cast<unsigned char>(prefix);
  unsigned char sufLen = static_cast<unsigned char>(suffix);

  // output the prefix length, suffix length, and the suffix
  fputc (preLen, f);
  if (ferror (f) != 0) return false;
  fputc (sufLen, f);
  if (ferror (f) != 0) return false;

  // nothing more to write; also avoids dereferencing a.end()
  if (sufLen == 0) return true;

  return (fwrite (&*(a.begin() + preLen), sizeof (char), sufLen, f) == sufLen);
}

/*
 * Read a string written by WritePreSufStr.
 *
 * a also used for prev: on entry `a` must hold the previously decoded
 * string; it is cut back to the stored prefix length and the stored
 * suffix is appended.  If `a` is shorter than the stored prefix
 * length it is left at its current length before the suffix is
 * appended.
 *
 * Returns false if the stream ends or fails before the whole record
 * has been read; `a` may then be partially updated.
 */
bool ReadPreSufStr (FILE *f, UCArray &a) {
  int preLen = fgetc (f);
  if (preLen == EOF) return false;
  int sufLen = fgetc (f);
  if (sufLen == EOF) return false;

  // keep only the part shared with the previous string
  if (a.size () > static_cast<unsigned long>(preLen))
    a.erase (a.begin() + preLen, a.end());

  while (sufLen > 0) {
    int c = fgetc (f);
    if (c == EOF) return false;
    a.push_back (static_cast<unsigned char>(c));
    sufLen--;
  }

  return true;
}