[1076] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * gsdlunicode.cpp --
|
---|
| 4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
| 5 | *
|
---|
| 6 | * A component of the Greenstone digital library software
|
---|
| 7 | * from the New Zealand Digital Library Project at the
|
---|
| 8 | * University of Waikato, New Zealand.
|
---|
| 9 | *
|
---|
| 10 | * This program is free software; you can redistribute it and/or modify
|
---|
| 11 | * it under the terms of the GNU General Public License as published by
|
---|
| 12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | * (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This program is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | * GNU General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU General Public License
|
---|
| 21 | * along with this program; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | *
|
---|
| 24 | *********************************************************************/
|
---|
| 25 |
|
---|
| 26 | #include "gsdlunicode.h"
|
---|
| 27 |
|
---|
| 28 |
|
---|
| 29 | // unitool is currently in mg, if mg is not being used it should
|
---|
| 30 | // be moved into GSDLHOME/lib
|
---|
[21325] | 31 | // A copy of mgpp's unitool has now been moved into common-src/src/lib/
|
---|
[1076] | 32 | #include "unitool.h"
|
---|
| 33 |
|
---|
| 34 | #include "fileutil.h"
|
---|
| 35 |
|
---|
| 36 | #include <stdio.h>
|
---|
| 37 |
|
---|
[1233] | 38 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
| 39 | # include <ospace\std\iostream>
|
---|
| 40 | # include <ospace\std\fstream>
|
---|
| 41 | #elif defined(GSDL_USE_IOS_H)
|
---|
| 42 | # include <iostream.h>
|
---|
| 43 | # include <fstream.h>
|
---|
| 44 | #else
|
---|
| 45 | # include <iostream>
|
---|
| 46 | # include <fstream>
|
---|
| 47 | #endif
|
---|
[1076] | 48 |
|
---|
| 49 |
|
---|
| 50 | // converts a unicode encode text_t string to a utf-8
|
---|
| 51 | // encoded text_t string
|
---|
| 52 | text_t to_utf8 (text_t::const_iterator here, text_t::const_iterator end) {
|
---|
| 53 | text_t out;
|
---|
| 54 |
|
---|
| 55 | unsigned char thischar[MAXUTF8CHARLEN];
|
---|
| 56 | int i, charlen;
|
---|
| 57 |
|
---|
| 58 | while (here != end) {
|
---|
| 59 | charlen = output_utf8_char (*here, thischar, &thischar[MAXUTF8CHARLEN-1]);
|
---|
[8727] | 60 | for (i=0; i<charlen; ++i) out.push_back(thischar[i]);
|
---|
| 61 | ++here;
|
---|
[1076] | 62 | }
|
---|
| 63 |
|
---|
| 64 | return out;
|
---|
| 65 | }
|
---|
| 66 |
|
---|
| 67 | // converts a utf-8 encoded text_t string to a unicode
|
---|
| 68 | // encoded text_t string
|
---|
| 69 | text_t to_uni (const text_t &in) {
|
---|
| 70 | text_t out;
|
---|
| 71 | unsigned char *in_cstr = (unsigned char *)in.getcstr();
|
---|
| 72 | unsigned char *here = in_cstr;
|
---|
| 73 | unsigned char *end = in_cstr;
|
---|
| 74 |
|
---|
| 75 | unsigned short unichar;
|
---|
| 76 | int charlen = 0;
|
---|
| 77 |
|
---|
| 78 | // get the last valid character in the string
|
---|
[8727] | 79 | while (*end != '\0') ++end;
|
---|
| 80 | --end;
|
---|
[1076] | 81 |
|
---|
| 82 | while ((charlen = parse_utf8_char (here, end, &unichar)) > 0) {
|
---|
| 83 | out.push_back(unichar);
|
---|
| 84 | here += charlen;
|
---|
| 85 | }
|
---|
| 86 |
|
---|
[8727] | 87 | delete []in_cstr;
|
---|
[1076] | 88 |
|
---|
| 89 | return out;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 |
|
---|
[3667] | 93 | // this works for all unicode values < 65536...
|
---|
| 94 | void utf16outconvertclass::convert (char *out, size_t maxlen, size_t &len, status_t &status) {
|
---|
| 95 | // we should already have text_t* input set...
|
---|
| 96 | if (input == NULL || out == NULL)
|
---|
| 97 | {
|
---|
| 98 | status = finished;
|
---|
| 99 | return;
|
---|
| 100 | }
|
---|
| 101 | unsigned char *output = (unsigned char *)out;
|
---|
| 102 | text_t::iterator textend = input->end();
|
---|
| 103 | len = 0;
|
---|
[8727] | 104 | if (maxlen % 2) --maxlen; // we need an even number of output bytes...
|
---|
[3667] | 105 | while ((len < maxlen) && (texthere != textend)) {
|
---|
| 106 | unsigned short int uni_char=(unsigned short int) *texthere;
|
---|
| 107 | // big endian utf-16...
|
---|
| 108 | if (uni_char < 256) {
|
---|
| 109 | out[len]=0;
|
---|
| 110 | out[len+1]=uni_char;
|
---|
| 111 | } else {
|
---|
| 112 | out[len]=uni_char >> 8;
|
---|
| 113 | out[len+1]=uni_char & 255;
|
---|
| 114 | }
|
---|
| 115 | len+=2;
|
---|
| 116 | ++texthere;
|
---|
| 117 | }
|
---|
| 118 | if (texthere==textend)
|
---|
| 119 | status=finished;
|
---|
| 120 | else
|
---|
| 121 | status=unfinished;
|
---|
| 122 | }
|
---|
[1076] | 123 |
|
---|
[3667] | 124 |
|
---|
[1076] | 125 | utf8inconvertclass::utf8inconvertclass () {
|
---|
| 126 | utf8buflen = 0;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | void utf8inconvertclass::reset () {
|
---|
| 130 | start = NULL;
|
---|
| 131 | len = 0;
|
---|
| 132 | utf8buflen=0;
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | void utf8inconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 136 | output.clear();
|
---|
| 137 | output.reserve (len/3);
|
---|
| 138 |
|
---|
| 139 | if (start == NULL || len == 0) {
|
---|
| 140 | if (utf8buflen == 0) status = finished;
|
---|
| 141 | else status = stopped;
|
---|
| 142 | return;
|
---|
| 143 | }
|
---|
| 144 |
|
---|
| 145 | // don't want any funny sign conversions happening
|
---|
| 146 | unsigned char *here = (unsigned char *)start;
|
---|
| 147 | unsigned char *end = here+len-1;
|
---|
| 148 | unsigned short c;
|
---|
| 149 | size_t realcharlen;
|
---|
| 150 |
|
---|
| 151 | size_t charlen = getutf8charlen ();
|
---|
| 152 | while (len > 0) {
|
---|
| 153 | if (charlen == 0) {
|
---|
| 154 | // start parsing a new character
|
---|
| 155 | utf8buflen = 0;
|
---|
| 156 |
|
---|
| 157 | // fast common case
|
---|
| 158 | while (len > 3) {
|
---|
| 159 | realcharlen = parse_utf8_char (here, end, &c);
|
---|
| 160 | output.push_back (c);
|
---|
| 161 | here += realcharlen;
|
---|
| 162 | len -= realcharlen;
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | utf8buf[utf8buflen++] = *here;
|
---|
| 166 | ++here;
|
---|
| 167 | --len;
|
---|
| 168 | charlen = getutf8charlen ();
|
---|
| 169 |
|
---|
| 170 | } else if (utf8buflen < charlen) {
|
---|
| 171 | // assumes charlen is always less than MAXUTF8CHARLEN
|
---|
| 172 | utf8buf[utf8buflen++] = *here;
|
---|
| 173 | ++here;
|
---|
| 174 | --len;
|
---|
| 175 | }
|
---|
| 176 |
|
---|
| 177 | if (utf8buflen == charlen) {
|
---|
| 178 | // got a complete character
|
---|
| 179 | realcharlen = parse_utf8_char (utf8buf, &utf8buf[utf8buflen-1], &c);
|
---|
| 180 | output.push_back (c);
|
---|
| 181 |
|
---|
| 182 | // move any unparsed characters. If an error occurred some of
|
---|
| 183 | // the characters might be unused.
|
---|
| 184 | int i;
|
---|
| 185 | int diff = utf8buflen - realcharlen;
|
---|
[8727] | 186 | for (i=0; i < diff; ++i) utf8buf[i] = utf8buf[i+diff];
|
---|
[1076] | 187 | utf8buflen = diff;
|
---|
| 188 | charlen = getutf8charlen ();
|
---|
| 189 | }
|
---|
| 190 | }
|
---|
| 191 |
|
---|
| 192 | start = (char *)here; // save current position
|
---|
| 193 |
|
---|
| 194 | if (utf8buflen == 0) status = finished;
|
---|
| 195 | else status = stopped;
|
---|
| 196 | }
|
---|
| 197 |
|
---|
| 198 |
|
---|
| 199 | // returns the length that the current contents of the
|
---|
| 200 | // utf8buf should be
|
---|
| 201 | size_t utf8inconvertclass::getutf8charlen () {
|
---|
| 202 | if (utf8buflen == 0) return 0;
|
---|
| 203 |
|
---|
| 204 | // one byte character
|
---|
| 205 | if (utf8buf[0] < 0x80) return 1;
|
---|
| 206 |
|
---|
| 207 | // error, is not the start of a utf-8 character
|
---|
| 208 | if (utf8buf[0] < 0xc0) return 1;
|
---|
| 209 |
|
---|
| 210 | // two bute character
|
---|
| 211 | if (utf8buf[0] < 0xe0) return 2;
|
---|
| 212 |
|
---|
| 213 | // three byte character
|
---|
| 214 | if (utf8buf[0] < 0xf0) return 3;
|
---|
| 215 |
|
---|
| 216 | // error, character too long for unicode
|
---|
| 217 | return 1;
|
---|
| 218 | }
|
---|
| 219 |
|
---|
| 220 |
|
---|
| 221 | void utf8outconvertclass::reset () {
|
---|
| 222 | input = NULL;
|
---|
| 223 | outs = NULL;
|
---|
| 224 | utf8buflen = 0;
|
---|
| 225 | utf8bufhere = 0;
|
---|
| 226 | }
|
---|
| 227 |
|
---|
| 228 | // note that convert does not null-terminate the
|
---|
| 229 | // output array of characters
|
---|
| 230 | void utf8outconvertclass::convert (char *output, size_t maxlen,
|
---|
| 231 | size_t &len, status_t &status) {
|
---|
| 232 | if (input == NULL || output == NULL) {
|
---|
| 233 | if (utf8buflen == 0) status = finished;
|
---|
| 234 | else status = unfinished;
|
---|
| 235 | return;
|
---|
| 236 | }
|
---|
| 237 |
|
---|
| 238 | // don't want any funny sign conversions happening
|
---|
| 239 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 240 | text_t::iterator textend = input->end();
|
---|
| 241 | len = 0;
|
---|
| 242 | while (len < maxlen) {
|
---|
| 243 | // empty the contents of the internal buffer
|
---|
| 244 | if (utf8buflen > 0) {
|
---|
| 245 | while (len < maxlen && utf8bufhere < utf8buflen) {
|
---|
| 246 | *uoutput = utf8buf[utf8bufhere];
|
---|
[8727] | 247 | ++uoutput;
|
---|
| 248 | ++len;
|
---|
| 249 | ++utf8bufhere;
|
---|
[1076] | 250 | }
|
---|
| 251 |
|
---|
| 252 | if (utf8bufhere == utf8buflen) {
|
---|
| 253 | utf8bufhere = 0;
|
---|
| 254 | utf8buflen = 0;
|
---|
| 255 | }
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | // fill up the buffer with the next character
|
---|
| 259 | if (utf8buflen == 0) {
|
---|
| 260 | if (texthere == textend) break; // finished!
|
---|
| 261 | if (!rzws || (*texthere != 0x200b))
|
---|
| 262 | utf8buflen = output_utf8_char (*texthere, utf8buf,
|
---|
| 263 | &utf8buf[MAXUTF8CHARLEN-1]);
|
---|
[8727] | 264 | ++texthere;
|
---|
[1076] | 265 | utf8bufhere = 0;
|
---|
| 266 | }
|
---|
| 267 | }
|
---|
| 268 |
|
---|
| 269 | if (texthere == textend && utf8buflen == 0) status = finished;
|
---|
| 270 | else status = unfinished;
|
---|
| 271 | }
|
---|
| 272 |
|
---|
| 273 |
|
---|
| 274 |
|
---|
| 275 |
|
---|
| 276 |
|
---|
| 277 |
|
---|
| 278 | mapdata_t::mapdata_t () {
|
---|
| 279 |
|
---|
| 280 | // reset all the map ptrs to be NULL
|
---|
[8727] | 281 | for (int i=0; i<256; ++i) {
|
---|
[1076] | 282 | ptrs[i] = (unsigned short *)NULL;
|
---|
| 283 | }
|
---|
| 284 |
|
---|
| 285 | // say nothing has been loaded
|
---|
| 286 | loaded = false;
|
---|
| 287 | }
|
---|
| 288 |
|
---|
| 289 |
|
---|
| 290 | mapconvert::mapconvert () {
|
---|
| 291 | absentc = 0;
|
---|
| 292 | }
|
---|
| 293 |
|
---|
| 294 | // setmapfile will cause loadmapfile to be called when conversion is
|
---|
| 295 | // needed
|
---|
[1870] | 296 | bool mapconvert::setmapfile (const text_t &themapfile, unsigned short theabsentc) {
|
---|
[1076] | 297 | // check to see if the mapfile has been already loaded
|
---|
[1870] | 298 | if (mapdata.loaded && mapfile == themapfile && absentc == theabsentc) return true;
|
---|
[1076] | 299 |
|
---|
| 300 | unloadmapfile ();
|
---|
[1870] | 301 | mapfile = themapfile;
|
---|
[1076] | 302 | absentc = theabsentc;
|
---|
| 303 |
|
---|
| 304 | return true;
|
---|
| 305 | }
|
---|
| 306 |
|
---|
| 307 |
|
---|
| 308 |
|
---|
| 309 | // loadmapfile should be called before any conversion is done
|
---|
[1870] | 310 | bool mapconvert::loadmapfile (const text_t &themapfile,
|
---|
[1076] | 311 | unsigned short theabsentc) {
|
---|
| 312 | FILE *mapfilein = (FILE *)NULL;
|
---|
| 313 |
|
---|
| 314 | // check to see if the mapfile has been already loaded
|
---|
[1870] | 315 | if (mapdata.loaded && mapfile == themapfile && absentc == theabsentc) return true;
|
---|
[1076] | 316 |
|
---|
| 317 | unloadmapfile ();
|
---|
[1870] | 318 | mapfile = themapfile;
|
---|
[1076] | 319 | absentc = theabsentc;
|
---|
| 320 |
|
---|
| 321 | // open the map file
|
---|
[1870] | 322 | char *cfilename = mapfile.getcstr();
|
---|
[1076] | 323 | if (cfilename == (char *)NULL) return false;
|
---|
| 324 | mapfilein = fopen(cfilename, "rb");
|
---|
[8727] | 325 | delete []cfilename; cfilename = NULL;
|
---|
[1076] | 326 |
|
---|
| 327 | if (mapfilein == (FILE *)NULL) return false;
|
---|
| 328 |
|
---|
| 329 | unsigned char c, n1, n2;
|
---|
| 330 | unsigned short *arrptr;
|
---|
| 331 | int i;
|
---|
| 332 | c = fgetc (mapfilein);
|
---|
| 333 | while (!feof (mapfilein)) {
|
---|
| 334 | if (mapdata.ptrs[c] == (unsigned short *)NULL) {
|
---|
| 335 | // allocate a new array
|
---|
| 336 | arrptr = new unsigned short[256];
|
---|
| 337 | mapdata.ptrs[c] = arrptr;
|
---|
| 338 | } else arrptr = mapdata.ptrs[c];
|
---|
| 339 |
|
---|
| 340 | // clear the array
|
---|
[8727] | 341 | for (i=0; i<256; ++i) arrptr[i] = 0;
|
---|
[1076] | 342 |
|
---|
| 343 | // read in this block
|
---|
| 344 | n1 = fgetc (mapfilein);
|
---|
| 345 | n2 = fgetc (mapfilein);
|
---|
| 346 | i=0;
|
---|
| 347 | while (!feof (mapfilein)) {
|
---|
| 348 | arrptr[i] = ((unsigned short)n1 << 8) | (unsigned short)n2;
|
---|
| 349 |
|
---|
[8727] | 350 | ++i;
|
---|
[1076] | 351 | if (i >= 256) break;
|
---|
| 352 | n1 = fgetc (mapfilein);
|
---|
| 353 | n2 = fgetc (mapfilein);
|
---|
| 354 | }
|
---|
| 355 |
|
---|
| 356 | c = fgetc (mapfilein);
|
---|
| 357 | }
|
---|
| 358 |
|
---|
| 359 | mapdata.loaded = true;
|
---|
| 360 |
|
---|
| 361 | return true;
|
---|
| 362 | }
|
---|
| 363 |
|
---|
| 364 | void mapconvert::unloadmapfile () {
|
---|
| 365 | if (!mapdata.loaded) return;
|
---|
| 366 |
|
---|
[8727] | 367 | for (int i=0; i<256; ++i) {
|
---|
[1076] | 368 | if (mapdata.ptrs[i] != (unsigned short *)NULL) {
|
---|
| 369 | delete [] mapdata.ptrs[i];
|
---|
| 370 | mapdata.ptrs[i] = (unsigned short *)NULL;
|
---|
| 371 | }
|
---|
| 372 | }
|
---|
| 373 |
|
---|
| 374 | mapdata.loaded = false;
|
---|
| 375 | }
|
---|
| 376 |
|
---|
| 377 |
|
---|
| 378 | unsigned short mapconvert::convert (unsigned short c) {
|
---|
| 379 | if (!mapdata.loaded) {
|
---|
[1870] | 380 | if (!mapfile.empty() && loadmapfile (mapfile, absentc)) {
|
---|
[1076] | 381 | // do nothing, successfully loaded database
|
---|
| 382 | } else return absentc;
|
---|
| 383 | }
|
---|
| 384 |
|
---|
| 385 | if (c == 0) return 0; // 0 always maps to 0...
|
---|
| 386 |
|
---|
| 387 | unsigned short n1 = c >> 8;
|
---|
| 388 | unsigned short n2 = c & 0xff;
|
---|
| 389 |
|
---|
| 390 | unsigned short *arrptr = mapdata.ptrs[n1];
|
---|
| 391 | if (arrptr == (unsigned short *)NULL) return absentc;
|
---|
| 392 |
|
---|
| 393 | if (arrptr[n2] == 0) return absentc;
|
---|
| 394 | return arrptr[n2];
|
---|
| 395 | }
|
---|
| 396 |
|
---|
| 397 | text_t mapconvert::convert (const text_t &instr) {
|
---|
| 398 | if (!mapdata.loaded) return absentc;
|
---|
| 399 |
|
---|
| 400 | text_t outstr;
|
---|
| 401 | text_t::const_iterator here = instr.begin();
|
---|
| 402 | text_t::const_iterator end = instr.end();
|
---|
| 403 |
|
---|
| 404 | while (here != end) {
|
---|
| 405 | outstr.push_back(this->convert(*here));
|
---|
[8727] | 406 | ++here;
|
---|
[1076] | 407 | }
|
---|
| 408 |
|
---|
| 409 | return outstr;
|
---|
| 410 | }
|
---|
| 411 |
|
---|
| 412 |
|
---|
| 413 |
|
---|
| 414 |
|
---|
| 415 | mapinconvertclass::mapinconvertclass () {
|
---|
[8666] | 416 | m_multibyte = 0;
|
---|
[1076] | 417 | mapbuflen = 0;
|
---|
| 418 | }
|
---|
| 419 |
|
---|
| 420 | void mapinconvertclass::reset () {
|
---|
| 421 | start = NULL;
|
---|
| 422 | len = 0;
|
---|
| 423 | mapbuflen=0;
|
---|
| 424 | }
|
---|
| 425 |
|
---|
| 426 | void mapinconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 427 | output.clear();
|
---|
| 428 |
|
---|
| 429 | if (start == NULL || len == 0) {
|
---|
| 430 | if (mapbuflen == 0) status = finished;
|
---|
| 431 | else status = stopped;
|
---|
| 432 | return;
|
---|
| 433 | }
|
---|
| 434 |
|
---|
| 435 | // don't want any funny sign conversions happening
|
---|
| 436 | unsigned char *here = (unsigned char *)start;
|
---|
| 437 |
|
---|
| 438 | size_t charlen = getmapcharlen ();
|
---|
| 439 | while (len > 0) {
|
---|
| 440 | if (charlen == 0) {
|
---|
| 441 | // start parsing a new character
|
---|
| 442 | mapbuflen = 0;
|
---|
| 443 | mapbuf[mapbuflen++] = *here;
|
---|
| 444 | ++here;
|
---|
| 445 | --len;
|
---|
| 446 | charlen = getmapcharlen ();
|
---|
| 447 |
|
---|
| 448 | } else if (mapbuflen < charlen) {
|
---|
| 449 | // assumes charlen is always less than MAXMAPCHARLEN
|
---|
| 450 | mapbuf[mapbuflen++] = *here;
|
---|
| 451 | ++here;
|
---|
| 452 | --len;
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 | if (mapbuflen == charlen) {
|
---|
| 456 | // got a complete character
|
---|
| 457 | if (charlen == 1) {
|
---|
[1927] | 458 | if (mapbuf[0] < 0x80) {
|
---|
| 459 | // ascii character
|
---|
| 460 | output.push_back (mapbuf[0]);
|
---|
| 461 | } else {
|
---|
| 462 | output.push_back (converter.convert((unsigned short)mapbuf[0]));
|
---|
| 463 | }
|
---|
[1076] | 464 |
|
---|
| 465 | } else {
|
---|
| 466 | // two byte character
|
---|
| 467 | output.push_back (converter.convert(((unsigned short)mapbuf[0] << 8) |
|
---|
| 468 | (unsigned short)mapbuf[1]));
|
---|
| 469 | }
|
---|
| 470 |
|
---|
| 471 | mapbuflen = 0;
|
---|
| 472 | charlen = 0;
|
---|
| 473 | }
|
---|
| 474 | }
|
---|
| 475 |
|
---|
| 476 | start = (char *)here; // save current position
|
---|
| 477 |
|
---|
| 478 | if (mapbuflen == 0) status = finished;
|
---|
| 479 | else status = stopped;
|
---|
| 480 | }
|
---|
| 481 |
|
---|
| 482 |
|
---|
| 483 |
|
---|
| 484 | mapoutconvertclass::mapoutconvertclass () {
|
---|
[8666] | 485 | m_multibyte = 0;
|
---|
[1076] | 486 | mapbuflen=0;
|
---|
| 487 | mapbufhere=0;
|
---|
| 488 | }
|
---|
| 489 |
|
---|
| 490 | void mapoutconvertclass::reset () {
|
---|
| 491 | input = NULL;
|
---|
| 492 | outs = NULL;
|
---|
| 493 | mapbuflen = 0;
|
---|
| 494 | mapbufhere = 0;
|
---|
| 495 | }
|
---|
| 496 |
|
---|
| 497 | // note that convert does not null-terminate the
|
---|
| 498 | // output array of characters
|
---|
| 499 | void mapoutconvertclass::convert (char *output, size_t maxlen,
|
---|
| 500 | size_t &len, status_t &status) {
|
---|
| 501 | unsigned short outc;
|
---|
| 502 |
|
---|
| 503 | if (input == NULL || output == NULL) {
|
---|
| 504 | if (mapbuflen == 0) status = finished;
|
---|
| 505 | else status = unfinished;
|
---|
| 506 | return;
|
---|
| 507 | }
|
---|
| 508 |
|
---|
| 509 | // don't want any funny sign conversions happening
|
---|
| 510 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 511 | text_t::iterator textend = input->end();
|
---|
| 512 | len = 0;
|
---|
| 513 | while (len < maxlen) {
|
---|
| 514 | // empty the contents of the internal buffer
|
---|
| 515 | if (mapbuflen > 0) {
|
---|
| 516 | while (len < maxlen && mapbufhere < mapbuflen) {
|
---|
| 517 | *uoutput = mapbuf[mapbufhere];
|
---|
[8727] | 518 | ++uoutput;
|
---|
| 519 | ++len;
|
---|
| 520 | ++mapbufhere;
|
---|
[1076] | 521 | }
|
---|
| 522 |
|
---|
| 523 | if (mapbufhere == mapbuflen) {
|
---|
| 524 | mapbufhere = 0;
|
---|
| 525 | mapbuflen = 0;
|
---|
| 526 | }
|
---|
| 527 | }
|
---|
| 528 |
|
---|
| 529 | // fill up the buffer with the next character
|
---|
| 530 | if (mapbuflen == 0) {
|
---|
| 531 | if (texthere == textend) break; // finished!
|
---|
| 532 | if (!rzws || (*texthere != 0x200b)) {
|
---|
| 533 | if (*texthere < 0x80) {
|
---|
| 534 | mapbuf[0] = (unsigned char)*texthere;
|
---|
| 535 | mapbuflen = 1;
|
---|
| 536 | } else {
|
---|
| 537 | outc = converter.convert (*texthere);
|
---|
[8666] | 538 | if (m_multibyte) {
|
---|
[1927] | 539 | mapbuf[0] = (unsigned char)(outc >> 8);
|
---|
| 540 | mapbuf[1] = (unsigned char)(outc & 0xff);
|
---|
| 541 | mapbuflen = 2;
|
---|
| 542 | } else {
|
---|
| 543 | mapbuf[0] = outc;
|
---|
| 544 | mapbuflen = 1;
|
---|
| 545 | }
|
---|
[1076] | 546 | }
|
---|
| 547 | }
|
---|
| 548 |
|
---|
[8727] | 549 | ++texthere;
|
---|
[1076] | 550 | mapbufhere = 0;
|
---|
| 551 | }
|
---|
| 552 | }
|
---|
| 553 |
|
---|
| 554 | if (texthere == textend && mapbuflen == 0) status = finished;
|
---|
| 555 | else status = unfinished;
|
---|
| 556 | }
|
---|
[1233] | 557 |
|
---|
| 558 |
|
---|
| 559 | bool simplemapconvert::loadmapfile (bool in) {
|
---|
| 560 | if (loaded) return true;
|
---|
| 561 | if (mapfile.empty()) return false;
|
---|
| 562 |
|
---|
| 563 | char *cfilename = mapfile.getcstr();
|
---|
| 564 | #ifdef GSDL_USE_IOS_H
|
---|
| 565 | ifstream mapfilein (cfilename, ios::in | ios::nocreate);
|
---|
| 566 | #else
|
---|
| 567 | ifstream mapfilein (cfilename, ios::in);
|
---|
| 568 | #endif
|
---|
[8727] | 569 | delete []cfilename;
|
---|
[1233] | 570 | if (!mapfilein) return false;
|
---|
| 571 |
|
---|
| 572 | char cline[2048];
|
---|
| 573 | text_t line;
|
---|
| 574 |
|
---|
| 575 | while (!mapfilein.eof()) {
|
---|
| 576 | mapfilein.getline (cline, 2048);
|
---|
| 577 | line.clear();
|
---|
| 578 | line.appendcstr (cline);
|
---|
| 579 | if (line.empty()) continue;
|
---|
| 580 | // remove comments
|
---|
| 581 | text_t::iterator end = line.end();
|
---|
| 582 | text_t::iterator here = findchar (line.begin(), end, '#');
|
---|
| 583 | if (here != end) {
|
---|
| 584 | line.erase (here, end);
|
---|
| 585 | if (line.empty()) continue;
|
---|
| 586 | }
|
---|
| 587 |
|
---|
| 588 | text_tarray parts;
|
---|
| 589 | splitchar (line.begin(), line.end(), '\t', parts);
|
---|
| 590 |
|
---|
| 591 | // do some simple sanity checks
|
---|
| 592 | if (parts.size() < 2) continue;
|
---|
| 593 | text_t::iterator begin1 = parts[0].begin();
|
---|
| 594 | text_t::iterator begin2 = parts[1].begin();
|
---|
| 595 | if (*begin1 != '0' || *(begin1+1) != 'x') continue;
|
---|
| 596 | if (*begin2 != '0' || *(begin2+1) != 'x') continue;
|
---|
| 597 | char *from = parts[0].getcstr();
|
---|
| 598 | char *to = parts[1].getcstr();
|
---|
[1236] | 599 | unsigned int f = 0, t = 0;
|
---|
[1233] | 600 | sscanf (from, "%i", &f);
|
---|
| 601 | sscanf (to, "%i", &t);
|
---|
[8727] | 602 | delete []from;
|
---|
| 603 | delete []to;
|
---|
[1233] | 604 |
|
---|
[1236] | 605 | if (in) mapping[(unsigned short)f] = (unsigned short)t;
|
---|
| 606 | else mapping[(unsigned short)t] = (unsigned short)f;
|
---|
[1233] | 607 | }
|
---|
| 608 |
|
---|
| 609 | loaded = true;
|
---|
| 610 | return true;
|
---|
| 611 | }
|
---|
| 612 |
|
---|
| 613 | unsigned short simplemapconvert::convert (unsigned short c, bool in) {
|
---|
| 614 |
|
---|
| 615 | if (!loaded)
|
---|
| 616 | if (!loadmapfile(in)) return absentc;
|
---|
| 617 |
|
---|
| 618 | return mapping[c];
|
---|
| 619 | }
|
---|
| 620 |
|
---|
| 621 |
|
---|
| 622 | void simplemapinconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 623 | output.clear();
|
---|
| 624 |
|
---|
| 625 | if (start == NULL || len == 0) {
|
---|
| 626 | status = finished;
|
---|
| 627 | return;
|
---|
| 628 | }
|
---|
| 629 |
|
---|
| 630 | // don't want any funny sign conversions happening
|
---|
| 631 | unsigned char *here = (unsigned char *)start;
|
---|
| 632 | while (len > 0) {
|
---|
| 633 |
|
---|
| 634 | if (*here < 0x80)
|
---|
| 635 | output.push_back (*here); // append this character
|
---|
| 636 | else
|
---|
| 637 | output.push_back (converter.convert(*here, true));
|
---|
| 638 |
|
---|
| 639 | ++here;
|
---|
| 640 | --len;
|
---|
| 641 | }
|
---|
| 642 |
|
---|
| 643 | start = (char *)here; // save current position
|
---|
| 644 | status = finished;
|
---|
| 645 | }
|
---|
| 646 |
|
---|
| 647 |
|
---|
| 648 | void simplemapoutconvertclass::convert (char *output, size_t maxlen,
|
---|
| 649 | size_t &len, status_t &status) {
|
---|
| 650 |
|
---|
| 651 | if (input == NULL || output == NULL) {
|
---|
| 652 | status = finished;
|
---|
| 653 | return;
|
---|
| 654 | }
|
---|
| 655 |
|
---|
| 656 | // don't want any funny sign conversions happening
|
---|
| 657 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 658 | text_t::iterator textend = input->end();
|
---|
| 659 | len = 0;
|
---|
| 660 | while ((len < maxlen) && (texthere != textend)) {
|
---|
| 661 |
|
---|
| 662 | if (*texthere < 0x80) *uoutput = (unsigned char)(*texthere);
|
---|
| 663 | else *uoutput = converter.convert (*texthere, false);
|
---|
| 664 |
|
---|
| 665 | ++uoutput;
|
---|
| 666 | ++len;
|
---|
| 667 | ++texthere;
|
---|
| 668 | }
|
---|
| 669 |
|
---|
| 670 | if (texthere == textend) status = finished;
|
---|
| 671 | else status = unfinished;
|
---|
| 672 | }
|
---|