[1076] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * gsdlunicode.cpp --
|
---|
| 4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
| 5 | *
|
---|
| 6 | * A component of the Greenstone digital library software
|
---|
| 7 | * from the New Zealand Digital Library Project at the
|
---|
| 8 | * University of Waikato, New Zealand.
|
---|
| 9 | *
|
---|
| 10 | * This program is free software; you can redistribute it and/or modify
|
---|
| 11 | * it under the terms of the GNU General Public License as published by
|
---|
| 12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | * (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This program is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | * GNU General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU General Public License
|
---|
| 21 | * along with this program; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | *
|
---|
| 24 | *********************************************************************/
|
---|
| 25 |
|
---|
| 26 | #include "gsdlunicode.h"
|
---|
| 27 |
|
---|
| 28 |
|
---|
| 29 | // unitool is currently in mg, if mg is not being used it should
|
---|
| 30 | // be moved into GSDLHOME/lib
|
---|
| 31 | #include "unitool.h"
|
---|
| 32 |
|
---|
| 33 | #include "fileutil.h"
|
---|
| 34 |
|
---|
| 35 | #include <stdio.h>
|
---|
| 36 |
|
---|
[1233] | 37 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
| 38 | # include <ospace\std\iostream>
|
---|
| 39 | # include <ospace\std\fstream>
|
---|
| 40 | #elif defined(GSDL_USE_IOS_H)
|
---|
| 41 | # include <iostream.h>
|
---|
| 42 | # include <fstream.h>
|
---|
| 43 | #else
|
---|
| 44 | # include <iostream>
|
---|
| 45 | # include <fstream>
|
---|
| 46 | #endif
|
---|
[1076] | 47 |
|
---|
| 48 |
|
---|
| 49 | // converts a unicode encode text_t string to a utf-8
|
---|
| 50 | // encoded text_t string
|
---|
| 51 | text_t to_utf8 (text_t::const_iterator here, text_t::const_iterator end) {
|
---|
| 52 | text_t out;
|
---|
| 53 |
|
---|
| 54 | unsigned char thischar[MAXUTF8CHARLEN];
|
---|
| 55 | int i, charlen;
|
---|
| 56 |
|
---|
| 57 | while (here != end) {
|
---|
| 58 | charlen = output_utf8_char (*here, thischar, &thischar[MAXUTF8CHARLEN-1]);
|
---|
| 59 | for (i=0; i<charlen; i++) out.push_back(thischar[i]);
|
---|
| 60 | here++;
|
---|
| 61 | }
|
---|
| 62 |
|
---|
| 63 | return out;
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 | // converts a utf-8 encoded text_t string to a unicode
|
---|
| 67 | // encoded text_t string
|
---|
| 68 | text_t to_uni (const text_t &in) {
|
---|
| 69 | text_t out;
|
---|
| 70 | unsigned char *in_cstr = (unsigned char *)in.getcstr();
|
---|
| 71 | unsigned char *here = in_cstr;
|
---|
| 72 | unsigned char *end = in_cstr;
|
---|
| 73 |
|
---|
| 74 | unsigned short unichar;
|
---|
| 75 | int charlen = 0;
|
---|
| 76 |
|
---|
| 77 | // get the last valid character in the string
|
---|
| 78 | while (*end != '\0') end++;
|
---|
| 79 | end--;
|
---|
| 80 |
|
---|
| 81 | while ((charlen = parse_utf8_char (here, end, &unichar)) > 0) {
|
---|
| 82 | out.push_back(unichar);
|
---|
| 83 | here += charlen;
|
---|
| 84 | }
|
---|
| 85 |
|
---|
| 86 | delete in_cstr;
|
---|
| 87 |
|
---|
| 88 | return out;
|
---|
| 89 | }
|
---|
| 90 |
|
---|
| 91 |
|
---|
| 92 |
|
---|
| 93 | utf8inconvertclass::utf8inconvertclass () {
|
---|
| 94 | utf8buflen = 0;
|
---|
| 95 | }
|
---|
| 96 |
|
---|
| 97 | void utf8inconvertclass::reset () {
|
---|
| 98 | start = NULL;
|
---|
| 99 | len = 0;
|
---|
| 100 | utf8buflen=0;
|
---|
| 101 | }
|
---|
| 102 |
|
---|
| 103 | void utf8inconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 104 | output.clear();
|
---|
| 105 | output.reserve (len/3);
|
---|
| 106 |
|
---|
| 107 | if (start == NULL || len == 0) {
|
---|
| 108 | if (utf8buflen == 0) status = finished;
|
---|
| 109 | else status = stopped;
|
---|
| 110 | return;
|
---|
| 111 | }
|
---|
| 112 |
|
---|
| 113 | // don't want any funny sign conversions happening
|
---|
| 114 | unsigned char *here = (unsigned char *)start;
|
---|
| 115 | unsigned char *end = here+len-1;
|
---|
| 116 | unsigned short c;
|
---|
| 117 | size_t realcharlen;
|
---|
| 118 |
|
---|
| 119 | size_t charlen = getutf8charlen ();
|
---|
| 120 | while (len > 0) {
|
---|
| 121 | if (charlen == 0) {
|
---|
| 122 | // start parsing a new character
|
---|
| 123 | utf8buflen = 0;
|
---|
| 124 |
|
---|
| 125 | // fast common case
|
---|
| 126 | while (len > 3) {
|
---|
| 127 | realcharlen = parse_utf8_char (here, end, &c);
|
---|
| 128 | output.push_back (c);
|
---|
| 129 | here += realcharlen;
|
---|
| 130 | len -= realcharlen;
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 | utf8buf[utf8buflen++] = *here;
|
---|
| 134 | ++here;
|
---|
| 135 | --len;
|
---|
| 136 | charlen = getutf8charlen ();
|
---|
| 137 |
|
---|
| 138 | } else if (utf8buflen < charlen) {
|
---|
| 139 | // assumes charlen is always less than MAXUTF8CHARLEN
|
---|
| 140 | utf8buf[utf8buflen++] = *here;
|
---|
| 141 | ++here;
|
---|
| 142 | --len;
|
---|
| 143 | }
|
---|
| 144 |
|
---|
| 145 | if (utf8buflen == charlen) {
|
---|
| 146 | // got a complete character
|
---|
| 147 | realcharlen = parse_utf8_char (utf8buf, &utf8buf[utf8buflen-1], &c);
|
---|
| 148 | output.push_back (c);
|
---|
| 149 |
|
---|
| 150 | // move any unparsed characters. If an error occurred some of
|
---|
| 151 | // the characters might be unused.
|
---|
| 152 | int i;
|
---|
| 153 | int diff = utf8buflen - realcharlen;
|
---|
| 154 | for (i=0; i < diff; i++) utf8buf[i] = utf8buf[i+diff];
|
---|
| 155 | utf8buflen = diff;
|
---|
| 156 | charlen = getutf8charlen ();
|
---|
| 157 | }
|
---|
| 158 | }
|
---|
| 159 |
|
---|
| 160 | start = (char *)here; // save current position
|
---|
| 161 |
|
---|
| 162 | if (utf8buflen == 0) status = finished;
|
---|
| 163 | else status = stopped;
|
---|
| 164 | }
|
---|
| 165 |
|
---|
| 166 |
|
---|
| 167 | // returns the length that the current contents of the
|
---|
| 168 | // utf8buf should be
|
---|
| 169 | size_t utf8inconvertclass::getutf8charlen () {
|
---|
| 170 | if (utf8buflen == 0) return 0;
|
---|
| 171 |
|
---|
| 172 | // one byte character
|
---|
| 173 | if (utf8buf[0] < 0x80) return 1;
|
---|
| 174 |
|
---|
| 175 | // error, is not the start of a utf-8 character
|
---|
| 176 | if (utf8buf[0] < 0xc0) return 1;
|
---|
| 177 |
|
---|
| 178 | // two bute character
|
---|
| 179 | if (utf8buf[0] < 0xe0) return 2;
|
---|
| 180 |
|
---|
| 181 | // three byte character
|
---|
| 182 | if (utf8buf[0] < 0xf0) return 3;
|
---|
| 183 |
|
---|
| 184 | // error, character too long for unicode
|
---|
| 185 | return 1;
|
---|
| 186 | }
|
---|
| 187 |
|
---|
| 188 |
|
---|
| 189 | void utf8outconvertclass::reset () {
|
---|
| 190 | input = NULL;
|
---|
| 191 | outs = NULL;
|
---|
| 192 | utf8buflen = 0;
|
---|
| 193 | utf8bufhere = 0;
|
---|
| 194 | }
|
---|
| 195 |
|
---|
| 196 | // note that convert does not null-terminate the
|
---|
| 197 | // output array of characters
|
---|
| 198 | void utf8outconvertclass::convert (char *output, size_t maxlen,
|
---|
| 199 | size_t &len, status_t &status) {
|
---|
| 200 | if (input == NULL || output == NULL) {
|
---|
| 201 | if (utf8buflen == 0) status = finished;
|
---|
| 202 | else status = unfinished;
|
---|
| 203 | return;
|
---|
| 204 | }
|
---|
| 205 |
|
---|
| 206 | // don't want any funny sign conversions happening
|
---|
| 207 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 208 | text_t::iterator textend = input->end();
|
---|
| 209 | len = 0;
|
---|
| 210 | while (len < maxlen) {
|
---|
| 211 | // empty the contents of the internal buffer
|
---|
| 212 | if (utf8buflen > 0) {
|
---|
| 213 | while (len < maxlen && utf8bufhere < utf8buflen) {
|
---|
| 214 | *uoutput = utf8buf[utf8bufhere];
|
---|
| 215 | uoutput++;
|
---|
| 216 | len++;
|
---|
| 217 | utf8bufhere++;
|
---|
| 218 | }
|
---|
| 219 |
|
---|
| 220 | if (utf8bufhere == utf8buflen) {
|
---|
| 221 | utf8bufhere = 0;
|
---|
| 222 | utf8buflen = 0;
|
---|
| 223 | }
|
---|
| 224 | }
|
---|
| 225 |
|
---|
| 226 | // fill up the buffer with the next character
|
---|
| 227 | if (utf8buflen == 0) {
|
---|
| 228 | if (texthere == textend) break; // finished!
|
---|
| 229 | if (!rzws || (*texthere != 0x200b))
|
---|
| 230 | utf8buflen = output_utf8_char (*texthere, utf8buf,
|
---|
| 231 | &utf8buf[MAXUTF8CHARLEN-1]);
|
---|
| 232 | texthere++;
|
---|
| 233 | utf8bufhere = 0;
|
---|
| 234 | }
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | if (texthere == textend && utf8buflen == 0) status = finished;
|
---|
| 238 | else status = unfinished;
|
---|
| 239 | }
|
---|
| 240 |
|
---|
| 241 |
|
---|
| 242 |
|
---|
| 243 |
|
---|
| 244 |
|
---|
| 245 |
|
---|
| 246 | mapdata_t::mapdata_t () {
|
---|
| 247 | int i;
|
---|
| 248 |
|
---|
| 249 | // reset all the map ptrs to be NULL
|
---|
| 250 | for (i=0; i<256; i++) {
|
---|
| 251 | ptrs[i] = (unsigned short *)NULL;
|
---|
| 252 | }
|
---|
| 253 |
|
---|
| 254 | // say nothing has been loaded
|
---|
| 255 | loaded = false;
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 |
|
---|
| 259 | mapconvert::mapconvert () {
|
---|
| 260 | absentc = 0;
|
---|
| 261 | }
|
---|
| 262 |
|
---|
| 263 | // setmapfile will cause loadmapfile to be called when conversion is
|
---|
| 264 | // needed
|
---|
[1870] | 265 | bool mapconvert::setmapfile (const text_t &themapfile, unsigned short theabsentc) {
|
---|
[1076] | 266 | // check to see if the mapfile has been already loaded
|
---|
[1870] | 267 | if (mapdata.loaded && mapfile == themapfile && absentc == theabsentc) return true;
|
---|
[1076] | 268 |
|
---|
| 269 | unloadmapfile ();
|
---|
[1870] | 270 | mapfile = themapfile;
|
---|
[1076] | 271 | absentc = theabsentc;
|
---|
| 272 |
|
---|
| 273 | return true;
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 |
|
---|
| 277 |
|
---|
| 278 | // loadmapfile should be called before any conversion is done
|
---|
[1870] | 279 | bool mapconvert::loadmapfile (const text_t &themapfile,
|
---|
[1076] | 280 | unsigned short theabsentc) {
|
---|
| 281 | FILE *mapfilein = (FILE *)NULL;
|
---|
| 282 |
|
---|
| 283 | // check to see if the mapfile has been already loaded
|
---|
[1870] | 284 | if (mapdata.loaded && mapfile == themapfile && absentc == theabsentc) return true;
|
---|
[1076] | 285 |
|
---|
| 286 | unloadmapfile ();
|
---|
[1870] | 287 | mapfile = themapfile;
|
---|
[1076] | 288 | absentc = theabsentc;
|
---|
| 289 |
|
---|
| 290 | // open the map file
|
---|
[1870] | 291 | char *cfilename = mapfile.getcstr();
|
---|
[1076] | 292 | if (cfilename == (char *)NULL) return false;
|
---|
| 293 | mapfilein = fopen(cfilename, "rb");
|
---|
| 294 | delete cfilename;
|
---|
| 295 |
|
---|
| 296 | if (mapfilein == (FILE *)NULL) return false;
|
---|
| 297 |
|
---|
| 298 | unsigned char c, n1, n2;
|
---|
| 299 | unsigned short *arrptr;
|
---|
| 300 | int i;
|
---|
| 301 | c = fgetc (mapfilein);
|
---|
| 302 | while (!feof (mapfilein)) {
|
---|
| 303 | if (mapdata.ptrs[c] == (unsigned short *)NULL) {
|
---|
| 304 | // allocate a new array
|
---|
| 305 | arrptr = new unsigned short[256];
|
---|
| 306 | mapdata.ptrs[c] = arrptr;
|
---|
| 307 | } else arrptr = mapdata.ptrs[c];
|
---|
| 308 |
|
---|
| 309 | // clear the array
|
---|
| 310 | for (i=0; i<256; i++) arrptr[i] = 0;
|
---|
| 311 |
|
---|
| 312 | // read in this block
|
---|
| 313 | n1 = fgetc (mapfilein);
|
---|
| 314 | n2 = fgetc (mapfilein);
|
---|
| 315 | i=0;
|
---|
| 316 | while (!feof (mapfilein)) {
|
---|
| 317 | arrptr[i] = ((unsigned short)n1 << 8) | (unsigned short)n2;
|
---|
| 318 |
|
---|
| 319 | i++;
|
---|
| 320 | if (i >= 256) break;
|
---|
| 321 | n1 = fgetc (mapfilein);
|
---|
| 322 | n2 = fgetc (mapfilein);
|
---|
| 323 | }
|
---|
| 324 |
|
---|
| 325 | c = fgetc (mapfilein);
|
---|
| 326 | }
|
---|
| 327 |
|
---|
| 328 | mapdata.loaded = true;
|
---|
| 329 |
|
---|
| 330 | return true;
|
---|
| 331 | }
|
---|
| 332 |
|
---|
| 333 | void mapconvert::unloadmapfile () {
|
---|
| 334 | if (!mapdata.loaded) return;
|
---|
| 335 |
|
---|
| 336 | int i;
|
---|
| 337 | for (i=0; i<256; i++) {
|
---|
| 338 | if (mapdata.ptrs[i] != (unsigned short *)NULL) {
|
---|
| 339 | delete [] mapdata.ptrs[i];
|
---|
| 340 | mapdata.ptrs[i] = (unsigned short *)NULL;
|
---|
| 341 | }
|
---|
| 342 | }
|
---|
| 343 |
|
---|
| 344 | mapdata.loaded = false;
|
---|
| 345 | }
|
---|
| 346 |
|
---|
| 347 |
|
---|
| 348 | unsigned short mapconvert::convert (unsigned short c) {
|
---|
| 349 | if (!mapdata.loaded) {
|
---|
[1870] | 350 | if (!mapfile.empty() && loadmapfile (mapfile, absentc)) {
|
---|
[1076] | 351 | // do nothing, successfully loaded database
|
---|
| 352 | } else return absentc;
|
---|
| 353 | }
|
---|
| 354 |
|
---|
| 355 | if (c == 0) return 0; // 0 always maps to 0...
|
---|
| 356 |
|
---|
| 357 | unsigned short n1 = c >> 8;
|
---|
| 358 | unsigned short n2 = c & 0xff;
|
---|
| 359 |
|
---|
| 360 | unsigned short *arrptr = mapdata.ptrs[n1];
|
---|
| 361 | if (arrptr == (unsigned short *)NULL) return absentc;
|
---|
| 362 |
|
---|
| 363 | if (arrptr[n2] == 0) return absentc;
|
---|
| 364 | return arrptr[n2];
|
---|
| 365 | }
|
---|
| 366 |
|
---|
| 367 | text_t mapconvert::convert (const text_t &instr) {
|
---|
| 368 | if (!mapdata.loaded) return absentc;
|
---|
| 369 |
|
---|
| 370 | text_t outstr;
|
---|
| 371 | text_t::const_iterator here = instr.begin();
|
---|
| 372 | text_t::const_iterator end = instr.end();
|
---|
| 373 |
|
---|
| 374 | while (here != end) {
|
---|
| 375 | outstr.push_back(this->convert(*here));
|
---|
| 376 | here++;
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 | return outstr;
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 |
|
---|
| 383 |
|
---|
| 384 |
|
---|
| 385 | mapinconvertclass::mapinconvertclass () {
|
---|
[1927] | 386 | multibyte = 0;
|
---|
[1076] | 387 | mapbuflen = 0;
|
---|
| 388 | }
|
---|
| 389 |
|
---|
| 390 | void mapinconvertclass::reset () {
|
---|
| 391 | start = NULL;
|
---|
| 392 | len = 0;
|
---|
| 393 | mapbuflen=0;
|
---|
| 394 | }
|
---|
| 395 |
|
---|
| 396 | void mapinconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 397 | output.clear();
|
---|
| 398 |
|
---|
| 399 | if (start == NULL || len == 0) {
|
---|
| 400 | if (mapbuflen == 0) status = finished;
|
---|
| 401 | else status = stopped;
|
---|
| 402 | return;
|
---|
| 403 | }
|
---|
| 404 |
|
---|
| 405 | // don't want any funny sign conversions happening
|
---|
| 406 | unsigned char *here = (unsigned char *)start;
|
---|
| 407 |
|
---|
| 408 | size_t charlen = getmapcharlen ();
|
---|
| 409 | while (len > 0) {
|
---|
| 410 | if (charlen == 0) {
|
---|
| 411 | // start parsing a new character
|
---|
| 412 | mapbuflen = 0;
|
---|
| 413 | mapbuf[mapbuflen++] = *here;
|
---|
| 414 | ++here;
|
---|
| 415 | --len;
|
---|
| 416 | charlen = getmapcharlen ();
|
---|
| 417 |
|
---|
| 418 | } else if (mapbuflen < charlen) {
|
---|
| 419 | // assumes charlen is always less than MAXMAPCHARLEN
|
---|
| 420 | mapbuf[mapbuflen++] = *here;
|
---|
| 421 | ++here;
|
---|
| 422 | --len;
|
---|
| 423 | }
|
---|
| 424 |
|
---|
| 425 | if (mapbuflen == charlen) {
|
---|
| 426 | // got a complete character
|
---|
| 427 | if (charlen == 1) {
|
---|
[1927] | 428 | if (mapbuf[0] < 0x80) {
|
---|
| 429 | // ascii character
|
---|
| 430 | output.push_back (mapbuf[0]);
|
---|
| 431 | } else {
|
---|
| 432 | output.push_back (converter.convert((unsigned short)mapbuf[0]));
|
---|
| 433 | }
|
---|
[1076] | 434 |
|
---|
| 435 | } else {
|
---|
| 436 | // two byte character
|
---|
| 437 | output.push_back (converter.convert(((unsigned short)mapbuf[0] << 8) |
|
---|
| 438 | (unsigned short)mapbuf[1]));
|
---|
| 439 | }
|
---|
| 440 |
|
---|
| 441 | mapbuflen = 0;
|
---|
| 442 | charlen = 0;
|
---|
| 443 | }
|
---|
| 444 | }
|
---|
| 445 |
|
---|
| 446 | start = (char *)here; // save current position
|
---|
| 447 |
|
---|
| 448 | if (mapbuflen == 0) status = finished;
|
---|
| 449 | else status = stopped;
|
---|
| 450 | }
|
---|
| 451 |
|
---|
| 452 |
|
---|
| 453 |
|
---|
| 454 | mapoutconvertclass::mapoutconvertclass () {
|
---|
[1927] | 455 | multibyte = 0;
|
---|
[1076] | 456 | mapbuflen=0;
|
---|
| 457 | mapbufhere=0;
|
---|
| 458 | }
|
---|
| 459 |
|
---|
| 460 | void mapoutconvertclass::reset () {
|
---|
| 461 | input = NULL;
|
---|
| 462 | outs = NULL;
|
---|
| 463 | mapbuflen = 0;
|
---|
| 464 | mapbufhere = 0;
|
---|
| 465 | }
|
---|
| 466 |
|
---|
| 467 | // note that convert does not null-terminate the
|
---|
| 468 | // output array of characters
|
---|
| 469 | void mapoutconvertclass::convert (char *output, size_t maxlen,
|
---|
| 470 | size_t &len, status_t &status) {
|
---|
| 471 | unsigned short outc;
|
---|
| 472 |
|
---|
| 473 | if (input == NULL || output == NULL) {
|
---|
| 474 | if (mapbuflen == 0) status = finished;
|
---|
| 475 | else status = unfinished;
|
---|
| 476 | return;
|
---|
| 477 | }
|
---|
| 478 |
|
---|
| 479 | // don't want any funny sign conversions happening
|
---|
| 480 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 481 | text_t::iterator textend = input->end();
|
---|
| 482 | len = 0;
|
---|
| 483 | while (len < maxlen) {
|
---|
| 484 | // empty the contents of the internal buffer
|
---|
| 485 | if (mapbuflen > 0) {
|
---|
| 486 | while (len < maxlen && mapbufhere < mapbuflen) {
|
---|
| 487 | *uoutput = mapbuf[mapbufhere];
|
---|
| 488 | uoutput++;
|
---|
| 489 | len++;
|
---|
| 490 | mapbufhere++;
|
---|
| 491 | }
|
---|
| 492 |
|
---|
| 493 | if (mapbufhere == mapbuflen) {
|
---|
| 494 | mapbufhere = 0;
|
---|
| 495 | mapbuflen = 0;
|
---|
| 496 | }
|
---|
| 497 | }
|
---|
| 498 |
|
---|
| 499 | // fill up the buffer with the next character
|
---|
| 500 | if (mapbuflen == 0) {
|
---|
| 501 | if (texthere == textend) break; // finished!
|
---|
| 502 | if (!rzws || (*texthere != 0x200b)) {
|
---|
| 503 | if (*texthere < 0x80) {
|
---|
| 504 | mapbuf[0] = (unsigned char)*texthere;
|
---|
| 505 | mapbuflen = 1;
|
---|
| 506 | } else {
|
---|
| 507 | outc = converter.convert (*texthere);
|
---|
[1927] | 508 | if (multibyte) {
|
---|
| 509 | mapbuf[0] = (unsigned char)(outc >> 8);
|
---|
| 510 | mapbuf[1] = (unsigned char)(outc & 0xff);
|
---|
| 511 | mapbuflen = 2;
|
---|
| 512 | } else {
|
---|
| 513 | mapbuf[0] = outc;
|
---|
| 514 | mapbuflen = 1;
|
---|
| 515 | }
|
---|
[1076] | 516 | }
|
---|
| 517 | }
|
---|
| 518 |
|
---|
| 519 | texthere++;
|
---|
| 520 | mapbufhere = 0;
|
---|
| 521 | }
|
---|
| 522 | }
|
---|
| 523 |
|
---|
| 524 | if (texthere == textend && mapbuflen == 0) status = finished;
|
---|
| 525 | else status = unfinished;
|
---|
| 526 | }
|
---|
[1233] | 527 |
|
---|
| 528 |
|
---|
| 529 | bool simplemapconvert::loadmapfile (bool in) {
|
---|
| 530 | if (loaded) return true;
|
---|
| 531 | if (mapfile.empty()) return false;
|
---|
| 532 |
|
---|
| 533 | char *cfilename = mapfile.getcstr();
|
---|
| 534 | #ifdef GSDL_USE_IOS_H
|
---|
| 535 | ifstream mapfilein (cfilename, ios::in | ios::nocreate);
|
---|
| 536 | #else
|
---|
| 537 | ifstream mapfilein (cfilename, ios::in);
|
---|
| 538 | #endif
|
---|
| 539 | delete cfilename;
|
---|
| 540 | if (!mapfilein) return false;
|
---|
| 541 |
|
---|
| 542 | char cline[2048];
|
---|
| 543 | text_t line;
|
---|
| 544 |
|
---|
| 545 | while (!mapfilein.eof()) {
|
---|
| 546 | mapfilein.getline (cline, 2048);
|
---|
| 547 | line.clear();
|
---|
| 548 | line.appendcstr (cline);
|
---|
| 549 | if (line.empty()) continue;
|
---|
| 550 | // remove comments
|
---|
| 551 | text_t::iterator end = line.end();
|
---|
| 552 | text_t::iterator here = findchar (line.begin(), end, '#');
|
---|
| 553 | if (here != end) {
|
---|
| 554 | line.erase (here, end);
|
---|
| 555 | if (line.empty()) continue;
|
---|
| 556 | }
|
---|
| 557 |
|
---|
| 558 | text_tarray parts;
|
---|
| 559 | splitchar (line.begin(), line.end(), '\t', parts);
|
---|
| 560 |
|
---|
| 561 | // do some simple sanity checks
|
---|
| 562 | if (parts.size() < 2) continue;
|
---|
| 563 | text_t::iterator begin1 = parts[0].begin();
|
---|
| 564 | text_t::iterator begin2 = parts[1].begin();
|
---|
| 565 | if (*begin1 != '0' || *(begin1+1) != 'x') continue;
|
---|
| 566 | if (*begin2 != '0' || *(begin2+1) != 'x') continue;
|
---|
| 567 | char *from = parts[0].getcstr();
|
---|
| 568 | char *to = parts[1].getcstr();
|
---|
[1236] | 569 | unsigned int f = 0, t = 0;
|
---|
[1233] | 570 | sscanf (from, "%i", &f);
|
---|
| 571 | sscanf (to, "%i", &t);
|
---|
| 572 | delete from;
|
---|
| 573 | delete to;
|
---|
| 574 |
|
---|
[1236] | 575 | if (in) mapping[(unsigned short)f] = (unsigned short)t;
|
---|
| 576 | else mapping[(unsigned short)t] = (unsigned short)f;
|
---|
[1233] | 577 | }
|
---|
| 578 |
|
---|
| 579 | loaded = true;
|
---|
| 580 | return true;
|
---|
| 581 | }
|
---|
| 582 |
|
---|
| 583 | unsigned short simplemapconvert::convert (unsigned short c, bool in) {
|
---|
| 584 |
|
---|
| 585 | if (!loaded)
|
---|
| 586 | if (!loadmapfile(in)) return absentc;
|
---|
| 587 |
|
---|
| 588 | return mapping[c];
|
---|
| 589 | }
|
---|
| 590 |
|
---|
| 591 |
|
---|
| 592 | void simplemapinconvertclass::convert (text_t &output, status_t &status) {
|
---|
| 593 | output.clear();
|
---|
| 594 |
|
---|
| 595 | if (start == NULL || len == 0) {
|
---|
| 596 | status = finished;
|
---|
| 597 | return;
|
---|
| 598 | }
|
---|
| 599 |
|
---|
| 600 | // don't want any funny sign conversions happening
|
---|
| 601 | unsigned char *here = (unsigned char *)start;
|
---|
| 602 | while (len > 0) {
|
---|
| 603 |
|
---|
| 604 | if (*here < 0x80)
|
---|
| 605 | output.push_back (*here); // append this character
|
---|
| 606 | else
|
---|
| 607 | output.push_back (converter.convert(*here, true));
|
---|
| 608 |
|
---|
| 609 | ++here;
|
---|
| 610 | --len;
|
---|
| 611 | }
|
---|
| 612 |
|
---|
| 613 | start = (char *)here; // save current position
|
---|
| 614 | status = finished;
|
---|
| 615 | }
|
---|
| 616 |
|
---|
| 617 |
|
---|
| 618 | void simplemapoutconvertclass::convert (char *output, size_t maxlen,
|
---|
| 619 | size_t &len, status_t &status) {
|
---|
| 620 |
|
---|
| 621 | if (input == NULL || output == NULL) {
|
---|
| 622 | status = finished;
|
---|
| 623 | return;
|
---|
| 624 | }
|
---|
| 625 |
|
---|
| 626 | // don't want any funny sign conversions happening
|
---|
| 627 | unsigned char *uoutput = (unsigned char *)output;
|
---|
| 628 | text_t::iterator textend = input->end();
|
---|
| 629 | len = 0;
|
---|
| 630 | while ((len < maxlen) && (texthere != textend)) {
|
---|
| 631 |
|
---|
| 632 | if (*texthere < 0x80) *uoutput = (unsigned char)(*texthere);
|
---|
| 633 | else *uoutput = converter.convert (*texthere, false);
|
---|
| 634 |
|
---|
| 635 | ++uoutput;
|
---|
| 636 | ++len;
|
---|
| 637 | ++texthere;
|
---|
| 638 | }
|
---|
| 639 |
|
---|
| 640 | if (texthere == textend) status = finished;
|
---|
| 641 | else status = unfinished;
|
---|
| 642 | }
|
---|