Changeset 94 for trunk/gsdl
- Timestamp:
- 1999-01-04T16:32:21+13:00 (25 years ago)
- Location:
- trunk/gsdl
- Files:
-
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/lib/Makefile
r93 r94 26 26 CC = gcc 27 27 CCFLAGS = 28 DEFS = - O2 -g28 DEFS = -g 29 29 RANLIB = ranlib 30 30 INCLUDES = -I../packages/mg-1.3d/lib … … 49 49 OBJECTS = text_t.o display.o cfgread.o fileutil.o gsdlunicode.o 50 50 51 all 51 all: gsdllib.a 52 52 53 gsdllib.a 53 gsdllib.a: $(OBJECTS) 54 54 rm -f gsdllib.a 55 55 $(AR) cru gsdllib.a $(OBJECTS) … … 59 59 rm -f $(OBJECTS) gsdllib.a 60 60 61 install: 62 61 63 depend: 62 64 makedepend -Y -- $(DEFS) $(INCLUDES) $(CCFLAGS) -- $(SOURCES) … … 65 67 66 68 text_t.o: text_t.h 67 display.o: display.h text_t.h 69 display.o: display.h text_t.h gsdlunicode.h 68 70 cfgread.o: cfgread.h text_t.h 69 71 fileutil.o: fileutil.h text_t.h 72 gsdlunicode.o: gsdlunicode.h text_t.h ../packages/mg-1.3d/lib/unitool.h 73 gsdlunicode.o: fileutil.h -
trunk/gsdl/lib/gsdlunicode.cpp
r93 r94 9 9 #include <stdio.h> 10 10 11 12 mapdata_t::mapdata_t () {13 int i;14 15 // reset all the map ptrs to be NULL16 for (i=0; i<256; i++) {17 ptrs[i] = (unsigned short *)NULL;18 }19 20 // say nothing has been loaded21 loaded = false;22 }23 24 25 mapconvert::mapconvert (const text_t &thegsdlhome, const text_t &theencoding,26 unsigned short theabsentc) {27 gsdlhome = thegsdlhome;28 encoding = theencoding;29 absentc = theabsentc;30 }31 32 unsigned short mapconvert::convert (unsigned short c) {33 if (!loadmapfile()) return absentc;34 35 if (c == 0) return 0; // 0 always maps to 0...36 37 unsigned short n1 = c >> 8;38 unsigned short n2 = c & 0xff;39 40 unsigned short *arrptr = mapdata.ptrs[n1];41 if (arrptr == (unsigned short *)NULL) return absentc;42 43 if (arrptr[n2] == 0) return absentc;44 return arrptr[n2];45 }46 47 text_t mapconvert::convert (const text_t &instr) {48 if (!loadmapfile()) return absentc;49 50 text_t outstr;51 text_t::const_iterator here = instr.begin();52 text_t::const_iterator end = instr.end();53 54 while (here != end) {55 outstr.push_back(this->convert(*here));56 here++;57 }58 59 return outstr;60 }61 62 bool mapconvert::loadmapfile () {63 FILE *mapfilein = (FILE *)NULL;64 65 // check to see if the mapfile has been already loaded66 if (mapdata.loaded) return true;67 68 // open the map file69 text_t filename = filename_cat (gsdlhome, "unicode");70 filename = filename_cat (filename, encoding);71 filename += ".ump";72 char *cfilename = filename.getcstr();73 if (cfilename == (char *)NULL) return false;74 mapfilein = fopen(cfilename, "rb");75 delete cfilename;76 77 if (mapfilein == (FILE *)NULL) return false;78 79 unsigned char c, n1, n2;80 unsigned short *arrptr;81 int i;82 c = fgetc (mapfilein);83 while (!feof (mapfilein)) {84 if (mapdata.ptrs[c] == (unsigned short *)NULL) {85 // allocate a new array86 arrptr = new unsigned short[256];87 mapdata.ptrs[c] = arrptr;88 } else arrptr = mapdata.ptrs[c];89 90 // clear the array91 for (i=0; i<256; i++) 
arrptr[i] = 0;92 93 // read in this block94 n1 = fgetc (mapfilein);95 n2 = fgetc (mapfilein);96 i=0;97 while (!feof (mapfilein)) {98 arrptr[i] = ((unsigned short)n1 << 8) | (unsigned short)n2;99 100 i++;101 if (i >= 256) break;102 n1 = fgetc (mapfilein);103 n2 = fgetc (mapfilein);104 }105 106 c = fgetc (mapfilein);107 }108 109 mapdata.loaded = true;110 111 return true;112 }113 114 void mapconvert::unloadmapfile () {115 if (!mapdata.loaded) return;116 117 int i;118 for (i=0; i<256; i++) {119 if (mapdata.ptrs[i] != (unsigned short *)NULL) {120 delete [] mapdata.ptrs[i];121 mapdata.ptrs[i] = (unsigned short *)NULL;122 }123 }124 125 mapdata.loaded = false;126 }127 11 128 12 … … 189 73 190 74 void utf8inconvertclass::convert (text_t &output, status_t &status) { 75 output.clear(); 76 191 77 if (start == NULL || len == 0) { 192 78 if (utf8buflen == 0) status = finished; … … 261 147 262 148 263 264 gbinconvertclass::gbinconvertclass (const text_t &gsdlhome) {265 gbbuflen = 0;266 gb2unimap = new mapconvert (gsdlhome, "gbku", 0x25a1);267 }268 269 void gbinconvertclass::reset () {270 start = NULL;271 len = 0;272 gbbuflen=0;273 }274 275 void gbinconvertclass::convert (text_t &output, status_t &status) {276 if (start == NULL || len == 0 || gb2unimap == NULL) {277 if (gbbuflen == 0) status = finished;278 else status = stopped;279 return;280 }281 282 // don't want any funny sign conversions happening283 unsigned char *here = (unsigned char *)start;284 285 size_t charlen = getgbcharlen ();286 unsigned short c;287 size_t realcharlen;288 while (len > 0) {289 if (charlen == 0) {290 // start parsing a new character291 gbbuflen = 0;292 gbbuf[gbbuflen++] = *here;293 ++here;294 --len;295 charlen = getgbcharlen ();296 297 } else if (gbbuflen < charlen) {298 // assumes charlen is always less than MAXGBCHARLEN299 gbbuf[gbbuflen++] = *here;300 ++here;301 --len;302 }303 304 if (gbbuflen == charlen) {305 // got a complete character306 if (charlen == 1) {307 // ascii character308 
output.push_back (gbbuf[0]);309 310 } else {311 // two byte character312 output.push_back (gb2unimap->convert(((unsigned short)gbbuf[0] << 8) |313 (unsigned short)gbbuf[1]));314 }315 316 gbbuflen = 0;317 charlen = 0;318 }319 }320 321 start = (char *)here; // save current position322 323 if (gbbuflen == 0) status = finished;324 else status = stopped;325 }326 327 // returns the length that the current contents of the328 // gbbuf should be329 size_t gbinconvertclass::getgbcharlen () {330 if (gbbuflen == 0) return 0;331 332 // one byte character333 if (gbbuf[0] < 0x80) return 1;334 335 // other characters are two byte336 return 2;337 }338 339 340 341 342 343 344 149 void utf8outconvertclass::reset () { 345 150 input = NULL; … … 397 202 398 203 399 gboutconvertclass::gboutconvertclass (const text_t &gsdlhome){ 400 gbbuflen=0; 401 gbbufhere=0; 402 uni2gbmap = new mapconvert (gsdlhome, "ugbk", 0xa1f5); 403 } 404 405 void gboutconvertclass::reset () { 204 205 206 mapdata_t::mapdata_t () { 207 int i; 208 209 // reset all the map ptrs to be NULL 210 for (i=0; i<256; i++) { 211 ptrs[i] = (unsigned short *)NULL; 212 } 213 214 // say nothing has been loaded 215 loaded = false; 216 } 217 218 219 mapconvert::mapconvert () { 220 absentc = 0; 221 } 222 223 224 // loadmapfile should be called before any conversion is done 225 bool mapconvert::loadmapfile (const text_t &thegsdlhome, 226 const text_t &theencoding, 227 unsigned short theabsentc) { 228 FILE *mapfilein = (FILE *)NULL; 229 230 // check to see if the mapfile has been already loaded 231 if (mapdata.loaded && gsdlhome == thegsdlhome && 232 encoding == theencoding && absentc == theabsentc) 233 return true; 234 235 unloadmapfile (); 236 gsdlhome = thegsdlhome; 237 encoding = theencoding; 238 absentc = theabsentc; 239 240 // open the map file 241 text_t filename = filename_cat (gsdlhome, "unicode"); 242 filename = filename_cat (filename, encoding); 243 filename += ".ump"; 244 char *cfilename = filename.getcstr(); 245 if 
(cfilename == (char *)NULL) return false; 246 mapfilein = fopen(cfilename, "rb"); 247 delete cfilename; 248 249 if (mapfilein == (FILE *)NULL) return false; 250 251 unsigned char c, n1, n2; 252 unsigned short *arrptr; 253 int i; 254 c = fgetc (mapfilein); 255 while (!feof (mapfilein)) { 256 if (mapdata.ptrs[c] == (unsigned short *)NULL) { 257 // allocate a new array 258 arrptr = new unsigned short[256]; 259 mapdata.ptrs[c] = arrptr; 260 } else arrptr = mapdata.ptrs[c]; 261 262 // clear the array 263 for (i=0; i<256; i++) arrptr[i] = 0; 264 265 // read in this block 266 n1 = fgetc (mapfilein); 267 n2 = fgetc (mapfilein); 268 i=0; 269 while (!feof (mapfilein)) { 270 arrptr[i] = ((unsigned short)n1 << 8) | (unsigned short)n2; 271 272 i++; 273 if (i >= 256) break; 274 n1 = fgetc (mapfilein); 275 n2 = fgetc (mapfilein); 276 } 277 278 c = fgetc (mapfilein); 279 } 280 281 mapdata.loaded = true; 282 283 return true; 284 } 285 286 void mapconvert::unloadmapfile () { 287 if (!mapdata.loaded) return; 288 289 int i; 290 for (i=0; i<256; i++) { 291 if (mapdata.ptrs[i] != (unsigned short *)NULL) { 292 delete [] mapdata.ptrs[i]; 293 mapdata.ptrs[i] = (unsigned short *)NULL; 294 } 295 } 296 297 mapdata.loaded = false; 298 } 299 300 301 unsigned short mapconvert::convert (unsigned short c) { 302 if (!mapdata.loaded) return absentc; 303 304 if (c == 0) return 0; // 0 always maps to 0... 
305 306 unsigned short n1 = c >> 8; 307 unsigned short n2 = c & 0xff; 308 309 unsigned short *arrptr = mapdata.ptrs[n1]; 310 if (arrptr == (unsigned short *)NULL) return absentc; 311 312 if (arrptr[n2] == 0) return absentc; 313 return arrptr[n2]; 314 } 315 316 text_t mapconvert::convert (const text_t &instr) { 317 if (!mapdata.loaded) return absentc; 318 319 text_t outstr; 320 text_t::const_iterator here = instr.begin(); 321 text_t::const_iterator end = instr.end(); 322 323 while (here != end) { 324 outstr.push_back(this->convert(*here)); 325 here++; 326 } 327 328 return outstr; 329 } 330 331 332 333 334 mapinconvertclass::mapinconvertclass () { 335 mapbuflen = 0; 336 } 337 338 void mapinconvertclass::reset () { 339 start = NULL; 340 len = 0; 341 mapbuflen=0; 342 } 343 344 void mapinconvertclass::convert (text_t &output, status_t &status) { 345 output.clear(); 346 347 if (start == NULL || len == 0) { 348 if (mapbuflen == 0) status = finished; 349 else status = stopped; 350 return; 351 } 352 353 // don't want any funny sign conversions happening 354 unsigned char *here = (unsigned char *)start; 355 356 size_t charlen = getmapcharlen (); 357 unsigned short c; 358 size_t realcharlen; 359 while (len > 0) { 360 if (charlen == 0) { 361 // start parsing a new character 362 mapbuflen = 0; 363 mapbuf[mapbuflen++] = *here; 364 ++here; 365 --len; 366 charlen = getmapcharlen (); 367 368 } else if (mapbuflen < charlen) { 369 // assumes charlen is always less than MAXMAPCHARLEN 370 mapbuf[mapbuflen++] = *here; 371 ++here; 372 --len; 373 } 374 375 if (mapbuflen == charlen) { 376 // got a complete character 377 if (charlen == 1) { 378 // ascii character 379 output.push_back (mapbuf[0]); 380 381 } else { 382 // two byte character 383 output.push_back (converter.convert(((unsigned short)mapbuf[0] << 8) | 384 (unsigned short)mapbuf[1])); 385 } 386 387 mapbuflen = 0; 388 charlen = 0; 389 } 390 } 391 392 start = (char *)here; // save current position 393 394 if (mapbuflen == 0) status 
= finished; 395 else status = stopped; 396 } 397 398 399 400 mapoutconvertclass::mapoutconvertclass () { 401 mapbuflen=0; 402 mapbufhere=0; 403 } 404 405 void mapoutconvertclass::reset () { 406 406 input = NULL; 407 407 outs = NULL; 408 gbbuflen = 0;409 gbbufhere = 0;408 mapbuflen = 0; 409 mapbufhere = 0; 410 410 } 411 411 412 412 // note that convert does not null-terminate the 413 413 // output array of characters 414 void gboutconvertclass::convert (char *output, size_t maxlen,414 void mapoutconvertclass::convert (char *output, size_t maxlen, 415 415 size_t &len, status_t &status) { 416 416 unsigned short outc; 417 417 418 if (input == NULL || output == NULL || uni2gbmap == NULL) {419 if ( gbbuflen == 0) status = finished;418 if (input == NULL || output == NULL) { 419 if (mapbuflen == 0) status = finished; 420 420 else status = unfinished; 421 421 return; … … 428 428 while (len < maxlen) { 429 429 // empty the contents of the internal buffer 430 if ( gbbuflen > 0) {431 while (len < maxlen && gbbufhere < gbbuflen) {432 *uoutput = gbbuf[gbbufhere];430 if (mapbuflen > 0) { 431 while (len < maxlen && mapbufhere < mapbuflen) { 432 *uoutput = mapbuf[mapbufhere]; 433 433 uoutput++; 434 434 len++; 435 gbbufhere++;436 } 437 438 if ( gbbufhere == gbbuflen) {439 gbbufhere = 0;440 gbbuflen = 0;435 mapbufhere++; 436 } 437 438 if (mapbufhere == mapbuflen) { 439 mapbufhere = 0; 440 mapbuflen = 0; 441 441 } 442 442 } 443 443 444 444 // fill up the buffer with the next character 445 if ( gbbuflen == 0) {445 if (mapbuflen == 0) { 446 446 if (texthere == textend) break; // finished! 
447 447 if (!rzws || (*texthere != 0x200b)) { 448 448 if (*texthere < 0x80) { 449 gbbuf[0] = (unsigned char)*texthere;450 gbbuflen = 1;449 mapbuf[0] = (unsigned char)*texthere; 450 mapbuflen = 1; 451 451 } else { 452 outc = uni2gbmap->convert (*texthere);453 gbbuf[0] = (unsigned char)(outc >> 8);454 gbbuf[1] = (unsigned char)(outc & 0xff);455 gbbuflen = 2;452 outc = converter.convert (*texthere); 453 mapbuf[0] = (unsigned char)(outc >> 8); 454 mapbuf[1] = (unsigned char)(outc & 0xff); 455 mapbuflen = 2; 456 456 } 457 457 } 458 458 459 459 texthere++; 460 gbbufhere = 0;460 mapbufhere = 0; 461 461 } 462 462 } 463 463 464 if (texthere == textend && gbbuflen == 0) status = finished;464 if (texthere == textend && mapbuflen == 0) status = finished; 465 465 else status = unfinished; 466 466 } -
trunk/gsdl/lib/gsdlunicode.h
r93 r94 3 3 4 4 #include "text_t.h" 5 6 // mapdata_t is used by mapconvert to hold the map file data7 class mapdata_t {8 public:9 mapdata_t();10 bool loaded;11 unsigned short *ptrs[256];12 };13 14 15 // mapconvert is used in situations where conversion is best16 // done using a map file. The mapfile should reside in17 // gsdlhome/unicode.18 class mapconvert {19 public:20 mapconvert (const text_t &thegsdlhome, const text_t &theencoding,21 unsigned short theabsentc);22 ~mapconvert () {unloadmapfile();};23 24 unsigned short convert (unsigned short c);25 text_t convert (const text_t &instr);26 27 protected:28 text_t gsdlhome;29 text_t encoding;30 unsigned short absentc;31 mapdata_t mapdata;32 33 bool loadmapfile ();34 void unloadmapfile ();35 };36 37 38 5 39 6 … … 65 32 // utf8buf should be 66 33 size_t getutf8charlen (); 67 };68 69 70 #define MAXGBCHARLEN 271 72 // convert from a gb char stream to the unicode text_t class73 class gbinconvertclass : public inconvertclass {74 public:75 gbinconvertclass(const text_t &gsdlhome);76 ~gbinconvertclass () {delete gb2unimap;};77 void reset ();78 void convert (text_t &output, status_t &status);79 80 protected:81 // buffer to hold unconverted characters in a stream82 unsigned char gbbuf[MAXGBCHARLEN];83 size_t gbbuflen;84 85 // note: multiple instances of gbinconvert class are expensive86 // as each will have its own copy of the map file data. This87 // could be reduced by making gb2unimap static, but then it88 // wouldn't be thread safe.89 mapconvert *gb2unimap;90 91 // returns the length that the current contents of the92 // gbbuf should be93 size_t getgbcharlen ();94 34 }; 95 35 … … 129 69 130 70 71 // mapdata_t is used by mapconvert to hold the map file data 72 class mapdata_t { 73 public: 74 mapdata_t(); 75 bool loaded; 76 unsigned short *ptrs[256]; 77 }; 78 79 // mapconvert is used in situations where conversion is best 80 // done using a map file. The mapfile should reside in 81 // gsdlhome/unicode. 
82 class mapconvert { 83 public: 84 mapconvert (); 85 ~mapconvert () {unloadmapfile();}; 86 87 // loadmapfile should be called before any conversion is done 88 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding, 89 unsigned short theabsentc); 90 void unloadmapfile (); 91 92 unsigned short convert (unsigned short c); 93 94 // note that this version of convert has different semantics to 95 // the convertclass version. 96 text_t convert (const text_t &instr); 97 98 protected: 99 text_t gsdlhome; 100 text_t encoding; 101 unsigned short absentc; 102 mapdata_t mapdata; 103 }; 131 104 132 105 133 // Convert from a text_t class to a gb char stream 134 class gboutconvertclass : public rzwsoutconvertclass { 106 107 #define MAXMAPCHARLEN 2 108 109 // convert from a gb char stream to the unicode text_t class 110 class mapinconvertclass : public inconvertclass { 135 111 public: 136 gboutconvertclass (const text_t &gsdhome); 137 ~gboutconvertclass () {delete uni2gbmap;}; 112 mapinconvertclass(); 113 114 // loadmapfile should be called before any conversion takes 115 // place 116 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding, 117 unsigned short theabsentc) { 118 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc); 119 }; 120 138 121 void reset (); 139 // note that convert does not null-terminate the 140 // output array of characters 122 void convert (text_t &output, status_t &status); 123 124 protected: 125 // buffer to hold unconverted characters in a stream 126 unsigned char mapbuf[MAXMAPCHARLEN]; 127 size_t mapbuflen; 128 129 // note: multiple instances of mapinconvert class are expensive 130 // as each will have its own copy of the map file data. This 131 // could be reduced by making map2unimap static, but then it 132 // wouldn't be thread safe. 
133 mapconvert converter; 134 135 // returns the length that the current contents of the 136 // mapbuf should be 137 inline size_t getmapcharlen () { 138 if (mapbuflen == 0) return 0; 139 if (mapbuf[0] < 0x80) return 1; 140 return 2; 141 } 142 }; 143 144 145 // Convert from a text_t class to a map char stream 146 class mapoutconvertclass : public rzwsoutconvertclass { 147 public: 148 mapoutconvertclass (); 149 150 // loadmapfile should be called before any conversion takes 151 // place 152 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding, 153 unsigned short theabsentc) { 154 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc); 155 }; 156 157 void reset (); 141 158 void convert (char *output, size_t maxlen, 142 159 size_t &len, status_t &status); 143 160 144 161 protected: 145 unsigned char gbbuf[MAXGBCHARLEN];146 size_t gbbuflen;147 size_t gbbufhere;162 unsigned char mapbuf[MAXMAPCHARLEN]; 163 size_t mapbuflen; 164 size_t mapbufhere; 148 165 149 mapconvert *uni2gbmap;166 mapconvert converter; 150 167 }; 151 168 -
trunk/gsdl/lib/text_t.cpp
r12 r94 24 24 /* 25 25 $Log$ 26 Revision 1.2 1999/01/04 03:32:17 rjmcnab 27 28 Wrote general map file based in and out converters. Fixed bugs related 29 to Chinese charater searching. text_t now has a encoding attribute. Added 30 an encoding option to the preferences. 31 26 32 Revision 1.1 1998/11/17 09:11:29 rjmcnab 27 33 … … 70 76 71 77 #include "text_t.h" 78 #include "unitool.h" 72 79 73 80 //////////////////////////////////// … … 77 84 text_t::text_t () 78 85 { 86 setencoding(0); 79 87 clear (); 80 88 } … … 82 90 text_t::text_t (int i) 83 91 { 92 setencoding(0); 84 93 clear (); 85 94 appendint (i); … … 87 96 88 97 text_t::text_t (char *s) 89 { 98 { 99 setencoding(0); 90 100 clear (); 91 101 appendcstr (s); … … 223 233 while (ithere != itend) 224 234 { 225 if (*ithere >= 256) cstr[len] = ' '; 226 else cstr[len] = (*ithere); 235 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere); 236 else { 237 // put a space or a question mark depending on what 238 // the character is. Question marks tell the user that 239 // they are missing some information. 240 if (is_unicode_space (*ithere)) cstr[len] = ' '; 241 else cstr[len] = '?'; 242 } 227 243 len++; 228 244 ithere++; … … 241 257 while (ithere != itend) 242 258 { 243 if (*ithere >= 256) cstr[len] = ' '; 244 else cstr[len] = (*ithere); 259 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere); 260 else { 261 // put a space or a question mark depending on what 262 // the character is. Question marks tell the user that 263 // they are missing some information. 
264 if (is_unicode_space (*ithere)) cstr[len] = ' '; 265 else cstr[len] = '?'; 266 } 245 267 len++; 246 268 ithere++; … … 425 447 void inconvertclass::convert (text_t &output, status_t &status) 426 448 { 449 output.clear(); 450 427 451 if (start == NULL || len == 0) 428 452 { … … 442 466 start = (char *)here; // save current position 443 467 status = finished; 468 } 469 470 // will treat the text_t as a 8-bit string and convert 471 // it to a 16-bit string using the about convert method. 472 text_t inconvertclass::convert (const text_t &t) { 473 text_t out; 474 text_t tmpout; 475 status_t status; 476 text_t::const_iterator here = t.begin(); 477 text_t::const_iterator end = t.end(); 478 unsigned char cbuf[256]; 479 size_t cbuflen = 0; 480 481 while (here != end) { 482 while (here != end && cbuflen < 256) { 483 cbuf[cbuflen++] = (unsigned char)(*here & 0xff); 484 here++; 485 } 486 487 if (cbuflen > 0) { 488 setinput ((char *)cbuf, cbuflen); 489 status = unfinished; 490 while (status == unfinished) { 491 convert (tmpout, status); 492 out += tmpout; 493 } 494 cbuflen = 0; 495 } 496 } 497 498 out.setencoding (0); // unicode 499 500 return out; 444 501 } 445 502 … … 498 555 { 499 556 if (*texthere < 256) *uoutput = (unsigned char)(*texthere); 500 else *uoutput = 32; // put a space where a char >= 256 exists 557 else { 558 // put a space or a question mark depending on what 559 // the character is. Question marks tell the user that 560 // they are missing some information. 561 if (is_unicode_space (*texthere)) *uoutput = ' '; 562 else *uoutput = '?'; 563 } 501 564 ++uoutput; 502 565 ++len; … … 508 571 } 509 572 573 // will convert the 16-bit string to a 8-bit stream 574 // and place the result in a text_t. This method uses 575 // the above convert function. 
576 text_t outconvertclass::convert (const text_t &t) { 577 text_t out; 578 unsigned char cbuf[256]; 579 size_t cbuflen = 0; 580 status_t status = unfinished; 581 582 setinput ((text_t *)&t); // discard constant 583 while (status == unfinished) { 584 convert ((char *)cbuf, 256, cbuflen, status); 585 out.appendcarr ((char *)cbuf, cbuflen); 586 } 587 588 out.setencoding (1); // other encoding 589 590 return out; 591 } 592 593 510 594 void outconvertclass::setostream (ostream *theouts) 511 595 { -
trunk/gsdl/lib/text_t.h
r12 r94 75 75 protected: 76 76 usvector text; 77 unsigned short encoding; // 0 = unicode, 1 = other 77 78 78 79 public: … … 82 83 text_t (char *s); // assumed to be a normal c string 83 84 85 void setencoding (unsigned short theencoding) {encoding=theencoding;}; 86 unsigned short getencoding () {return encoding;}; 87 84 88 // basic container support 85 89 iterator begin () {return text.begin();} … … 92 96 void push_back(unsigned short c) {text.push_back(c);} 93 97 void pop_back() {text.pop_back();} 94 text_t &operator=(const text_t &x) {text=x.text; return *this;}98 text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;} 95 99 reference operator[](size_type n) {return text[n];}; 96 100 const_reference operator[](size_type n) const {return text[n];}; … … 255 259 void reset (); 256 260 void setinput (char *thestart, size_t thelen); 261 262 // output will be cleared before the conversion 257 263 virtual void convert (text_t &output, status_t &status); 264 265 // will treat the text_t as a 8-bit string and convert 266 // it to a 16-bit string using the about convert method. 267 text_t convert (const text_t &t); 258 268 259 269 protected: … … 290 300 size_t &len, status_t &status); 291 301 302 // will convert the 16-bit string to a 8-bit stream 303 // and place the result in a text_t. This method uses 304 // the above convert function. 305 text_t convert (const text_t &t); 306 292 307 void setostream (ostream *theouts); 293 308 ostream *getostream (); -
trunk/gsdl/src/library/Makefile
r91 r94 25 25 AR = ar 26 26 CC = gcc 27 CCFLAGS = - O2 -g27 CCFLAGS = -g 28 28 DEFS = -DNZDL -DQUIET -DSHORT_SUFFIX -DPARADOCNUM -DUSE_FASTCGI 29 29 RANLIB = ranlib … … 73 73 rm -f $(OBJECTS) 74 74 75 install: 76 75 77 depend: 76 78 makedepend -Y -- $(DEFS) $(INCLUDES) $(CCFLAGS) -- $(SOURCES) … … 80 82 # DO NOT DELETE 81 83 82 browse.o: browse.h ../../lib/text_t.h gdbmclass.h locateinfo.h cgiargs.h 83 gdbmclass.o: ../../lib/text_t.h gdbmclass.h locateinfo.h 84 browse.o: browse.h ../../lib/text_t.h gdbmclass.h cgiargs.h 85 gdbmclass.o: ../../lib/text_t.h gdbmclass.h 86 gdbmclass.o: ../../packages/mg-1.3d/lib/unitool.h ../../lib/gsdlunicode.h 87 gdbmclass.o: ../../lib/fileutil.h 84 88 cgiargs.o: cgiargs.h ../../lib/text_t.h 85 89 querycache.o: querycache.h ../../lib/text_t.h queryinfo.h … … 106 110 queryinfo.o: queryinfo.h ../../lib/text_t.h 107 111 libinterface.o: libinterface.h browse.h ../../lib/text_t.h gdbmclass.h 108 libinterface.o: locateinfo.h cgiargs.h queryinfo.h mgsearch.h querycache.h 109 libinterface.o: ../../lib/display.h 112 libinterface.o: cgiargs.h queryinfo.h mgsearch.h querycache.h 113 libinterface.o: ../../lib/display.h ../../lib/gsdlunicode.h 114 libinterface.o: ../../lib/fileutil.h ../../lib/cfgread.h 110 115 mgsearch.o: mgq.h mgsearch.h ../../lib/text_t.h querycache.h queryinfo.h 111 mgsearch.o: locateinfo.h 112 locateinfo.o: locateinfo.h ../../lib/text_t.h 113 cgiwrap.o: libinterface.h browse.h ../../lib/text_t.h gdbmclass.h 114 cgiwrap.o: locateinfo.h cgiargs.h queryinfo.h mgsearch.h querycache.h 115 cgiwrap.o: ../../lib/display.h 116 mgsearch.o: locateinfo.h ../../lib/gsdlunicode.h 117 mgsearch.o: ../../packages/mg-1.3d/lib/unitool.h 118 locateinfo.o: locateinfo.h ../../lib/text_t.h ../../lib/fileutil.h 119 cgiwrap.o: libinterface.h browse.h ../../lib/text_t.h gdbmclass.h cgiargs.h 120 cgiwrap.o: queryinfo.h mgsearch.h querycache.h ../../lib/display.h 121 cgiwrap.o: ../../lib/gsdlunicode.h -
trunk/gsdl/src/library/cgiargs.cpp
r4 r94 1 1 #include "cgiargs.h" 2 2 #include "gsdlunicode.h" 3 3 4 4 … … 64 64 ostream &operator<<(ostream &outs, const cgiargsclass &args) 65 65 { 66 outconvertclass text_t2ascii;66 utf8outconvertclass text_t2utf8; 67 67 cgiargsclass::const_iterator here = args.begin (); 68 68 cgiargsclass::const_iterator end = args.end (); … … 72 72 while (here != end) 73 73 { 74 outs << text_t2 ascii<< " \"" << (*here).first << "\"=\"" <<74 outs << text_t2utf8 << " \"" << (*here).first << "\"=\"" << 75 75 (*here).second << "\"\n"; 76 76 here++; -
trunk/gsdl/src/library/libinterface.cpp
r93 r94 9 9 #include "cfgread.h" 10 10 #include "gsdlunicode.h" 11 #include "unitool.h" 11 12 12 13 #include <assert.h> … … 107 108 // convert %xx and + to their appropriate equivalents 108 109 decode (value); 110 value.setencoding(1); // other encoding 109 111 // store this key=value pair 110 112 if (!key.empty()) args.setarg (key, value); … … 112 114 } 113 115 114 text_t cgisafe ( text_t &intext)116 text_t cgisafe (const text_t &intext) 115 117 { 116 118 text_t outtext; 117 119 118 text_t:: iterator here = intext.begin ();119 text_t:: iterator end = intext.end ();120 text_t::const_iterator here = intext.begin (); 121 text_t::const_iterator end = intext.end (); 120 122 unsigned short c; 121 123 text_t ttmp; … … 150 152 libinterface::libinterface() { 151 153 browse = NULL; 152 gbinconvert = NULL;153 gboutconvert = NULL;154 154 } 155 155 … … 277 277 srand(time(NULL)); 278 278 279 gbinconvert = new gbinconvertclass (gsdlhome);280 281 279 utf8outconvert.set_rzws(1); 282 gboutconvert = new gboutconvertclass (gsdlhome); 283 if (gboutconvert != NULL) gboutconvert->set_rzws(1); 280 gboutconvert.set_rzws(1); 284 281 285 282 return collection_init(collection); … … 305 302 check_args (args); 306 303 304 // get the input encoding 305 text_t &arg_w = args["w"]; 306 inconvertclass *inconvert = NULL; 307 if (arg_w == "8") { 308 inconvert = &utf8inconvert; 309 } else if (arg_w == "g") { 310 // The map files will only be loaded the first time they are 311 // needed. The loading is done here to reduce the memory load 312 // for collections which don't need to convert to GB. 
313 gbinconvert.loadmapfile (gsdlhome, "gbku", 0x25a1); 314 inconvert = &gbinconvert; 315 } else { 316 inconvert = &asciiinconvert; // default 317 } 318 319 // see if the next page will have a different encoding 320 if (args.getarg("nw") != NULL) args["w"] = args["nw"]; 321 322 // convert arguments which aren't in unicode to unicode 323 args_tounicode (args, *inconvert); 324 325 // remember the state of the compressed arguments 307 326 lastcomparg = get_compressed_args (args); 308 309 327 logout << args; 310 328 311 329 // get the output encoding 312 text_t &arg_n = args["n"];330 text_t &arg_nw = args["w"]; 313 331 outconvertclass *outconvert = NULL; 314 if (arg_n == "8") {332 if (arg_nw == "8") { 315 333 outconvert = &utf8outconvert; 316 } else if (arg_n == "g" && gboutconvert != NULL) { 317 outconvert = gboutconvert; 318 } 319 else outconvert = &asciioutconvert; // default 320 321 if (outconvert == NULL) return err; 334 } else if (arg_nw == "g") { 335 gboutconvert.loadmapfile (gsdlhome, "ugbk", 0xa1f5); 336 outconvert = &gboutconvert; 337 } else { 338 outconvert = &asciioutconvert; // default 339 } 340 322 341 323 342 // dispatch the request … … 346 365 argconfigstr = 347 366 "+a[p]" // action: q=query, b=browse, t=targetdoc, p=page, a1=auxiliary 348 " n[]"// encoding: w=western, 8=utf8, 7=utf7, g=GB2312, k=GBK367 "w[]" // encoding: w=western, 8=utf8, 7=utf7, g=GB2312, k=GBK 349 368 "t[1]" // query type: 0=boolean, 1=ranked 350 369 "i[c]" // index: c=chapter, p=paragraph, t=title, b=book … … 479 498 { 480 499 args.setarg("c", get_collection_name()); 481 if (args["n"].empty()) args.setarg("n", cfg_info.defaultencoding); 482 } 483 484 485 void libinterface::define_general_macros (cgiargsclass &args, ostream &logout) 486 { 500 if (args["w"].empty()) args.setarg("w", cfg_info.defaultencoding); 501 } 502 503 void libinterface::args_tounicode (cgiargsclass &args, 504 inconvertclass &inconvert) { 505 utf8outconvertclass text_t2utf8; 506 cgiargsclass::iterator here = 
args.begin(); 507 cgiargsclass::iterator end = args.end(); 508 509 while (here != end) { 510 if (here->second.getencoding() > 0) { 511 here->second = inconvert.convert(here->second); 512 } 513 514 here++; 515 } 516 } 517 518 519 void libinterface::define_general_macros (cgiargsclass &args, outconvertclass &outconvert, 520 ostream &logout) { 487 521 disp.setmacro("httpprefix", "Global", httpprefix); 488 522 disp.setmacro("gwcgi", "Global", gwcgi); 489 523 490 disp.setmacro("collection", "Global", cgisafe(args["c"])); 524 disp.setmacro("numdocs", "Global", (int)cfg_info.numdocs); 525 526 disp.setmacro("collection", "Global", cgisafe(outconvert.convert(args["c"]))); 491 527 disp.setmacro("compressedoptions", "Global", get_compressed_args(args)); 492 disp.setmacro("urlsafequerystring", "Global", cgisafe( args["q"]));528 disp.setmacro("urlsafequerystring", "Global", cgisafe(outconvert.convert(args["q"]))); 493 529 494 530 // need to escape any quotes in querystring to prevent them upsetting the html … … 498 534 while (here != end) { 499 535 if (*here == '"') querystring += """; 536 else if (*here == '&') querystring += "&"; 537 else if (*here == '<') querystring += "<"; 538 else if (*here == '>') querystring += ">"; 500 539 else querystring.push_back(*here); 501 540 here ++; … … 503 542 disp.setmacro("querystring", "Global", querystring); 504 543 505 506 544 if (args.getintarg("x") == 0) disp.setmacro("notdetached", "Global", "1"); 507 545 if (args["d"][0] == 'T') disp.setmacro("istitle", "Global", "1"); … … 517 555 // prepare_page prepares to write out a page using the current 518 556 // page parameters and defines any general macros 519 void libinterface::prepare_page (cgiargsclass &args, o stream &logout)520 {557 void libinterface::prepare_page (cgiargsclass &args, outconvertclass &outconvert, 558 ostream &logout) { 521 559 // get page parameters 522 560 text_t pageparams = text_t("collection=") + args["c"]; … … 527 565 528 566 disp.openpage(pageparams, 
MACROPRECEDENCE); 529 define_general_macros(args, logout);567 define_general_macros(args, outconvert, logout); 530 568 define_collection_macros(args, logout); 531 569 } … … 717 755 718 756 // prepare to print out the page 719 prepare_page(args, logout);757 prepare_page(args, outconvert, logout); 720 758 define_query_macros(args, queryparams, queryresults, logout); 721 759 … … 764 802 gdbm_info info; 765 803 766 prepare_page(args, logout);804 prepare_page(args, outconvert, logout); 767 805 768 806 // get browse bar unless page has been detached … … 806 844 do_query(args, queryparams, queryresults, logout); 807 845 808 prepare_page(args, logout);846 prepare_page(args, outconvert, logout); 809 847 810 848 if (args["g"][1] == '0') { … … 976 1014 text_t &arg_p = args["p"]; 977 1015 978 prepare_page(args, logout);1016 prepare_page(args, outconvert, logout); 979 1017 980 1018 if (arg_p == "preferences") … … 999 1037 text_t word, buffer; 1000 1038 while (here != end) { 1001 if (((*here >= 65) && (*here <= 90)) || 1002 ((*here >= 97) && (*here <= 122)) || 1003 ((*here >= '0') && (*here <= '9')) || 1004 ((*here >= 192) && (*here <= 214)) || 1005 ((*here >= 216) && (*here <= 246)) || 1006 ((*here >= 248) && (*here <= 255))) { 1039 if (is_unicode_letdig(*here)) { 1007 1040 // not word boundary 1008 1041 word.push_back(*here); … … 1069 1102 disp.setmacro ("stemoption", "preferences", stemoption); 1070 1103 1104 1105 // the encodingoption 1106 text_t encodingoption; 1107 const text_t &arg_w = args["w"]; 1108 1109 encodingoption += "\n<select name=\"nw\">\n"; 1110 encodingoption += " <option value=\"w\""; 1111 if (arg_w == "w") encodingoption += " selected"; 1112 encodingoption += ">Western (ISO-8859-1)\n"; 1113 encodingoption += " <option value=\"g\""; 1114 if (arg_w == "g") encodingoption += " selected"; 1115 encodingoption += ">Simplified Chinese (GB2312)\n"; 1116 encodingoption += " <option value=\"8\""; 1117 if (arg_w == "8") encodingoption += " selected"; 1118 
encodingoption += ">Unicode (UTF-8)\n"; 1119 encodingoption += "</select>\n"; 1120 1121 disp.setmacro ("encodingoption", "preferences", encodingoption); 1071 1122 1072 1123 // the maxdocoption -
trunk/gsdl/src/library/libinterface.h
r93 r94 110 110 inconvertclass asciiinconvert; 111 111 utf8inconvertclass utf8inconvert; 112 gbinconvertclass *gbinconvert;112 mapinconvertclass gbinconvert; 113 113 outconvertclass asciioutconvert; 114 114 utf8outconvertclass utf8outconvert; 115 gboutconvertclass *gboutconvert;115 mapoutconvertclass gboutconvert; 116 116 117 117 … … 130 130 virtual void add_default_args (cgiargsclass &args); 131 131 virtual void check_args (cgiargsclass &args); 132 virtual void args_tounicode (cgiargsclass &args, inconvertclass &inconvert); 132 133 133 virtual void define_general_macros (cgiargsclass &args, ostream &logout); 134 virtual void prepare_page (cgiargsclass &args, ostream &logout); 134 virtual void define_general_macros (cgiargsclass &args, outconvertclass &outconvert, 135 ostream &logout); 136 virtual void prepare_page (cgiargsclass &args, outconvertclass &outconvert, 137 ostream &logout); 135 138 136 139 -
trunk/gsdl/src/library/mgq.c
r4 r94 151 151 int (*sender)(char *,int,int,float,void *), void *ptr) 152 152 { 153 int i; char *word;153 int i; 154 154 for (i = 0; i < qtl->num; i++) 155 155 if (sender != NULL) { 156 word = word2str(qtl->QTE[i].Term); 157 (* sender)(word, strlen(word), qtl->QTE[i].Count, (float)0.0, ptr); 156 /* word = word2str(qtl->QTE[i].Term); 157 (* sender)(word, strlen(word), qtl->QTE[i].Count, (float)0.0, ptr); */ 158 (* sender)(qtl->QTE[i].Term+1, qtl->QTE[i].Term[0], 159 qtl->QTE[i].Count, (float)0.0, ptr); 158 160 } 159 161 } … … 164 166 { 165 167 int i = 0; 166 char *word;167 168 168 if (sender == NULL) return; 169 169 for (i = 0; i < qtl->num; i++) 170 170 { 171 word = word2str(qtl->TE[i].Word); 172 (* sender)(word, strlen(word), qtl->TE[i].Count, (float)0.0, ptr); 171 /* word = word2str(qtl->TE[i].Word); 172 (* sender)(word, strlen(word), qtl->TE[i].Count, (float)0.0, ptr);*/ 173 (* sender)(qtl->TE[i].Word+1, qtl->TE[i].Word[0], 174 qtl->TE[i].Count, (float)0.0, ptr); 173 175 } 174 176 } -
trunk/gsdl/src/library/mgsearch.cpp
r91 r94 73 73 queryresultsclass *queryresults = (queryresultsclass *)info; 74 74 75 text_t term; 76 term.setcarr(Word, ULen); 75 77 termfreqclass termfreq; 76 termfreq.termstr.setcarr(Word, ULen); 78 termfreq.termstr = to_uni(term); 77 79 termfreq.termfreq = Freq; 78 80 queryresults->terms.push_back(termfreq); … … 85 87 float Weight, void *info) { 86 88 87 // convert term from utf8 to unicode 88 89 text_t term; 89 utf8inconvertclass inconvert; 90 convertclass::status_t status; 91 inconvert.reset (); 92 inconvert.setinput (Word, ULen); 93 inconvert.convert (term, status); 94 90 term.setcarr(Word, ULen); 95 91 queryresultsclass *queryresults = (queryresultsclass *)info; 96 queryresults->termvariants.push_back(term); 92 queryresults->termvariants.push_back(to_uni(term)); 97 93 98 94 return 0; … … 255 251 256 252 // quotedquery will be deleted on the next call to this function 257 quotedquery = ttquotedquery.getcstr (); 258 char *querystring = ttquerystring.getcstr(); 253 quotedquery = to_utf8(ttquotedquery).getcstr (); 254 char *querystring = to_utf8(ttquerystring).getcstr(); 259 255 260 256 // submit the query … … 318 314 319 315 320 void mgsearchclass::filterquery (text_t &ttquerystring) 321 { 322 316 void mgsearchclass::filterquery (text_t &ttquerystring) { 323 317 text_t::iterator ithere = ttquerystring.begin (); 324 318 text_t::iterator itend = ttquerystring.end (); 325 unsigned short c; 326 327 // remove all non alphanumeric characters below 127 328 while (ithere != itend) 329 { 330 c = *ithere; 331 332 // if ((c <= 127) && !((c >= '0' && c <= '9') || 333 // (c >= 'A' && c <= 'Z') || 334 // (c >= 'a' && c <= 'z'))) 335 if (!(((c >= 65) && (c <= 90)) || 336 ((c >= 97) && (c <= 122)) || 337 ((c >= 192) && (c <= 214)) || 338 ((c >= 216) && (c <= 246)) || 339 ((c >= 248) && (c <= 255)) || 340 ((c >= '0') && (c <= '9')) || 341 (c == 176))) 342 (*ithere) = ' '; 343 344 ithere++; 345 } 319 320 // remove all non alphanumeric characters 321 while (ithere != itend) { 322
if (!is_unicode_letdig(*ithere)) (*ithere) = ' '; 323 ithere++; 324 } 346 325 } 347 326
Note:
See TracChangeset
for help on using the changeset viewer.