Changeset 7705


Ignore:
Timestamp:
2004-07-06T15:36:50+12:00 (20 years ago)
Author:
jrm21
Message:

Can now read UTF-8 encoded non-ASCII characters from the main.cfg file.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/lib/cfgread.cpp

    r3528 r7705  
    112112
    113113  text_t curvalue;
    114   char c;
    115   filein.get(c);
     114  unsigned char c1;
     115  filein.get(c1);
    116116
    117117  // skip white space
    118   while (!filein.eof() && isspace(c)) { filein.get(c); }
     118  while (!filein.eof() && isspace(c1)) { filein.get(c1); }
    119119
    120120    // ignore comments
    121   while (c == '#') {
    122     while (!filein.eof() && c!='\n' && c!='\r') { filein.get(c); }
     121  while (c1 == '#') {
     122    while (!filein.eof() && c1!='\n' && c1!='\r') { filein.get(c1); }
    123123    // skip white space...
    124     while (!filein.eof() && isspace(c)) { filein.get(c); }
     124    while (!filein.eof() && isspace(c1)) { filein.get(c1); }
    125125  }
    126126
    127127  // deal with all the records on this line (possibly multi-line)
     128
    128129  while (!filein.eof()) {
    129     if (c=='\n' || c=='\r') { // shouldn't happen?
     130    if (c1=='\n' || c1=='\r') { // shouldn't happen?
    130131      break;
    131132    }
     
    134135    curvalue.clear();
    135136     
    136       // see if this is a quoted phrase
    137     if (c=='\'' || c=='\"') { // starts with a quote
    138       char quote, old_c;
    139       quote = c;
    140       old_c = c;
    141       filein.get(c);
    142       while (!filein.eof() && (c != quote || old_c == '\\') ) {
    143     /* Turn eol into space, in case other parsing bits expect eol to
    144        also mean end of parsing... */
    145     if (c=='\r' || c=='\n') c=' ';
    146     curvalue.push_back(c);
    147     old_c = c;
    148     filein.get(c);
     137    bool inquote=false;
     138    unsigned char quotemark='"';
     139    unsigned char preceding; // 1-char state to allow \" and \'
     140    // see if this is a quoted phrase
     141    if (c1=='\'' || c1=='\"') { // starts with a quote
     142      inquote=true;
     143      quotemark = c1;
     144      preceding = c1; // just to initialise
     145      filein.get(c1);
     146    }
     147
     148    // get token or a whole phrase
     149    while (!filein.eof()) {
     150      if (isspace(c1)) {
     151    if (! inquote) {
     152      // end of token, not inside quote marks
     153      break;
     154    } else {
     155      // inside quote marks.
     156      /* Turn eol into space, in case other parsing bits expect eol to
     157         also mean end of parsing... */
     158      c1=' ';
     159    }
    149160      }
    150       // get the character after the closing quote...
    151       filein.get(c);
    152     } else { // it's not a quoted phrase
    153       // get the token
    154       while (!filein.eof() && !isspace(c)) {
    155     curvalue.push_back(c);
    156     filein.get(c);
     161      if (c1 == quotemark && inquote && preceding != '\\') {
     162    // end of quoted phrase found
     163    inquote=false;
     164    filein.get(c1);
     165    continue;
    157166      }
     167
     168      // add current char to token/phrase
     169      // see if current byte is part of a multibyte char (utf-8 only!)
     170      unsigned short int c; // text_t uses 16bit unicode
     171      if (c1 < 0x80) {
     172    c=c1;
     173      } else if (c1 >= 0xc0 && c1 <= 0xdf) {
     174    // 2-byte utf-8
     175    unsigned char c2;
     176    // two byte character
     177    if (!filein.eof()) filein.get(c2);
     178    c = ((c1 & 0x1f) << 6) + (c2 & 0x3f);
     179      } else if (c1 >= 0xe0 && c1 <= 0xef) {
     180    // 3-byte character
     181    unsigned char c2, c3;
     182    if (!filein.eof()) filein.get(c2);
     183    if (!filein.eof()) filein.get(c3);
     184    c = ((c1 & 0xf) << 12) + ((c2 & 0x3f) << 6) + (c3 & 0x3f);
     185      } // we don't do group2/plane0 (4,5,6-byte utf-8)
     186
     187      curvalue.push_back(c); // 16bit unicode
     188      if (inquote)
     189    preceding = c1;
     190
     191      filein.get(c1);
    158192    }
    159193    // we now have a token or a phrase
    160194   
    161195    // see if we've reached the end of the line
    162     if (c == '\n' || c == '\r') {
     196    if (c1 == '\n' || c1 == '\r') {
    163197      if (curvalue != "\\") { // the line DOESN'T continue. End of line.
    164198    values.push_back(curvalue);
    165     break;
     199    break; // end of token/phrase
    166200      } else {
    167201    // swallow up the EOL chars
    168     while (!filein.eof() && (c=='\r' || c=='\n')) filein.get(c);
     202    while (!filein.eof() && (c1=='\r' || c1=='\n')) filein.get(c1);
     203    // the current token "\\" will be cleared below
    169204      }
    170205    } else { // no new line seen
     
    175210
    176211    // remove whitespace (but not newline/CR chars) before next token
    177     while (!filein.eof() && (c==' ' || c=='\t')) filein.get(c);
     212    while (!filein.eof() && (c1==' ' || c1=='\t')) filein.get(c1);
    178213
    179214  } // while(1)
Note: See TracChangeset for help on using the changeset viewer.