Context Navigation

← Previous Changeset
Next Changeset →

Changeset 7705

Timestamp:

2004-07-06T15:36:50+12:00 (20 years ago)

Author:

jrm21

Message:

Can now read UTF-8-encoded non-ASCII characters from the main.cfg file.

File:

: 1 edited

trunk/gsdl/lib/cfgread.cpp (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl/lib/cfgread.cpp

-              r3528
+              r7705
   text_t curvalue;
   char c;
   filein.get(c);
+  unsigned char c1;
+  filein.get(c1);
   // skip white space
   while (!filein.eof() && isspace(c)) { filein.get(c); }
+  while (!filein.eof() && isspace(c1)) { filein.get(c1); }
     // ignore comments
   while (c == '#') {
     while (!filein.eof() && c!='\n' && c!='\r') { filein.get(c); }
+  while (c1 == '#') {
+    while (!filein.eof() && c1!='\n' && c1!='\r') { filein.get(c1); }
     // skip white space...
     while (!filein.eof() && isspace(c)) { filein.get(c); }
+    while (!filein.eof() && isspace(c1)) { filein.get(c1); }
+  }
   // deal with all the records on this line (possibly multi-line)
   while (!filein.eof()) {
     if (c=='\n' || c=='\r') { // shouldn't happen?
+    if (c1=='\n' || c1=='\r') { // shouldn't happen?
       break;
+    }
 …
     curvalue.clear();
+      // see if this is a quoted phrase
+    if (c=='\'' || c=='\"') { // starts with a quote
+      char quote, old_c;
+      quote = c;
+      old_c = c;
+      filein.get(c);
+      while (!filein.eof() && (c != quote || old_c == '\\') ) {
+    /* Turn eol into space, in case other parsing bits expect eol to
+       also mean end of parsing... */
+    if (c=='\r' || c=='\n') c=' ';
+    curvalue.push_back(c);
+    old_c = c;
+    filein.get(c);
+    bool inquote=false;
+    unsigned char quotemark='"';
+    unsigned char preceding; // 1-char state to allow \" and \'
+    // see if this is a quoted phrase
+    if (c1=='\'' || c1=='\"') { // starts with a quote
+      inquote=true;
+      quotemark = c1;
+      preceding = c1; // just to initialise
+      filein.get(c1);
+    }
+    // get token or a whole phrase
+    while (!filein.eof()) {
+      if (isspace(c1)) {
+    if (! inquote) {
+      // end of token, not inside quote marks
+      break;
+    } else {
+      // inside quote marks.
+      /* Turn eol into space, in case other parsing bits expect eol to
+         also mean end of parsing... */
+      c1=' ';
+    }
+      }
+      // get the character after the closing quote...
+      filein.get(c);
+    } else { // it's not a quoted phrase
+      // get the token
+      while (!filein.eof() && !isspace(c)) {
+    curvalue.push_back(c);
+    filein.get(c);
+      if (c1 == quotemark && inquote && preceding != '\\') {
+    // end of quoted phrase found
+    inquote=false;
+    filein.get(c1);
+    continue;
+      }
+      // add current char to token/phrase
+      // see if current byte is part of a multibyte char (utf-8 only!)
+      unsigned short int c; // text_t uses 16bit unicode
+      if (c1 < 0x80) {
+    c=c1;
+      } else if (c1 >= 0xc0 && c1 <= 0xdf) {
+    // 2-byte utf-8
+    unsigned char c2;
+    // two byte character
+    if (!filein.eof()) filein.get(c2);
+    c = ((c1 & 0x1f) << 6) + (c2 & 0x3f);
+      } else if (c1 >= 0xe0 && c1 <= 0xef) {
+    // 3-byte character
+    unsigned char c2, c3;
+    if (!filein.eof()) filein.get(c2);
+    if (!filein.eof()) filein.get(c3);
+    c = ((c1 & 0xf) << 12) + ((c2 & 0x3f) << 6) + (c3 & 0x3f);
+      } // we don't do group2/plane0 (4,5,6-byte utf-8)
+      curvalue.push_back(c); // 16bit unicode
+      if (inquote)
+    preceding = c1;
+      filein.get(c1);
+    }
     // we now have a token or a phrase
     // see if we've reached the end of the line
     if (c == '\n' || c == '\r') {
+    if (c1 == '\n' || c1 == '\r') {
       if (curvalue != "\\") { // the line DOESN'T continue. End of line.
     values.push_back(curvalue);
     break;
+    break; // end of token/phrase
       } else {
     // swallow up the EOL chars
+    while (!filein.eof() && (c=='\r' || c=='\n')) filein.get(c);
+    while (!filein.eof() && (c1=='\r' || c1=='\n')) filein.get(c1);
+    // the current token "\\" will be cleared below
+      }
     } else { // no new line seen
 …
     // remove whitespace (but not newline/CR chars) before next token
     while (!filein.eof() && (c==' ' || c=='\t')) filein.get(c);
+    while (!filein.eof() && (c1==' ' || c1=='\t')) filein.get(c1);
   } // while(1)

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 7705

Legend:

trunk/gsdl/lib/cfgread.cpp

Download in other formats: