Changeset 477


Ignore:
Timestamp:
1999-08-31T19:59:11+12:00 (25 years ago)
Author:
rjmcnab
Message:

Generalised spaces to unicode spaces, added ability to automatically detect
whether the input file is Unicode or UTF-8 and read the file in the
appropriate way, and improved the error messages slightly.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/lib/display.cpp

    r415 r477  
    1212/*
    1313   $Log$
     14   Revision 1.13  1999/08/31 07:59:11  rjmcnab
     15   Generalised spaces to unicode spaces, added ability to automatically detect
     16   whether the input file is Unicode or UTF-8 and read the file in the
     17   appropriate way, and improved the error messages slightly.
     18
    1419   Revision 1.12  1999/07/21 20:46:12  rjmcnab
    1520   fixed small bug
     
    5863#include "display.h"
    5964#include "gsdlunicode.h"
     65#include "unitool.h"
    6066#include <assert.h>
    6167
     
    576582/////////////////////////////////////
    577583
    // returns 1 when ch is a non-NUL control character (codes 1 to 31)
    // or a plain space, and 0 for every other character
    inline int my_isspace (char ch)
    {
      const unsigned char code = (unsigned char)ch;

      // NUL never counts as white space
      if (code == 0) return 0;

      // the remaining control characters and the space itself do
      if (code <= 31) return 1;
      return (code == ' ') ? 1 : 0;
    }
    583 
    584 
    585 inline int my_isalpha (char c)
     584inline int my_isalpha (unsigned short c)
    586585{
    587586  return ((c >= 'A' && c <= 'Z') ||
     
    589588}
    590589
    591 
    // reads the next character from fin and returns it, incrementing
    // line whenever a newline is read.
    // as we are using one character lookahead the
    // value of line might be off by one.
    // when nothing can be read (end of file or a failed stream) 0 is
    // returned; callers should still use fin.eof() to detect the end
    // of the input
    inline char my_get (istream &fin, int &line)
    {
      // initialised so that a failed read yields a well defined value
      // instead of whatever happened to be on the stack
      char c = 0;
      fin.get(c);
      if (c == '\n') line++;
      return c;
    }
    601590
    602591
     
    657646
    658647
     648// as we are using one character lookahead the
     649// value of line might be off by one.
     650// the input file must be in the utf-8 or unicode format
     651// initially for each file isunicode should be set to 0 and
     652// bigendian should be set to 1
     653// 0 will be returned when the end of the file has been found
     654unsigned short my_uni_get (istream &fin, int &line,
     655               int &isunicode, int &bigendian) {
     656  unsigned short c = 0;
     657 
     658  if (isunicode) {
     659    // unicode text
     660    // get the next two characters
     661    unsigned char c1 = 0, c2 = 0;
     662    if (!fin.eof()) fin.get(c1);
     663    if (!fin.eof()) fin.get(c2);
     664    else c1 = 0;
     665
     666    // if they indicate the order get the next character
     667    // otherwise just get these characters
     668    if (c1 == 0xff && c2 == 0xfe) {
     669      bigendian = 0;
     670      c = my_uni_get (fin, line, isunicode, bigendian);
     671    } else if (c1 == 0xfe && c2 == 0xff) {
     672      bigendian = 1;
     673      c = my_uni_get (fin, line, isunicode, bigendian);
     674    } else c = (bigendian) ? (c1*256+c2) : (c2*256+c1);
     675   
     676  } else {
     677    // utf-8 text
     678    // how many characters we get depends on what we find
     679    unsigned char c1 = 0, c2 = 0, c3 = 0;
     680    while (!fin.eof()) {
     681      fin.get(c1);
     682      if (c1 == 0xfe || c1 == 0xff) {
     683    // switch to unicode
     684    isunicode = 1;
     685    if (!fin.eof()) fin.get(c2);
     686
     687    if (c1 == 0xff && c2 == 0xfe) bigendian = 0;
     688    else bigendian = 1;
     689
     690    c = my_uni_get (fin, line, isunicode, bigendian);
     691    break;
     692
     693      } else if (c1 <= 0x7f) {
     694    // one byte character
     695    c = c1;
     696    break;
     697
     698      } else if (c1 >= 0xc0 && c1 <= 0xdf) {
     699    // two byte character
     700    if (!fin.eof()) fin.get(c2);
     701    c = ((c1 & 0x1f) << 6) + (c2 & 0x3f);
     702    break;
     703
     704      } else if (c1 >= 0xe0 && c1 <= 0xef) {
     705    // three byte character
     706    if (!fin.eof()) fin.get(c2);
     707    if (!fin.eof()) fin.get(c3);
     708    c = ((c1 & 0xf) << 12) + ((c2 & 0x3f) << 6) + (c3 & 0x3f);
     709    break;
     710      }
     711
     712      // if we get here there was an error in the file, we should
     713      // be able to recover from it however, maybe the file is in
     714      // another encoding
     715    }
     716  }
     717
     718  if (c == '\n') line++;
     719  return c;
     720}
     721
     722
     723
    659724// loads a default macro file (if it isn't already loaded)
    660725// returns 0 if didn't need to load the file (it was already loaded)
    661726//         1 if was (re)loaded
    662727//        -1 an error occurred while trying to load the file
    663 int displayclass::loaddefaultmacros (text_t thisfilename)
    664 {
     728int displayclass::loaddefaultmacros (text_t thisfilename) {
    665729  // convert the filename to a C string
    666730  char *filenamestr = thisfilename.getcstr();
     
    674738
    675739  text_t package = "Global";
    676   int line  = 1;
    677   char c = my_get(fin, line); // pre-fetch the next character
     740  int line = 1;
     741  int isunicode = 0, bigendian = 1;
     742
     743  // pre-fetch the next character
     744  unsigned short c = my_uni_get(fin, line, isunicode, bigendian);
    678745
    679746  text_t macropackage, macroname, macroparameters, macrovalue;
    680747  int err; // for keeping track of whether an error occurred somewhere
    681748
    682   while (!fin.eof())
    683     {
    684       // expect: white space, comment, "package", or macroname
    685       if (my_isspace(c))
    686     {
    687       // found some white-space
    688       c = my_get(fin, line);
    689     }
    690       else if (c == '#')
    691     {
    692       // found the start of a comment
    693       // skip all characters up to the end of the line
    694       c = my_get(fin, line); // skip the '#'
    695       while (!fin.eof ())
    696         {
    697           if (c == '\n') break;
    698           c = my_get(fin, line);
    699         }
    700 
     749  while (!fin.eof()) {
     750    // expect: white space, comment, "package", or macroname
     751    if (is_unicode_space(c)) {
     752      // found some white-space
     753      c = my_uni_get(fin, line, isunicode, bigendian);
     754
     755    } else if (c == '#') {
     756      // found the start of a comment
     757      // skip all characters up to the end of the line
     758      c = my_uni_get(fin, line, isunicode, bigendian); // skip the '#'
     759      while (!fin.eof ()) {
     760    if (c == '\n') break;
     761    c = my_uni_get(fin, line, isunicode, bigendian);
     762      }
     763
     764    } else if (c == 'p') {
     765      // found the start of 'package' (hopefully)
     766      // get everything up to the next space
     767      text_t tmp;
     768      while (!fin.eof() && my_isalpha(c)) {
     769    tmp.push_back(c);
     770    c = my_uni_get(fin, line, isunicode, bigendian);
     771      }
     772      // see if we have a package name
     773      if (tmp == "package") {
     774    // skip all white space
     775    while (!fin.eof() && is_unicode_space(c))
     776      c = my_uni_get(fin, line, isunicode, bigendian);
     777   
     778    // get the package name
     779    tmp.clear(); // init tmp
     780    while (!fin.eof() && my_isalpha(c)) {
     781      tmp.push_back(c);
     782      c = my_uni_get(fin, line, isunicode, bigendian);
     783    }
     784    package = tmp;
     785    if (package.empty()) package = "Global";
     786
     787      } else {
     788    // error
     789    if (logout != NULL) {
     790      (*logout) << text_t2ascii << "Expected 'package' on line " << line
     791            << " of " << thisfilename << "\n";
     792    }
     793      }
     794     
     795    } else if (c == '_') {
     796      // found the start of a macro (hopefully)
     797      c = my_uni_get(fin, line, isunicode, bigendian); // skip the _
     798
     799      // init variables
     800      err = 0;
     801      macropackage = package;
     802      macroname.clear(); // init macroname
     803      macroparameters.clear(); // init macroname
     804      macrovalue.clear(); // init macroname
     805     
     806      // get the macro name
     807      while ((!fin.eof()) && (!is_unicode_space(c)) &&
     808         (c != '\\') && (c != '_') &&(c != ':') &&
     809         (macroname.size() < 80)) {
     810    macroname.push_back(c);
     811    c = my_uni_get(fin, line, isunicode, bigendian);
     812      }
     813     
     814      if (c == ':') {
     815    // we actually had the macro package
     816    c = my_uni_get(fin, line, isunicode, bigendian); // skip :
     817    macropackage = macroname;
     818    macroname.clear ();
     819   
     820    // get the macro name (honest!)
     821    while ((!fin.eof()) && (!is_unicode_space(c)) &&
     822           (c != '\\') && (c != '_') &&(c != ':') &&
     823           (macroname.size() < 80)) {
     824      macroname.push_back(c);
     825      c = my_uni_get(fin, line, isunicode, bigendian);
     826    }
     827      }
     828     
     829      if (!err && c == '_') {
     830    c = my_uni_get(fin, line, isunicode, bigendian); // skip the _
     831   
     832    // skip all white space
     833    while (!fin.eof() && is_unicode_space(c)) c = my_uni_get(fin, line, isunicode, bigendian);
     834      } else if (!err) err = 1;
     835
     836      // get the macro parameters (optional)
     837      if (!err && c == '[') {
     838    c = my_uni_get(fin, line, isunicode, bigendian); // skip the [
     839    while ((!fin.eof()) && (c != '\n') && (c != '\\') && (c != ']')) {
     840      macroparameters.push_back(c);
     841      c = my_uni_get(fin, line, isunicode, bigendian);
     842    }
     843   
     844    if (c == ']') {
     845      c = my_uni_get(fin, line, isunicode, bigendian); // skip the ]
     846     
     847      // skip all white space
     848      while (!fin.eof() && is_unicode_space(c)) c = my_uni_get(fin, line, isunicode, bigendian);
    701849    }
    702       else if (c == 'p')
    703     {
    704       // found the start of 'package' (hopefully)
    705       // get everything up to the next space
    706       text_t tmp;
    707       while (!fin.eof() && my_isalpha(c))
    708         {
    709           tmp.push_back((unsigned char)c);
    710           c = my_get(fin, line);
    711         }
    712       // see if we have a package name
    713       if (tmp == "package")
    714         {
    715           // skip all white space
    716           while (!fin.eof() && my_isspace(c))
    717         c = my_get(fin, line);
    718 
    719           // get the package name
    720           tmp.clear(); // init tmp
    721           while (!fin.eof() && my_isalpha(c))
    722         {
    723           tmp.push_back((unsigned char)c);
    724           c = my_get(fin, line);
    725         }
    726           package = to_uni(tmp); // convert from utf-8 to unicode
    727           if (package.empty()) package = "Global";
    728 
    729         }
    730       else
    731         {
    732           // error
    733           if (logout != NULL) {
    734         (*logout) << "Expected 'package' on line " << line << "\n";
    735           }
    736         }
    737 
    738     }
    739       else if (c == '_')
    740     {
    741       // found the start of a macro (hopefully)
    742       c = my_get(fin, line); // skip the _
    743 
    744       // init variables
    745       err = 0;
    746       macropackage = package;
    747       macroname.clear(); // init macroname
    748       macroparameters.clear(); // init macroname
    749       macrovalue.clear(); // init macroname
    750 
    751       // get the macro name
    752       while ((!fin.eof()) && (!my_isspace(c)) &&
    753          (c != '\\') && (c != '_') &&(c != ':') &&
    754          (macroname.size() < 80))
    755         {
    756           macroname.push_back((unsigned char)c);
    757           c = my_get(fin, line);
    758         }
    759       macroname = to_uni(macroname); // convert from utf-8 to unicode
     850    else if (!err) err = 2;
     851      }
     852
     853      // get the macro value
     854      if (!err && c == '{') {
     855    c = my_uni_get(fin, line, isunicode, bigendian); // skip the {
     856    while ((!fin.eof()) && (c != '}')) {
     857      if (c == '\\') {
     858        macrovalue.push_back(c); // keep the '\'
     859        c = my_uni_get(fin, line, isunicode, bigendian); // store the *next* value regardless
     860        if (!fin.eof()) macrovalue.push_back(c);
     861        c = my_uni_get(fin, line, isunicode, bigendian);
     862      }
     863      macrovalue.push_back(c);
     864      c = my_uni_get(fin, line, isunicode, bigendian);
     865    }
     866   
     867    if (c == '}') {
     868      c = my_uni_get(fin, line, isunicode, bigendian); // skip the }
    760869     
    761       if (c == ':')
    762         {
    763           // we actually had the macro package
    764           c = my_get(fin, line); // skip :
    765           macropackage = macroname;
    766           macroname.clear ();
    767 
    768           // get the macro name (honest!)
    769           while ((!fin.eof()) && (!my_isspace(c)) &&
    770              (c != '\\') && (c != '_') &&(c != ':') &&
    771              (macroname.size() < 80))
    772         {
    773           macroname.push_back((unsigned char)c);
    774           c = my_get(fin, line);
    775         }
    776           macroname = to_uni(macroname); // convert from utf-8 to unicode
    777         }
    778 
    779       if (!err && c == '_') {
    780         c = my_get(fin, line); // skip the _
     870      // define the macro
     871      err = setdefaultmacro (macropackage, macroname, macroparameters,
     872                 thisfilename, macrovalue);
     873      if ((err == -1 || err == -3) && logout != NULL) {
     874        (*logout) << text_t2ascii << "Warning: redefinition of _" <<
     875          package << ":" << macroname << "_[" << macroparameters <<
     876          "] on line ";
     877        (*logout) << line;
     878        (*logout) << text_t2ascii << " of " << thisfilename << "\n";
    781879       
    782         // skip all white space
    783         while (!fin.eof() && my_isspace(c)) c = my_get(fin, line);
    784       } else if (!err) err = 1;
    785 
    786       // get the macro parameters (optional)
    787       if (!err && c == '[')
    788         {
    789           c = my_get(fin, line); // skip the [
    790           while ((!fin.eof()) && (c != '\n') && (c != '\\') && (c != ']'))
    791         {
    792           macroparameters.push_back((unsigned char)c);
    793           c = my_get(fin, line);
    794         }
    795           macroparameters = to_uni(macroparameters);
    796 
    797           if (c == ']')
    798         {
    799           c = my_get(fin, line); // skip the ]
     880      } else if (err == -2 && logout != NULL) {
     881        (*logout) << text_t2ascii << "Warning: _" <<
     882          package << ":" << macroname << "_[" << macroparameters <<
     883          "] on line ";
     884        (*logout) << line;
     885        (*logout) << text_t2ascii << " of " <<
     886          thisfilename << " hides a Global macro with the same name\n";
     887
     888      } else if (err == -4 && logout != NULL) {
     889        (*logout) << text_t2ascii << "Error: macro name expected on line ";
     890        (*logout) << line ;
     891        (*logout) << text_t2ascii << " of " << thisfilename << "\n";
     892      }       
    800893     
    801           // skip all white space
    802           while (!fin.eof() && my_isspace(c)) c = my_get(fin, line);
    803         }
    804           else if (!err) err = 2;
    805         }
    806 
    807       // get the macro value
    808       if (!err && c == '{')
    809         {
    810           c = my_get(fin, line); // skip the {
    811           while ((!fin.eof()) && (c != '}'))
    812         {
    813           if (c == '\\')
    814             {
    815               macrovalue.push_back((unsigned char)c); // keep the '\'
    816               c = my_get(fin, line); // store the *next* value regardless
    817               if (!fin.eof()) macrovalue.push_back((unsigned char)c);
    818               c = my_get(fin, line);
    819             }
    820           macrovalue.push_back((unsigned char)c);
    821           c = my_get(fin, line);
    822         }
    823           macrovalue = to_uni(macrovalue);
    824 
    825           if (c == '}')
    826         {
    827           c = my_get(fin, line); // skip the }
    828      
    829           // define the macro
    830           err = setdefaultmacro (macropackage, macroname, macroparameters,
    831                      thisfilename, macrovalue);
    832           if ((err == -1 || err == -3) && logout != NULL)
    833             {
    834               (*logout) << text_t2ascii << "Warning: redefinition of _" <<
    835             package << ":" << macroname << "_[" << macroparameters <<
    836             "] on line ";
    837               (*logout) << line;
    838               (*logout) << text_t2ascii << " of " << thisfilename << "\n";
    839        
    840             }
    841           else if (err == -2 && logout != NULL)
    842             {
    843               (*logout) << text_t2ascii << "Warning: _" <<
    844             package << ":" << macroname << "_[" << macroparameters <<
    845             "] on line ";
    846               (*logout) << line;
    847               (*logout) << text_t2ascii << " of " <<
    848             thisfilename << " hides a Global macro with the same name\n";
    849             }
    850           else if (err == -4 && logout != NULL)
    851             {
    852               (*logout) << text_t2ascii << "Error: macro name expected on line ";
    853               (*logout) << line ;
    854               (*logout) << text_t2ascii << " of " << thisfilename << "\n";
    855             }         
    856      
    857           err = 0; // for the test below
    858         }
    859           else if (!err) err = 3;
    860         }
    861       else if (!err) err = 4;
    862 
    863       if (err)
    864         {
    865           // found an error, skip to the end of the line
    866           if (logout != NULL) {
    867         (*logout) << text_t2ascii << "Error: ";
    868         if (err == 1) (*logout) << text_t2ascii << "'_'";
    869         else if (err == 2) (*logout) << text_t2ascii << "']'";
    870         else if (err == 3) (*logout) << text_t2ascii << "'}'";
    871         else if (err == 4) (*logout) << text_t2ascii << "'{'";
    872         (*logout) << text_t2ascii << " expected on line ";
    873         (*logout) << line ;
    874         (*logout) << text_t2ascii << " of " << thisfilename << "\n";
    875           }
    876           while (!fin.eof ())
    877         {
    878           if (c == '\n') break;
    879           c = my_get(fin, line);
    880         }
    881         }
    882 
    883     }
    884       else
    885     {
    886       // found an error, skip to the end of the line
    887       if (logout != NULL) {
    888         (*logout) << "Error: Unexpected input on line " << line << "\n";
    889       }
    890       while (!fin.eof ())
    891         {
    892           if (c == '\n') break;
    893           c = my_get(fin, line);
    894         }
    895      
    896     }
    897     }
     894      err = 0; // for the test below
     895    }
     896    else if (!err) err = 3;
     897      }
     898      else if (!err) err = 4;
     899     
     900      if (err) {
     901    // found an error, skip to the end of the line
     902    if (logout != NULL) {
     903      (*logout) << text_t2ascii << "Error: ";
     904      if (err == 1) (*logout) << text_t2ascii << "'_'";
     905      else if (err == 2) (*logout) << text_t2ascii << "']'";
     906      else if (err == 3) (*logout) << text_t2ascii << "'}'";
     907      else if (err == 4) (*logout) << text_t2ascii << "'{'";
     908      (*logout) << text_t2ascii << " expected on line ";
     909      (*logout) << line ;
     910      (*logout) << text_t2ascii << " of " << thisfilename << "\n";
     911    }
     912    while (!fin.eof ()) {
     913      if (c == '\n') break;
     914      c = my_uni_get(fin, line, isunicode, bigendian);
     915    }
     916      }
     917     
     918    } else {
     919      // found an error, skip to the end of the line
     920      if (logout != NULL) {
     921    (*logout) << text_t2ascii << "Error: Unexpected input on line " << line
     922          << " of " << thisfilename << "\n";
     923      }
     924      while (!fin.eof ()) {
     925    if (c == '\n') break;
     926    c = my_uni_get(fin, line, isunicode, bigendian);
     927      }
     928     
     929    }
     930  }
    898931 
    899932  fin.close ();
     
    10391072     
    10401073      // get the macroname
    1041       while (tthere != ttend && (!my_isspace(c)) &&
     1074      while (tthere != ttend && (!is_unicode_space(c)) &&
    10421075         (c != '\\') && (c != '_') &&(c != ':') &&
    10431076         (macroname.size() < 80))
     
    10551088   
    10561089          // get the macro name (honest!)
    1057           while ((tthere != ttend) && (!my_isspace(c)) &&
     1090          while ((tthere != ttend) && (!is_unicode_space(c)) &&
    10581091             (c != '\\') && (c != '_') &&(c != ':') &&
    10591092             (macroname.size() < 80))
     
    12651298      if (*here == '"') quotecount++;
    12661299      else if (quotecount == 1) string1.push_back(*here);
    1267       else if ((quotecount == 2) && (*here != ' ') && (*here != '\n'))
     1300      else if ((quotecount == 2) && !is_unicode_space (*here))
    12681301    op.push_back(*here);
    12691302      else if (quotecount == 3) string2.push_back(*here);
     
    12981331    combineop.clear();
    12991332    while (here != end && *here != '"') {
    1300       if ((*here != ' ') && (*here != '\n')) combineop.push_back(*here);
     1333      if (!is_unicode_space(*here)) combineop.push_back(*here);
    13011334      here++;
    13021335    }
     
    13461379     
    13471380      // ignore initial whitespace
    1348       while ((hereit!=endit)&&my_isspace(c)) c=my_ttnextchar(hereit,endit);
     1381      while ((hereit!=endit)&&is_unicode_space(c)) c=my_ttnextchar(hereit,endit);
    13491382     
    13501383      // look for the end of the parameter
Note: See TracChangeset for help on using the changeset viewer.