Changeset 477
- Timestamp:
- 1999-08-31T19:59:11+12:00 (25 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/lib/display.cpp
r415 r477 12 12 /* 13 13 $Log$ 14 Revision 1.13 1999/08/31 07:59:11 rjmcnab 15 Generalised spaces to unicode spaces, added ability to automatically detect 16 whether the input file is Unicode or UTF-8 and read the file in the 17 appropriate way, and improved the error messages slightly. 18 14 19 Revision 1.12 1999/07/21 20:46:12 rjmcnab 15 20 fixed small bug … … 58 63 #include "display.h" 59 64 #include "gsdlunicode.h" 65 #include "unitool.h" 60 66 #include <assert.h> 61 67 … … 576 582 ///////////////////////////////////// 577 583 578 inline int my_isspace (char ch) 579 { 580 unsigned char c = ch; 581 return (((c > 0) && (c <= 31)) || (c == ' ')); 582 } 583 584 585 inline int my_isalpha (char c) 584 inline int my_isalpha (unsigned short c) 586 585 { 587 586 return ((c >= 'A' && c <= 'Z') || … … 589 588 } 590 589 591 592 // as we are using one character lookahead the593 // value of line might be off by one.594 inline char my_get (istream &fin, int &line)595 {596 char c;597 fin.get(c);598 if (c == '\n') line++;599 return c;600 }601 590 602 591 … … 657 646 658 647 648 // as we are using one character lookahead the 649 // value of line might be off by one. 650 // the input file must be in the utf-8 or unicode format 651 // initially for each file isunicode should be set to 0 and 652 // bigendian should be set to 1 653 // 0 will be returned when the end of the file has been found 654 unsigned short my_uni_get (istream &fin, int &line, 655 int &isunicode, int &bigendian) { 656 unsigned short c = 0; 657 658 if (isunicode) { 659 // unicode text 660 // get the next two characters 661 unsigned char c1 = 0, c2 = 0; 662 if (!fin.eof()) fin.get(c1); 663 if (!fin.eof()) fin.get(c2); 664 else c1 = 0; 665 666 // if they indicate the order get the next character 667 // otherwise just get these characters 668 if (c1 == 0xff && c2 == 0xfe) { 669 bigendian = 0; 670 c = my_uni_get (fin, line, isunicode, bigendian); 671 } else if (c1 == 0xfe && c2 == 0xff) { 672 bigendian = 1; 673 c = my_uni_get (fin, line, isunicode, bigendian); 674 } else c = (bigendian) ? (c1*256+c2) : (c2*256+c1); 675 676 } else { 677 // utf-8 text 678 // how many characters we get depends on what we find 679 unsigned char c1 = 0, c2 = 0, c3 = 0; 680 while (!fin.eof()) { 681 fin.get(c1); 682 if (c1 == 0xfe || c1 == 0xff) { 683 // switch to unicode 684 isunicode = 1; 685 if (!fin.eof()) fin.get(c2); 686 687 if (c1 == 0xff && c2 == 0xfe) bigendian = 0; 688 else bigendian = 1; 689 690 c = my_uni_get (fin, line, isunicode, bigendian); 691 break; 692 693 } else if (c1 <= 0x7f) { 694 // one byte character 695 c = c1; 696 break; 697 698 } else if (c1 >= 0xc0 && c1 <= 0xdf) { 699 // two byte character 700 if (!fin.eof()) fin.get(c2); 701 c = ((c1 & 0x1f) << 6) + (c2 & 0x3f); 702 break; 703 704 } else if (c1 >= 0xe0 && c1 <= 0xef) { 705 // three byte character 706 if (!fin.eof()) fin.get(c2); 707 if (!fin.eof()) fin.get(c3); 708 c = ((c1 & 0xf) << 12) + ((c2 & 0x3f) << 6) + (c3 & 0x3f); 709 break; 710 } 711 712 // if we get here there was an error in the file, we should 713 // be able to recover from it however, maybe the file is in 714 // another encoding 715 } 716 } 717 718 if (c == '\n') line++; 719 return c; 720 } 721 722 723 659 724 // loads a default macro file (if it isn't already loaded) 660 725 // returns 0 if didn't need to load the file (it was already loaded) 661 726 // 1 if was (re)loaded 662 727 // -1 an error occurred while trying to load the file 663 int displayclass::loaddefaultmacros (text_t thisfilename) 664 { 728 int displayclass::loaddefaultmacros (text_t thisfilename) { 665 729 // convert the filename to a C string 666 730 char *filenamestr = thisfilename.getcstr(); … … 674 738 675 739 text_t package = "Global"; 676 int line = 1; 677 char c = my_get(fin, line); // pre-fetch the next character 740 int line = 1; 741 int isunicode = 0, bigendian = 1; 742 743 // pre-fetch the next character 744 unsigned short c = my_uni_get(fin, line, isunicode, bigendian); 678 745 679 746 text_t macropackage, macroname, macroparameters, macrovalue; 680 747 int err; // for keeping track of whether an error occurred somewhere 681 748 682 while (!fin.eof()) 683 { 684 // expect: white space, comment, "package", or macroname 685 if (my_isspace(c)) 686 { 687 // found some white-space 688 c = my_get(fin, line); 689 } 690 else if (c == '#') 691 { 692 // found the start of a comment 693 // skip all characters up to the end of the line 694 c = my_get(fin, line); // skip the '#' 695 while (!fin.eof ()) 696 { 697 if (c == '\n') break; 698 c = my_get(fin, line); 699 } 700 749 while (!fin.eof()) { 750 // expect: white space, comment, "package", or macroname 751 if (is_unicode_space(c)) { 752 // found some white-space 753 c = my_uni_get(fin, line, isunicode, bigendian); 754 755 } else if (c == '#') { 756 // found the start of a comment 757 // skip all characters up to the end of the line 758 c = my_uni_get(fin, line, isunicode, bigendian); // skip the '#' 759 while (!fin.eof ()) { 760 if (c == '\n') break; 761 c = my_uni_get(fin, line, isunicode, bigendian); 762 } 763 764 } else if (c == 'p') { 765 // found the start of 'package' (hopefully) 766 // get everything up to the next space 767 text_t tmp; 768 while (!fin.eof() && my_isalpha(c)) { 769 tmp.push_back(c); 770 c = my_uni_get(fin, line, isunicode, bigendian); 771 } 772 // see if we have a package name 773 if (tmp == "package") { 774 // skip all white space 775 while (!fin.eof() && is_unicode_space(c)) 776 c = my_uni_get(fin, line, isunicode, bigendian); 777 778 // get the package name 779 tmp.clear(); // init tmp 780 while (!fin.eof() && my_isalpha(c)) { 781 tmp.push_back(c); 782 c = my_uni_get(fin, line, isunicode, bigendian); 783 } 784 package = tmp; 785 if (package.empty()) package = "Global"; 786 787 } else { 788 // error 789 if (logout != NULL) { 790 (*logout) << text_t2ascii << "Expected 'package' on line " << line 791 << " of " << thisfilename << "\n"; 792 } 793 } 794 795 } else if (c == '_') { 796 // found the start of a macro (hopefully) 797 c = my_uni_get(fin, line, isunicode, bigendian); // skip the _ 798 799 // init variables 800 err = 0; 801 macropackage = package; 802 macroname.clear(); // init macroname 803 macroparameters.clear(); // init macroname 804 macrovalue.clear(); // init macroname 805 806 // get the macro name 807 while ((!fin.eof()) && (!is_unicode_space(c)) && 808 (c != '\\') && (c != '_') &&(c != ':') && 809 (macroname.size() < 80)) { 810 macroname.push_back(c); 811 c = my_uni_get(fin, line, isunicode, bigendian); 812 } 813 814 if (c == ':') { 815 // we actually had the macro package 816 c = my_uni_get(fin, line, isunicode, bigendian); // skip : 817 macropackage = macroname; 818 macroname.clear (); 819 820 // get the macro name (honest!) 821 while ((!fin.eof()) && (!is_unicode_space(c)) && 822 (c != '\\') && (c != '_') &&(c != ':') && 823 (macroname.size() < 80)) { 824 macroname.push_back(c); 825 c = my_uni_get(fin, line, isunicode, bigendian); 826 } 827 } 828 829 if (!err && c == '_') { 830 c = my_uni_get(fin, line, isunicode, bigendian); // skip the _ 831 832 // skip all white space 833 while (!fin.eof() && is_unicode_space(c)) c = my_uni_get(fin, line, isunicode, bigendian); 834 } else if (!err) err = 1; 835 836 // get the macro parameters (optional) 837 if (!err && c == '[') { 838 c = my_uni_get(fin, line, isunicode, bigendian); // skip the [ 839 while ((!fin.eof()) && (c != '\n') && (c != '\\') && (c != ']')) { 840 macroparameters.push_back(c); 841 c = my_uni_get(fin, line, isunicode, bigendian); 842 } 843 844 if (c == ']') { 845 c = my_uni_get(fin, line, isunicode, bigendian); // skip the ] 846 847 // skip all white space 848 while (!fin.eof() && is_unicode_space(c)) c = my_uni_get(fin, line, isunicode, bigendian); 701 849 } 702 else if (c == 'p') 703 { 704 // found the start of 'package' (hopefully) 705 // get everything up to the next space 706 text_t tmp; 707 while (!fin.eof() && my_isalpha(c)) 708 { 709 tmp.push_back((unsigned char)c); 710 c = my_get(fin, line); 711 } 712 // see if we have a package name 713 if (tmp == "package") 714 { 715 // skip all white space 716 while (!fin.eof() && my_isspace(c)) 717 c = my_get(fin, line); 718 719 // get the package name 720 tmp.clear(); // init tmp 721 while (!fin.eof() && my_isalpha(c)) 722 { 723 tmp.push_back((unsigned char)c); 724 c = my_get(fin, line); 725 } 726 package = to_uni(tmp); // convert from utf-8 to unicode 727 if (package.empty()) package = "Global"; 728 729 } 730 else 731 { 732 // error 733 if (logout != NULL) { 734 (*logout) << "Expected 'package' on line " << line << "\n"; 735 } 736 } 737 738 } 739 else if (c == '_') 740 { 741 // found the start of a macro (hopefully) 742 c = my_get(fin, line); // skip the _ 743 744 // init variables 745 err = 0; 746 macropackage = package; 747 macroname.clear(); // init macroname 748 macroparameters.clear(); // init macroname 749 macrovalue.clear(); // init macroname 750 751 // get the macro name 752 while ((!fin.eof()) && (!my_isspace(c)) && 753 (c != '\\') && (c != '_') &&(c != ':') && 754 (macroname.size() < 80)) 755 { 756 macroname.push_back((unsigned char)c); 757 c = my_get(fin, line); 758 } 759 macroname = to_uni(macroname); // convert from utf-8 to unicode 850 else if (!err) err = 2; 851 } 852 853 // get the macro value 854 if (!err && c == '{') { 855 c = my_uni_get(fin, line, isunicode, bigendian); // skip the { 856 while ((!fin.eof()) && (c != '}')) { 857 if (c == '\\') { 858 macrovalue.push_back(c); // keep the '\' 859 c = my_uni_get(fin, line, isunicode, bigendian); // store the *next* value regardless 860 if (!fin.eof()) macrovalue.push_back(c); 861 c = my_uni_get(fin, line, isunicode, bigendian); 862 } 863 macrovalue.push_back(c); 864 c = my_uni_get(fin, line, isunicode, bigendian); 865 } 866 867 if (c == '}') { 868 c = my_uni_get(fin, line, isunicode, bigendian); // skip the } 760 869 761 if (c == ':') 762 { 763 // we actually had the macro package 764 c = my_get(fin, line); // skip : 765 macropackage = macroname; 766 macroname.clear (); 767 768 // get the macro name (honest!) 769 while ((!fin.eof()) && (!my_isspace(c)) && 770 (c != '\\') && (c != '_') &&(c != ':') && 771 (macroname.size() < 80)) 772 { 773 macroname.push_back((unsigned char)c); 774 c = my_get(fin, line); 775 } 776 macroname = to_uni(macroname); // convert from utf-8 to unicode 777 } 778 779 if (!err && c == '_') { 780 c = my_get(fin, line); // skip the _ 870 // define the macro 871 err = setdefaultmacro (macropackage, macroname, macroparameters, 872 thisfilename, macrovalue); 873 if ((err == -1 || err == -3) && logout != NULL) { 874 (*logout) << text_t2ascii << "Warning: redefinition of _" << 875 package << ":" << macroname << "_[" << macroparameters << 876 "] on line "; 877 (*logout) << line; 878 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 781 879 782 // skip all white space 783 while (!fin.eof() && my_isspace(c)) c = my_get(fin, line); 784 } else if (!err) err = 1; 785 786 // get the macro parameters (optional) 787 if (!err && c == '[') 788 { 789 c = my_get(fin, line); // skip the [ 790 while ((!fin.eof()) && (c != '\n') && (c != '\\') && (c != ']')) 791 { 792 macroparameters.push_back((unsigned char)c); 793 c = my_get(fin, line); 794 } 795 macroparameters = to_uni(macroparameters); 796 797 if (c == ']') 798 { 799 c = my_get(fin, line); // skip the ] 880 } else if (err == -2 && logout != NULL) { 881 (*logout) << text_t2ascii << "Warning: _" << 882 package << ":" << macroname << "_[" << macroparameters << 883 "] on line "; 884 (*logout) << line; 885 (*logout) << text_t2ascii << " of " << 886 thisfilename << " hides a Global macro with the same name\n"; 887 888 } else if (err == -4 && logout != NULL) { 889 (*logout) << text_t2ascii << "Error: macro name expected on line "; 890 (*logout) << line ; 891 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 892 } 800 893 801 // skip all white space 802 while (!fin.eof() && my_isspace(c)) c = my_get(fin, line); 803 } 804 else if (!err) err = 2; 805 } 806 807 // get the macro value 808 if (!err && c == '{') 809 { 810 c = my_get(fin, line); // skip the { 811 while ((!fin.eof()) && (c != '}')) 812 { 813 if (c == '\\') 814 { 815 macrovalue.push_back((unsigned char)c); // keep the '\' 816 c = my_get(fin, line); // store the *next* value regardless 817 if (!fin.eof()) macrovalue.push_back((unsigned char)c); 818 c = my_get(fin, line); 819 } 820 macrovalue.push_back((unsigned char)c); 821 c = my_get(fin, line); 822 } 823 macrovalue = to_uni(macrovalue); 824 825 if (c == '}') 826 { 827 c = my_get(fin, line); // skip the } 828 829 // define the macro 830 err = setdefaultmacro (macropackage, macroname, macroparameters, 831 thisfilename, macrovalue); 832 if ((err == -1 || err == -3) && logout != NULL) 833 { 834 (*logout) << text_t2ascii << "Warning: redefinition of _" << 835 package << ":" << macroname << "_[" << macroparameters << 836 "] on line "; 837 (*logout) << line; 838 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 839 840 } 841 else if (err == -2 && logout != NULL) 842 { 843 (*logout) << text_t2ascii << "Warning: _" << 844 package << ":" << macroname << "_[" << macroparameters << 845 "] on line "; 846 (*logout) << line; 847 (*logout) << text_t2ascii << " of " << 848 thisfilename << " hides a Global macro with the same name\n"; 849 } 850 else if (err == -4 && logout != NULL) 851 { 852 (*logout) << text_t2ascii << "Error: macro name expected on line "; 853 (*logout) << line ; 854 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 855 } 856 857 err = 0; // for the test below 858 } 859 else if (!err) err = 3; 860 } 861 else if (!err) err = 4; 862 863 if (err) 864 { 865 // found an error, skip to the end of the line 866 if (logout != NULL) { 867 (*logout) << text_t2ascii << "Error: "; 868 if (err == 1) (*logout) << text_t2ascii << "'_'"; 869 else if (err == 2) (*logout) << text_t2ascii << "']'"; 870 else if (err == 3) (*logout) << text_t2ascii << "'}'"; 871 else if (err == 4) (*logout) << text_t2ascii << "'{'"; 872 (*logout) << text_t2ascii << " expected on line "; 873 (*logout) << line ; 874 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 875 } 876 while (!fin.eof ()) 877 { 878 if (c == '\n') break; 879 c = my_get(fin, line); 880 } 881 } 882 883 } 884 else 885 { 886 // found an error, skip to the end of the line 887 if (logout != NULL) { 888 (*logout) << "Error: Unexpected input on line " << line << "\n"; 889 } 890 while (!fin.eof ()) 891 { 892 if (c == '\n') break; 893 c = my_get(fin, line); 894 } 895 896 } 897 } 894 err = 0; // for the test below 895 } 896 else if (!err) err = 3; 897 } 898 else if (!err) err = 4; 899 900 if (err) { 901 // found an error, skip to the end of the line 902 if (logout != NULL) { 903 (*logout) << text_t2ascii << "Error: "; 904 if (err == 1) (*logout) << text_t2ascii << "'_'"; 905 else if (err == 2) (*logout) << text_t2ascii << "']'"; 906 else if (err == 3) (*logout) << text_t2ascii << "'}'"; 907 else if (err == 4) (*logout) << text_t2ascii << "'{'"; 908 (*logout) << text_t2ascii << " expected on line "; 909 (*logout) << line ; 910 (*logout) << text_t2ascii << " of " << thisfilename << "\n"; 911 } 912 while (!fin.eof ()) { 913 if (c == '\n') break; 914 c = my_uni_get(fin, line, isunicode, bigendian); 915 } 916 } 917 918 } else { 919 // found an error, skip to the end of the line 920 if (logout != NULL) { 921 (*logout) << text_t2ascii << "Error: Unexpected input on line " << line 922 << " of " << thisfilename << "\n"; 923 } 924 while (!fin.eof ()) { 925 if (c == '\n') break; 926 c = my_uni_get(fin, line, isunicode, bigendian); 927 } 928 929 } 930 } 898 931 899 932 fin.close (); … … 1039 1072 1040 1073 // get the macroname 1041 while (tthere != ttend && (! my_isspace(c)) &&1074 while (tthere != ttend && (!is_unicode_space(c)) && 1042 1075 (c != '\\') && (c != '_') &&(c != ':') && 1043 1076 (macroname.size() < 80)) … … 1055 1088 1056 1089 // get the macro name (honest!) 1057 while ((tthere != ttend) && (! my_isspace(c)) &&1090 while ((tthere != ttend) && (!is_unicode_space(c)) && 1058 1091 (c != '\\') && (c != '_') &&(c != ':') && 1059 1092 (macroname.size() < 80)) … … 1265 1298 if (*here == '"') quotecount++; 1266 1299 else if (quotecount == 1) string1.push_back(*here); 1267 else if ((quotecount == 2) && (*here != ' ') && (*here != '\n'))1300 else if ((quotecount == 2) && !is_unicode_space (*here)) 1268 1301 op.push_back(*here); 1269 1302 else if (quotecount == 3) string2.push_back(*here); … … 1298 1331 combineop.clear(); 1299 1332 while (here != end && *here != '"') { 1300 if ( (*here != ' ') && (*here != '\n')) combineop.push_back(*here);1333 if (!is_unicode_space(*here)) combineop.push_back(*here); 1301 1334 here++; 1302 1335 } … … 1346 1379 1347 1380 // ignore initial whitespace 1348 while ((hereit!=endit)&& my_isspace(c)) c=my_ttnextchar(hereit,endit);1381 while ((hereit!=endit)&&is_unicode_space(c)) c=my_ttnextchar(hereit,endit); 1349 1382 1350 1383 // look for the end of the parameter
Note:
See TracChangeset
for help on using the changeset viewer.