Changeset 14 for trunk/dlexer.d
- Timestamp:
- 01/12/05 04:48:37 (7 years ago)
- Files:
-
- trunk/dlexer.d (modified) (38 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/dlexer.d
r13 r14 11 11 import std.string; 12 12 import std.ctype; 13 14 private import std.c.stdlib; // for strtod and strtold 13 15 14 16 alias std.ctype.isdigit isdigit; … … 289 291 290 292 // A language token: 291 structToken {293 class Token { 292 294 // The kind of token: 293 295 uint value; 296 Token next; 294 297 295 298 // The value for the token: 296 299 union { 297 300 // The identifier or value of the token: 298 Identifier *ident;301 Identifier ident; 299 302 300 303 char[] ustring; … … 311 314 312 315 char[] toString() { 313 return toktostr[token]; 316 return toktostr[value]; 317 }; 318 319 static char[] toChars(uint value) { 320 return toktostr[value]; 314 321 }; 315 322 } … … 328 335 if (!isdigit(x) || (x == '8') || (x == '9')) return 0; 329 336 return -1; 337 } 338 339 class Loc { 340 public: 341 int linnum; 342 343 this() { 344 } 330 345 } 331 346 … … 443 458 protected: 444 459 Token token; 445 Identifier*[char[]] identifiers; 460 Identifier[char[]] identifiers; 461 Loc loc; 446 462 447 463 private: … … 472 488 473 489 case 0x1A: 474 error( format("unterminated string constant starting at %s", s));490 error("unterminated string constant starting at %s", s); 475 491 return null; 476 492 … … 517 533 518 534 case 0x1A: 519 error( format("unterminated string constant starting at %s", s));535 error("unterminated string constant starting at %s", s); 520 536 return null; 521 537 522 538 case '"': 523 539 if (n & 1) { 524 error( format("odd number (%d) of hex characters in hex string", n));540 error("odd number (%d) of hex characters in hex string", n); 525 541 return null; 526 542 } … … 536 552 c -= 'A' - 10; 537 553 else 538 error( format("non-hex character '%c'", c));554 error("non-hex character '%c'", c); 539 555 if (n & 1) { 540 556 v = (v << 4) | c; … … 562 578 563 579 // Possible to buffer up errors until some limit n. 564 void error(char[] msg ) {565 throw new DLexerException(this, msg);580 void error(char[] msg, ...) { 581 throw new DLexerException(this, format(msg, _arguments)); 566 582 } 567 583 568 584 // This function consumes a full D language token and returns it in the structure Token: 569 585 // null is returned if the end of the file is reached. 570 uint nextToken() {586 uint scan(Token t) { 571 587 uint start; 572 588 573 t oken.ident = null;574 t oken.uns64value = 0;575 t oken.value = TOKidentifier;589 t.ident = null; 590 t.uns64value = 0; 591 t.value = TOKidentifier; 576 592 577 593 // Past the end of file? Return an EOF token: 578 594 if (p >= file.length) { 579 t oken.value = TOKeof;580 return t oken.value;595 t.value = TOKeof; 596 return t.value; 581 597 } 582 598 … … 595 611 if (file[p] == '.') { 596 612 ++p; 597 t oken.value = TOKdotdotdot;613 t.value = TOKdotdotdot; 598 614 } else 599 t oken.value = TOKslice;615 t.value = TOKslice; 600 616 } else 601 t oken.value = TOKdot;602 return t oken.value;617 t.value = TOKdot; 618 return t.value; 603 619 604 620 case '&': … … 606 622 if (file[p] == '=') { 607 623 ++p; 608 t oken.value = TOKandass;624 t.value = TOKandass; 609 625 } else if (file[p] == '&') { 610 626 ++p; 611 t oken.value = TOKandand;627 t.value = TOKandand; 612 628 } else 613 t oken.value = TOKand;614 return t oken.value;629 t.value = TOKand; 630 return t.value; 615 631 616 632 case '|': … … 618 634 if (file[p] == '=') { 619 635 ++p; 620 t oken.value = TOKorass;636 t.value = TOKorass; 621 637 } else if (file[p] == '|') { 622 638 ++p; 623 t oken.value = TOKoror;639 t.value = TOKoror; 624 640 } else 625 t oken.value = TOKor;626 return t oken.value;641 t.value = TOKor; 642 return t.value; 627 643 628 644 case '-': … … 630 646 if (file[p] == '=') { 631 647 ++p; 632 t oken.value = TOKminass;648 t.value = TOKminass; 633 649 } else if (file[p] == '-') { 634 650 ++p; 635 t oken.value = TOKminusminus;651 t.value = TOKminusminus; 636 652 } else 637 t oken.value = TOKmin;638 return t oken.value;653 t.value = TOKmin; 654 return t.value; 639 655 640 656 case '+': … … 642 658 if (file[p] == '=') { 643 659 ++p; 644 t oken.value = TOKaddass;660 t.value = TOKaddass; 645 661 } else if (file[p] == '+') { 646 662 ++p; 647 t oken.value = TOKplusplus;663 t.value = TOKplusplus; 648 664 } else 649 t oken.value = TOKadd;650 return t oken.value;665 t.value = TOKadd; 666 return t.value; 651 667 652 668 case '=': … … 656 672 if (file[p] == '=') { 657 673 ++p; 658 t oken.value = TOKidentity;674 t.value = TOKidentity; 659 675 } else 660 t oken.value = TOKequal;676 t.value = TOKequal; 661 677 } else 662 t oken.value = TOKassign;663 return t oken.value;678 t.value = TOKassign; 679 return t.value; 664 680 665 681 case '<': … … 667 683 if (file[p] == '=') { 668 684 ++p; 669 t oken.value = TOKle; // <=685 t.value = TOKle; // <= 670 686 } else if (file[p] == '<') { 671 687 ++p; 672 688 if (file[p] == '=') { 673 689 ++p; 674 t oken.value = TOKshlass; // <<=690 t.value = TOKshlass; // <<= 675 691 } else 676 t oken.value = TOKshl; // <<692 t.value = TOKshl; // << 677 693 } else if (file[p] == '>') { 678 694 ++p; 679 695 if (file[p] == '=') { 680 696 ++p; 681 t oken.value = TOKleg; // <>=697 t.value = TOKleg; // <>= 682 698 } else 683 t oken.value = TOKlg; // <>699 t.value = TOKlg; // <> 684 700 } else 685 t oken.value = TOKlt; // <686 return t oken.value;701 t.value = TOKlt; // < 702 return t.value; 687 703 688 704 case '>': … … 690 706 if (file[p] == '=') { 691 707 ++p; 692 t oken.value = TOKge; // >=708 t.value = TOKge; // >= 693 709 } else if (file[p] == '>') { 694 710 ++p; 695 711 if (file[p] == '=') { 696 712 ++p; 697 t oken.value = TOKshrass; // >>=713 t.value = TOKshrass; // >>= 698 714 } else if (file[p] == '>') { 699 715 ++p; 700 716 if (file[p] == '=') { 701 717 ++p; 702 t oken.value = TOKushrass; // >>>=718 t.value = TOKushrass; // >>>= 703 719 } else 704 t oken.value = TOKushr; // >>>720 t.value = TOKushr; // >>> 705 721 } else 706 t oken.value = TOKshr; // >>722 t.value = TOKshr; // >> 707 723 } else 708 t oken.value = TOKgt; // >709 return t oken.value;724 t.value = TOKgt; // > 725 return t.value; 710 726 711 727 case '!': … … 715 731 if (file[p] == '=') { 716 732 ++p; 717 t oken.value = TOKnotidentity; // !==733 t.value = TOKnotidentity; // !== 718 734 } else 719 t oken.value = TOKnotequal; // !=735 t.value = TOKnotequal; // != 720 736 } else if (file[p] == '<') { 721 737 ++p; … … 724 740 if (file[p] == '=') { 725 741 ++p; 726 t oken.value = TOKunord; // !<>=742 t.value = TOKunord; // !<>= 727 743 } else 728 t oken.value = TOKue; // !<>744 t.value = TOKue; // !<> 729 745 } else if (file[p] == '=') { 730 746 ++p; 731 t oken.value = TOKug; // !<=747 t.value = TOKug; // !<= 732 748 } else 733 t oken.value = TOKuge; // !<749 t.value = TOKuge; // !< 734 750 } else if (file[p] == '>') { 735 751 ++p; 736 752 if (file[p] == '=') { 737 753 ++p; 738 t oken.value = TOKul; // !>=754 t.value = TOKul; // !>= 739 755 } else 740 t oken.value = TOKule; // !>756 t.value = TOKule; // !> 741 757 } else 742 t oken.value = TOKnot; // !743 return t oken.value;758 t.value = TOKnot; // ! 759 return t.value; 744 760 745 761 case '*': … … 747 763 if (file[p] == '=') { 748 764 ++p; 749 t oken.value = TOKmulass;765 t.value = TOKmulass; 750 766 } else 751 t oken.value = TOKmul;752 return t oken.value;767 t.value = TOKmul; 768 return t.value; 753 769 case '%': 754 770 ++p; 755 771 if (file[p] == '=') { 756 772 ++p; 757 t oken.value = TOKmodass;773 t.value = TOKmodass; 758 774 } else 759 t oken.value = TOKmod;760 return t oken.value;775 t.value = TOKmod; 776 return t.value; 761 777 case '^': 762 778 ++p; 763 779 if (file[p] == '=') { 764 780 ++p; 765 t oken.value = TOKxorass;781 t.value = TOKxorass; 766 782 } else 767 t oken.value = TOKxor;768 return t oken.value;783 t.value = TOKxor; 784 return t.value; 769 785 case '~': 770 786 ++p; 771 787 if (file[p] == '=') { 772 788 ++p; 773 t oken.value = TOKcatass;789 t.value = TOKcatass; 774 790 } else 775 t oken.value = TOKtilde;776 return t oken.value;777 778 case '(': ++p; t oken.value = TOKlparen; return token.value;779 case ')': ++p; t oken.value = TOKrparen; return token.value;791 t.value = TOKtilde; 792 return t.value; 793 794 case '(': ++p; t.value = TOKlparen; return t.value; 795 case ')': ++p; t.value = TOKrparen; return t.value; 780 796 case '[': 781 797 ++p; 782 798 if (file[p] == ']') { 783 799 ++p; 784 t oken.value = TOKarray;800 t.value = TOKarray; 785 801 } else 786 t oken.value = TOKlbracket;787 return t oken.value;788 case ']': ++p; t oken.value = TOKrbracket; return token.value;789 case '{': ++p; t oken.value = TOKlcurly; return token.value;790 case '}': ++p; t oken.value = TOKrcurly; return token.value;791 case ':': ++p; t oken.value = TOKcolon; return token.value;792 case ';': ++p; t oken.value = TOKsemicolon; return token.value;793 case '?': ++p; t oken.value = TOKquestion; return token.value;794 case ',': ++p; t oken.value = TOKcomma; return token.value;802 t.value = TOKlbracket; 803 return t.value; 804 case ']': ++p; t.value = TOKrbracket; return t.value; 805 case '{': ++p; t.value = TOKlcurly; return t.value; 806 case '}': ++p; t.value = TOKrcurly; return t.value; 807 case ':': ++p; t.value = TOKcolon; return t.value; 808 case ';': ++p; t.value = TOKsemicolon; return t.value; 809 case '?': ++p; t.value = TOKquestion; return t.value; 810 case ',': ++p; t.value = TOKcomma; return t.value; 795 811 796 812 case '/': … … 798 814 switch (file[p]) { 799 815 case '=': 800 t oken.value = TOKdivass;801 return t oken.value;816 t.value = TOKdivass; 817 return t.value; 802 818 803 819 case '/': … … 856 872 857 873 default: 858 t oken.value = TOKdiv;859 return t oken.value;874 t.value = TOKdiv; 875 return t.value; 860 876 } 861 877 break; … … 882 898 ++p; 883 899 ++p; 884 t oken.value = TOKcharv;885 t oken.ustring = tok;886 return t oken.value;900 t.value = TOKcharv; 901 t.ustring = tok; 902 return t.value; 887 903 } else { 888 904 tok.length = 1; … … 890 906 ++p; 891 907 ++p; 892 t oken.value = TOKcharv;893 t oken.ustring = tok;894 return t oken.value;908 t.value = TOKcharv; 909 t.ustring = tok; 910 return t.value; 895 911 } 896 912 } else { … … 906 922 ++p; 907 923 } 908 t oken.ustring = file[start .. p];909 t oken.value = TOKcharv;910 ++p; 911 return t oken.value;924 t.ustring = file[start .. p]; 925 t.value = TOKcharv; 926 ++p; 927 return t.value; 912 928 } 913 929 } … … 947 963 } 948 964 } 949 t oken.value = TOKstring;965 t.value = TOKstring; 950 966 tok.length = l; 951 t oken.ustring = tok;952 ++p; 953 return t oken.value;967 t.ustring = tok; 968 ++p; 969 return t.value; 954 970 } else { 955 971 // Just copy over the escape strings: … … 964 980 ++p; 965 981 } 966 t oken.value = TOKstring;967 t oken.ustring = file[start .. p];968 ++p; 969 return t oken.value;982 t.value = TOKstring; 983 t.ustring = file[start .. p]; 984 ++p; 985 return t.value; 970 986 } 971 987 } … … 980 996 } 981 997 case '`': 982 t oken.value = TOKstring;983 t oken.ustring = wysiwygString(file[p]);984 return t oken.value;998 t.value = TOKstring; 999 t.ustring = wysiwygString(file[p]); 1000 return t.value; 985 1001 986 1002 case 'x': … … 991 1007 goto case_ident; 992 1008 } 993 t oken.value = TOKstring;994 t oken.ustring = hexString();995 return t oken.value;1009 t.value = TOKstring; 1010 t.ustring = hexString(); 1011 return t.value; 996 1012 997 1013 // Identifier start with _ or a-z,A-Z: … … 1015 1031 char[] tok = file[start .. p]; 1016 1032 if (tok in keywords) { 1017 t oken.value = keywords[tok];1033 t.value = keywords[tok]; 1018 1034 } else { 1019 t oken.value = TOKidentifier;1035 t.value = TOKidentifier; 1020 1036 // Check for the identifier in the list: 1021 1037 if (tok in identifiers) { 1022 1038 // Use that. 1023 t oken.ident = identifiers[tok];1039 t.ident = identifiers[tok]; 1024 1040 } else { 1025 1041 // Make a new one. 1026 t oken.ident = new Identifier(tok, 0);1027 identifiers[tok] = t oken.ident;1042 t.ident = new Identifier(tok, 0); 1043 identifiers[tok] = t.ident; 1028 1044 } 1029 1045 } 1030 return t oken.value;1046 return t.value; 1031 1047 1032 1048 // Numeric literal: 1033 1049 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': { 1034 number(); 1035 return token.value; 1050 return number(t); 1036 1051 } 1037 1052 … … 1045 1060 1046 1061 // Parse a real number: 1047 uint inreal( ) {1062 uint inreal(Token t) { 1048 1063 printf("inreal\n"); 1049 return TOKfloat80v; 1064 1065 int dblstate, i; 1066 char[] s; 1067 char c; 1068 char hex; // is this a hexadecimal-floating-constant? 1069 uint result; 1070 1071 //printf("Lexer::inreal()\n"); 1072 i = 0; 1073 s.length = 16; 1074 dblstate = 0; 1075 hex = 0; 1076 Lnext: 1077 while (p < file.length) { 1078 // Get next char from input 1079 c = file[p++]; 1080 while (1) { 1081 switch (dblstate) { 1082 case 0: // opening state 1083 if (c == '0') 1084 dblstate = 9; 1085 else 1086 dblstate = 1; 1087 break; 1088 1089 case 9: 1090 dblstate = 1; 1091 if (c == 'X' || c == 'x') { 1092 hex++; 1093 break; 1094 } 1095 case 1: // digits to left of . 1096 case 3: // digits to right of . 1097 case 7: // continuing exponent digits 1098 if (!isdigit(c) && !(hex && isxdigit(c))) { 1099 if (c == '_') 1100 goto Lnext; // ignore embedded '_' 1101 dblstate++; 1102 continue; 1103 } 1104 break; 1105 1106 case 2: // no more digits to left of . 1107 if (c == '.') { 1108 dblstate++; 1109 break; 1110 } 1111 case 4: // no more digits to right of . 1112 if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) { 1113 dblstate = 5; 1114 hex = 0; // exponent is always decimal 1115 break; 1116 } 1117 if (hex) 1118 error("binary-exponent-part required"); 1119 goto done; 1120 1121 case 5: // looking immediately to right of E 1122 dblstate++; 1123 if (c == '-' || c == '+') 1124 break; 1125 case 6: // 1st exponent digit expected 1126 if (!isdigit(c)) 1127 error("exponent expected"); 1128 dblstate++; 1129 break; 1130 1131 case 8: // past end of exponent digits 1132 goto done; 1133 } 1134 break; 1135 } 1136 if (i == s.length) s.length = s.length * 2; 1137 s[i++] = c; 1138 } 1139 done: 1140 p--; 1141 s.length = i; 1142 1143 setErrno(0); 1144 1145 switch (file[p]) { 1146 case 'F': 1147 case 'f': 1148 t.float80value = strtod(s.ptr, null); 1149 result = TOKfloat32v; 1150 p++; 1151 break; 1152 1153 default: 1154 t.float80value = strtod(s.ptr, null); 1155 result = TOKfloat64v; 1156 break; 1157 1158 case 'L': 1159 case 'l': 1160 t.float80value = strtold(s.ptr, null); 1161 result = TOKfloat80v; 1162 p++; 1163 break; 1164 } 1165 // Imaginary value: 1166 if (file[p] == 'i' || file[p] == 'I') { 1167 p++; 1168 switch (result) 1169 { 1170 case TOKfloat32v: 1171 result = TOKimaginary32v; 1172 break; 1173 case TOKfloat64v: 1174 result = TOKimaginary64v; 1175 break; 1176 case TOKfloat80v: 1177 result = TOKimaginary80v; 1178 break; 1179 } 1180 } 1181 // Standard C: 1182 if (getErrno() != 0) // == ERANGE 1183 error("number is not representable"); 1184 return result; 1050 1185 } 1051 1186 1052 1187 // Parse a number and return the associated value: 1053 uint number( ) {1188 uint number(Token t) { 1054 1189 // We use a state machine to collect numbers 1055 1190 enum : uint { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, … … 1136 1271 realnum: // It's a real number. Back up and rescan as a real 1137 1272 p = start; 1138 return inreal( );1273 return inreal(t); 1139 1274 } 1140 1275 goto done; … … 1154 1289 goto realnum; 1155 1290 if (state == STATE_hex0) 1156 error( format("Hex digit expected, not '%c'", c));1291 error("Hex digit expected, not '%c'", c); 1157 1292 goto done; 1158 1293 } … … 1281 1416 case 0: 1282 1417 if (n & 0x8000000000000000L) 1283 t oken.value = TOKuns64v;1418 t.value = TOKuns64v; 1284 1419 else if (n & 0xFFFFFFFF00000000L) 1285 t oken.value = TOKint64v;1420 t.value = TOKint64v; 1286 1421 else if (n & 0x80000000) 1287 t oken.value = TOKuns32v;1422 t.value = TOKuns32v; 1288 1423 else 1289 t oken.value = TOKint32v;1424 t.value = TOKint32v; 1290 1425 break; 1291 1426 … … 1293 1428 if (n & 0x8000000000000000L) { 1294 1429 error("signed integer overflow"); 1295 t oken.value = TOKuns64v;1430 t.value = TOKuns64v; 1296 1431 } 1297 1432 else if (n & 0xFFFFFFFF80000000L) 1298 t oken.value = TOKint64v;1433 t.value = TOKint64v; 1299 1434 else 1300 t oken.value = TOKint32v;1435 t.value = TOKint32v; 1301 1436 break; 1302 1437 … … 1304 1439 case FLAGS_decimal | FLAGS_unsigned: 1305 1440 if (n & 0xFFFFFFFF00000000L) 1306 t oken.value = TOKuns64v;1441 t.value = TOKuns64v; 1307 1442 else 1308 t oken.value = TOKuns32v;1443 t.value = TOKuns32v; 1309 1444 break; 1310 1445 … … 1312 1447 if (n & 0x8000000000000000L) { 1313 1448 error("signed integer overflow"); 1314 t oken.value = TOKuns64v;1449 t.value = TOKuns64v; 1315 1450 } else 1316 t oken.value = TOKint64v;1451 t.value = TOKint64v; 1317 1452 break; 1318 1453 1319 1454 case FLAGS_long: 1320 1455 if (n & 0x8000000000000000L) 1321 t oken.value = TOKuns64v;1456 t.value = TOKuns64v; 1322 1457 else 1323 t oken.value = TOKint64v;1458 t.value = TOKint64v; 1324 1459 break; 1325 1460 1326 1461 case FLAGS_unsigned | FLAGS_long: 1327 1462 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: 1328 t oken.value = TOKuns64v;1463 t.value = TOKuns64v; 1329 1464 break; 1330 1465 … … 1333 1468 } 1334 1469 1335 token.uns64value = n; 1470 t.uns64value = n; 1471 return t.value; 1472 } 1473 1474 uint nextToken() { 1475 Token t; 1476 1477 if (token.next) { 1478 t = token.next; 1479 token = t; 1480 t.next = null; 1481 } else 1482 scan(token); 1483 1336 1484 return token.value; 1337 1485 } 1338 1486 1339 1487 // Only peek at the next token, don't consume it: 1340 uint peekToken() { 1341 uint savep = p, saveline = line; 1342 uint tok = nextToken(); 1343 p = savep; 1344 line = saveline; 1345 1346 return tok; 1488 Token peek(Token ct) { 1489 Token t; 1490 1491 if (ct.next) { 1492 t = ct.next; 1493 } else { 1494 t = new Token(); 1495 scan(t); 1496 t.next = null; 1497 ct.next = t; 1498 } 1499 1500 return t; 1347 1501 } 1348 1502
