Changeset 12
- Timestamp:
- 12/21/04 02:09:57 (7 years ago)
- Files:
-
- trunk/dlexer.d (modified) (11 diffs)
- trunk/dtags.d (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/dlexer.d
r11 r12 11 11 import std.string; 12 12 import std.ctype; 13 14 alias std.ctype.isdigit isdigit; 13 15 14 16 // Set this version identifier to enable interpretation of escaped characters within parsed strings … … 115 117 // Testing 116 118 TOKunittest, 117 119 118 120 TOKmax 119 121 } … … 182 184 // NCEG floating point compares 183 185 // !<>= <> <>= !> !>= !< !<= !<> 184 186 185 187 // NOTE: These could be horribly wrong 186 188 TOKunord : "!<>=", … … 278 280 // A language token: 279 281 struct Token { 280 // The identifier or value of the token:281 char[] ident;282 282 // The kind of token: 283 283 uint token; 284 union { 285 // The identifier or value of the token: 286 char[] ident; 287 288 // Integers 289 int int32value; 290 uint uns32value; 291 long int64value; 292 ulong uns64value; 293 294 // Floats 295 real float80value; 296 } 284 297 } 285 298 … … 291 304 super(dlx.filename ~ "(" ~ format("%d", dlx.line) ~ ")" ~ ": " ~ msg); 292 305 } 306 } 307 308 // Check for octal digit: 309 int isodigit(dchar x) { 310 if (!isdigit(x) || (x == '8') || (x == '9')) return 0; 311 return -1; 293 312 } 294 313 … … 408 427 uint p; // Current character 409 428 410 // TODO: 411 char[] wysiwygString() { 412 return null; 429 // Test this! 430 char[] wysiwygString(char tc) { 431 char c; 432 char[] s; 433 uint i; 434 435 i = 0; 436 ++p; 437 s.length = 16; 438 while (p < file.length) { 439 c = file[p++]; 440 switch (c) { 441 case '\n': 442 ++line; 443 break; 444 445 case '\r': 446 if (file[p] == '\n') 447 continue; // ignore 448 c = '\n'; // treat EndOfLine as \n character 449 ++line; 450 break; 451 452 case 0x1A: 453 throw new DLexerException(this, format("unterminated string constant starting at %s", s)); 454 return null; 455 456 case '"', '`': 457 if (c == tc) { 458 s.length = i; 459 return s; 460 } 461 break; 462 463 default: 464 break; 465 } 466 467 if (i == s.length) s.length = s.length * 2; 468 s[i++] = c; 469 } 470 s.length = i; 471 return s; 413 472 } 414 473 415 // T ODO:474 // Test this! 416 475 char[] hexString() { 476 char c; 477 uint n = 0, i = 0; 478 char[] s; 479 ubyte v; 480 481 p++; 482 s.length = 16; 483 while (p < file.length) { 484 c = file[p++]; 485 switch (c) { 486 case ' ', '\t', '\v', '\f': 487 continue; // skip white space 488 489 case '\r': 490 if (file[p] == '\n') 491 continue; // ignore 492 // Treat isolated '\r' as if it were a '\n' 493 case '\n': 494 ++line; 495 continue; 496 497 case 0x1A: 498 throw new DLexerException(this, format("unterminated string constant starting at %s", s)); 499 return null; 500 501 case '"': 502 if (n & 1) { 503 throw new DLexerException(this, format("odd number (%d) of hex characters in hex string", n)); 504 return null; 505 } 506 s.length = i; 507 return s; 508 509 default: 510 if (c >= '0' && c <= '9') 511 c -= '0'; 512 else if (c >= 'a' && c <= 'f') 513 c -= 'a' - 10; 514 else if (c >= 'A' && c <= 'F') 515 c -= 'A' - 10; 516 else 517 throw new DLexerException(this, format("non-hex character '%c'", c)); 518 if (n & 1) { 519 v = (v << 4) | c; 520 if (i == s.length) s.length = s.length * 2; 521 s[i++] = v; 522 } else 523 v = c; 524 ++n; 525 break; 526 } 527 } 417 528 return null; 418 529 } … … 462 573 rettok.token = TOKdot; 463 574 return rettok; 464 break;465 575 466 576 case '&': … … 841 951 goto case_ident; 842 952 } 953 case '`': 843 954 rettok.token = TOKstring; 844 rettok.ident = wysiwygString( );845 return rettok; 846 847 case ' h':955 rettok.ident = wysiwygString(file[p]); 956 return rettok; 957 958 case 'x': 848 959 // HEX string? 849 960 ++p; … … 857 968 858 969 // Identifier start with _ or a-z,A-Z: 859 case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k', 'l',860 'm', 'n', 'o', 'p', 'q', 's', 't', 'u', 'v', 'w', 'x',970 case 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 971 'm', 'n', 'o', 'p', 'q', 's', 't', 'u', 'v', 'w', 861 972 'y', 'z': 862 973 case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', … … 884 995 885 996 // Numeric literal: 886 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 887 // FIXME!! 888 // TODO: Handle binary, hex, octal, decimal, float, etc. 889 start = p; 890 while (p < file.length) { 891 if (!isalnum(file[p])) break; 892 ++p; 893 } 894 895 // Default to an int32v token for any number: 896 rettok.token = TOKint32v; 897 rettok.ident = file[start .. p]; 898 return rettok; 997 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': { 998 rettok = number(); 999 return rettok; 1000 } 899 1001 900 1002 default: … … 904 1006 905 1007 return null; 1008 } 1009 1010 Token* inreal() { 1011 printf("inreal\n"); 1012 return new Token; 1013 } 1014 1015 // Parse a number and return the associated value: 1016 Token* number() { 1017 Token* rettok = null; 1018 1019 // We use a state machine to collect numbers 1020 enum : uint { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, 1021 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, 1022 STATE_hexh, STATE_error }; 1023 uint state; 1024 1025 enum : uint { 1026 FLAGS_decimal = 1, // decimal 1027 FLAGS_unsigned = 2, // u or U suffix 1028 FLAGS_long = 4, // l or L suffix 1029 }; 1030 uint flags = FLAGS_decimal; 1031 1032 uint i; 1033 int base; 1034 char c; 1035 char[] s; 1036 1037 uint start; 1038 ulong n; 1039 1040 state = STATE_initial; 1041 base = 0; 1042 1043 s.length = 8; 1044 1045 start = p; 1046 i = 0; 1047 while (p < file.length) { 1048 c = file[p]; 1049 switch (state) { 1050 case STATE_initial: // opening state 1051 if (c == '0') 1052 state = STATE_0; 1053 else 1054 state = STATE_decimal; 1055 break; 1056 1057 case STATE_0: 1058 flags = (flags & ~FLAGS_decimal); 1059 switch (c) { 1060 case 'X': 1061 case 'x': 1062 state = STATE_hex0; 1063 break; 1064 case '.': 1065 if (file[p+1] == '.') // .. is a separate token 1066 goto done; 1067 case 'i': 1068 case 'f': 1069 case 'F': 1070 goto realnum; 1071 case 'B': 1072 case 'b': 1073 state = STATE_binary0; 1074 break; 1075 1076 case '0': case '1': case '2': case '3': 1077 case '4': case '5': case '6': case '7': 1078 state = STATE_octal; 1079 break; 1080 1081 case '_': 1082 state = STATE_octal; 1083 ++p; 1084 continue; 1085 1086 default: 1087 goto done; 1088 } 1089 break; 1090 1091 case STATE_decimal: // reading decimal number 1092 if (!isdigit(c)) { 1093 if (c == '_') { // ignore embedded _ 1094 ++p; 1095 continue; 1096 } 1097 if (c == '.' && file[p+1] != '.') 1098 goto realnum; 1099 else if (c == 'i' || c == 'f' || c == 'F' || 1100 c == 'e' || c == 'E') { 1101 realnum: // It's a real number. Back up and rescan as a real 1102 p = start; 1103 return inreal(); 1104 } 1105 goto done; 1106 } 1107 break; 1108 1109 case STATE_hex0: // reading hex number 1110 case STATE_hex: 1111 if (!isxdigit(c)) { 1112 if (c == '_') { // ignore embedded _ 1113 ++p; 1114 continue; 1115 } 1116 if (c == '.' && file[p+1] != '.') 1117 goto realnum; 1118 if (c == 'P' || c == 'p' || c == 'i') 1119 goto realnum; 1120 if (state == STATE_hex0) 1121 throw new DLexerException(this, format("Hex digit expected, not '%c'", c)); 1122 goto done; 1123 } 1124 state = STATE_hex; 1125 break; 1126 1127 case STATE_octal: // reading octal number 1128 case STATE_octale: // reading octal number with non-octal digits 1129 if (!isodigit(c)) { 1130 if (c == '_') { // ignore embedded _ 1131 ++p; 1132 continue; 1133 } 1134 if (c == '.' && file[p+1] != '.') 1135 goto realnum; 1136 if (c == 'i') 1137 goto realnum; 1138 if (isdigit(c)) 1139 state = STATE_octale; 1140 else 1141 goto done; 1142 } 1143 break; 1144 1145 case STATE_binary0: // starting binary number 1146 case STATE_binary: // reading binary number 1147 if (c != '0' && c != '1') { 1148 if (c == '_') { // ignore embedded _ 1149 ++p; 1150 continue; 1151 } 1152 if (state == STATE_binary0) { 1153 throw new DLexerException(this, "binary digit expected"); 1154 state = STATE_error; 1155 break; 1156 } else 1157 goto done; 1158 } 1159 state = STATE_binary; 1160 break; 1161 1162 case STATE_error: // for error recovery 1163 if (!isdigit(c)) // scan until non-digit 1164 goto done; 1165 break; 1166 1167 default: 1168 assert(0); 1169 } 1170 1171 if (i == s.length) s.length = s.length * 2; 1172 s[i++] = c; 1173 ++p; 1174 } 1175 1176 done: 1177 s.length = i; 1178 if (state == STATE_octale) 1179 throw new DLexerException(this, "Octal digit expected"); 1180 1181 if ((i == 1) && (state == STATE_decimal || state == STATE_0)) 1182 n = s[0] - '0'; 1183 else { 1184 // Convert string to integer 1185 int q = 0; 1186 int r = 10, d; 1187 1188 if (s[q] == '0') { 1189 if (s[q+1] == 'x' || s[q+1] == 'X') { 1190 q += 2, r = 16; 1191 } else if (s[q+1] == 'b' || s[q+1] == 'B') { 1192 q += 2, r = 2; 1193 } else if (isdigit(s[q+1])) { 1194 q += 1, r = 8; 1195 } 1196 } 1197 1198 n = 0; 1199 while (q < s.length) { 1200 if (s[q] >= '0' && s[q] <= '9') 1201 d = s[q] - '0'; 1202 else if (s[q] >= 'a' && s[q] <= 'z') 1203 d = s[q] - 'a' + 10; 1204 else if (s[q] >= 'A' && s[q] <= 'Z') 1205 d = s[q] - 'A' + 10; 1206 else 1207 break; 1208 if (d >= r) 1209 break; 1210 if (n * r + d < n) { 1211 throw new DLexerException(this, "integer overflow"); 1212 break; 1213 } 1214 1215 n = n * r + d; 1216 ++q; 1217 } 1218 } 1219 1220 // Parse trailing 'u', 'U', 'l' or 'L' in any combination 1221 while (1) { 1222 uint f; 1223 1224 switch (file[p]) { 1225 case 'U': 1226 case 'u': 1227 f = FLAGS_unsigned; 1228 goto L1; 1229 case 'L': 1230 case 'l': 1231 f = FLAGS_long; 1232 L1: 1233 ++p; 1234 if (flags & f) 1235 throw new DLexerException(this, "unrecognized token"); 1236 flags = (flags | f); 1237 continue; 1238 1239 default: 1240 break; 1241 } 1242 break; 1243 } 1244 1245 rettok = new Token; 1246 switch (flags) { 1247 case 0: 1248 if (n & 0x8000000000000000L) 1249 rettok.token = TOKuns64v; 1250 else if (n & 0xFFFFFFFF00000000L) 1251 rettok.token = TOKint64v; 1252 else if (n & 0x80000000) 1253 rettok.token = TOKuns32v; 1254 else 1255 rettok.token = TOKint32v; 1256 break; 1257 1258 case FLAGS_decimal: 1259 if (n & 0x8000000000000000L) { 1260 throw new DLexerException(this, "signed integer overflow"); 1261 rettok.token = TOKuns64v; 1262 } 1263 else if (n & 0xFFFFFFFF80000000L) 1264 rettok.token = TOKint64v; 1265 else 1266 rettok.token = TOKint32v; 1267 break; 1268 1269 case FLAGS_unsigned: 1270 case FLAGS_decimal | FLAGS_unsigned: 1271 if (n & 0xFFFFFFFF00000000L) 1272 rettok.token = TOKuns64v; 1273 else 1274 rettok.token = TOKuns32v; 1275 break; 1276 1277 case FLAGS_decimal | FLAGS_long: 1278 if (n & 0x8000000000000000L) { 1279 throw new DLexerException(this, "signed integer overflow"); 1280 rettok.token = TOKuns64v; 1281 } else 1282 rettok.token = TOKint64v; 1283 break; 1284 1285 case FLAGS_long: 1286 if (n & 0x8000000000000000L) 1287 rettok.token = TOKuns64v; 1288 else 1289 rettok.token = TOKint64v; 1290 break; 1291 1292 case FLAGS_unsigned | FLAGS_long: 1293 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: 1294 rettok.token = TOKuns64v; 1295 break; 1296 1297 default: 1298 assert(0); 1299 } 1300 1301 rettok.uns64value = n; 1302 return rettok; 906 1303 } 907 1304 trunk/dtags.d
r11 r12 59 59 struct DEnum { 60 60 char[] name; 61 uint type; // type of values in enumeration 61 uint type; // type of values in enumeration (TOKint32, TOKuns32, ...) 62 62 63 63 struct enumValue { 64 64 char[] name; 65 char[] value; 66 } 65 union { 66 int int32value; 67 uint uns32value; 68 long int64value; 69 ulong uns64value; 70 }; 71 }; 67 72 enumValue[] values; 68 73 }; … … 672 677 DEnum* en = new DEnum; 673 678 int i; 679 bool negative; 680 // Keep track of the enumeration value: 681 union integraltype { 682 int int32value; 683 uint uns32value; 684 long int64value; 685 ulong uns64value; 686 }; 687 integraltype m; 674 688 675 689 tok = nextToken(); … … 688 702 tok = nextToken(); 689 703 } 690 } else if (isType(tok.token)) { 691 // enum type : 692 en.type = tok.token; 693 expect(TOKcolon, "':'"); 704 } else if (tok.token == TOKcolon) { 694 705 tok = nextToken(); 706 if (isType(tok.token)) { 707 // enum : type 708 en.type = tok.token; 709 tok = nextToken(); 710 } else throw new DLexerException(this, "expected type after ':'"); 695 711 } 696 712 … … 713 729 tok = nextToken(); 714 730 if (tok.token == TOKassign) { 731 negative = false; 732 715 733 for (;;) { 716 734 tok = nextToken(); 717 if (tok.token == TOKcomma) break; 718 else if (tok.token == TOKrcurly) break; 719 720 if (tok.token == TOKidentifier) ev.value ~= tok.ident; 721 else if (tok.token == TOKint32v) ev.value ~= tok.ident; 722 else ev.value ~= toktostr[tok.token]; 735 if ((tok.token == TOKcomma) || (tok.token == TOKrcurly)) break; 736 737 if (tok.token == TOKmin) { 738 // Negative number: 739 negative = true; 740 } else { 741 // Specified enumeration value: 742 switch (en.type) { 743 case TOKint8, TOKint16, TOKint32: 744 m.int32value = (ev.int32value = tok.int32value) + 1; 745 break; 746 case TOKuns8, TOKuns16, TOKuns32: 747 m.uns32value = (ev.uns32value = tok.uns32value) + 1; 748 break; 749 case TOKint64: 750 m.int64value = (ev.int64value = tok.int64value) + 1; 751 break; 752 case TOKuns64: 753 m.uns64value = (ev.uns64value = tok.uns64value) + 1; 754 break; 755 } 756 } 757 } 758 // Set negative? 759 if (negative) { 760 switch (en.type) { 761 case TOKint8, TOKint16, TOKint32: 762 m.int32value = (ev.int32value = -ev.int32value) + 1; 763 break; 764 case TOKint64: 765 m.int64value = (ev.int64value = -ev.int64value) + 1; 766 break; 767 } 768 } 769 } else { 770 // Not given a value, make it default 1 + the last value: 771 switch (en.type) { 772 case TOKint8, TOKint16, TOKint32: 773 m.int32value = (ev.int32value = m.int32value) + 1; 774 break; 775 case TOKuns8, TOKuns16, TOKuns32: 776 m.uns32value = (ev.uns32value = m.uns32value) + 1; 777 break; 778 case TOKint64: 779 m.int64value = (ev.int64value = m.int64value) + 1; 780 break; 781 case TOKuns64: 782 m.uns64value = (ev.uns64value = m.uns64value) + 1; 783 break; 723 784 } 724 785 } … … 757 818 else if (tok.token == TOKstring) 758 819 printf(`"%.*s" `, tok.ident); 820 // Numbers: 759 821 else if (tok.token == TOKint32v) 760 printf(`%.*s `, tok.ident); 822 printf(`%ld `, tok.int32value); 823 else if (tok.token == TOKuns32v) 824 printf(`%ud `, tok.uns32value); 825 else if (tok.token == TOKint64v) 826 printf(`%ld `, tok.int64value); 827 else if (tok.token == TOKuns64v) 828 printf(`%lud `, tok.uns64value); 829 // Just a regular token: 761 830 else 762 831 printf(`%.*s `, toktostr[tok.token]); … … 766 835 // Indentation: 767 836 if (!(ntok is null)) 768 if ( peekToken().token == TOKrcurly)837 if (ntok.token == TOKrcurly) 769 838 --indent; 770 839 … … 970 1039 break; 971 1040 } 1041 1042 case TOKalias: { 1043 while (nextToken().token != TOKsemicolon) { } 1044 break; 1045 } 972 1046 973 1047 default: … … 1099 1173 foreach (DEnum* en; lex.dmodule.enums) { 1100 1174 printf("\tenum %.*s {\n", en.name); 1101 foreach (DEnum.enumValue ev; en.values) { 1102 if (ev.value is null) 1103 printf("\t\t%.*s,\n", ev.name); 1104 else 1105 printf("\t\t%.*s = %.*s,\n", ev.name, ev.value); 1175 switch (en.type) { 1176 case TOKint8, TOKint16, TOKint32: 1177 foreach (DEnum.enumValue ev; en.values) { 1178 printf("\t\t%.*s = %d,\n", ev.name, ev.int32value); 1179 } 1180 break; 1181 case TOKuns8, TOKuns16, TOKuns32: 1182 foreach (DEnum.enumValue ev; en.values) { 1183 printf("\t\t%.*s = %u,\n", ev.name, ev.uns32value); 1184 } 1185 break; 1186 case TOKint64: 1187 foreach (DEnum.enumValue ev; en.values) { 1188 printf("\t\t%.*s = %ld,\n", ev.name, ev.int64value); 1189 } 1190 break; 1191 case TOKuns64: 1192 foreach (DEnum.enumValue ev; en.values) { 1193 printf("\t\t%.*s = %lu,\n", ev.name, ev.uns64value); 1194 } 1195 break; 1106 1196 } 1107 1197 printf("\t}\n\n");
