Changeset 825
- Timestamp:
- 12/28/10 03:26:53 (14 years ago)
- Files:
-
- branches/dmd-1.x/src/constfold.c (modified) (4 diffs)
- branches/dmd-1.x/src/utf.c (modified) (2 diffs)
- branches/dmd-1.x/src/utf.h (modified) (1 diff)
- trunk/src/constfold.c (modified) (3 diffs)
- trunk/src/utf.c (modified) (2 diffs)
- trunk/src/utf.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/dmd-1.x/src/constfold.c
r670 r825 8 8 // in artistic.txt, or the GNU General Public License in gnu.txt. 9 9 // See the included readme.txt for details. 10 10 11 11 #include <stdio.h> 12 12 #include <stdlib.h> 13 13 #include <assert.h> 14 14 #include <math.h> 15 15 16 16 #if __DMC__ 17 17 #include <complex.h> 18 18 #endif 19 19 20 20 #include "rmem.h" 21 21 #include "root.h" 22 22 #include "port.h" 23 23 24 24 #include "mtype.h" 25 25 #include "expression.h" 26 26 #include "aggregate.h" 27 27 #include "declaration.h" 28 #include "utf.h" 28 29 29 30 #ifdef IN_GCC 30 31 #include "d-gcc-real.h" 31 32 32 33 /* %% fix? */ 33 34 extern "C" bool real_isnan (const real_t *); 34 35 #endif 35 36 36 37 static real_t zero; // work around DMC bug for now 37 38 38 39 #if __FreeBSD__ 39 40 #define fmodl fmod // hack for now, fix later 40 41 #endif 41 42 42 43 #define LOG 0 43 44 44 45 Expression *expType(Type *type, Expression *e) 45 46 { 46 47 if (type != e->type) 47 48 { … … 1304 1305 e->type = type; 1305 1306 } 1306 1307 } 1307 1308 return e; 1308 1309 } 1309 1310 1310 1311 /* Also return EXP_CANT_INTERPRET if this fails 1311 1312 */ 1312 1313 Expression *Cat(Type *type, Expression *e1, Expression *e2) 1313 1314 { Expression *e = EXP_CANT_INTERPRET; 1314 1315 Loc loc = e1->loc; 1315 1316 Type *t; 1316 1317 Type *t1 = e1->type->toBasetype(); 1317 1318 Type *t2 = e2->type->toBasetype(); 1318 1319 1319 1320 //printf("Cat(e1 = %s, e2 = %s)\n", e1->toChars(), e2->toChars()); 1320 1321 //printf("\tt1 = %s, t2 = %s, type = %s\n", t1->toChars(), t2->toChars(), type->toChars()); 1321 1322 1322 1323 if (e1->op == TOKnull && (e2->op == TOKint64 || e2->op == TOKstructliteral)) 1323 1324 { e = e2; 1325 t = t1; 1324 1326 goto L2; 1325 1327 } 1326 1328 else if ((e1->op == TOKint64 || e1->op == TOKstructliteral) && e2->op == TOKnull) 1327 1329 { e = e1; 1330 t = t2; 1328 1331 L2: 1329 1332 Type *tn = e->type->toBasetype(); 1330 1333 if (tn->ty == Tchar || tn->ty == Twchar || tn->ty == Tdchar) 1331 1334 { 1332 1335 // 
Create a StringExp 1333 1336 void *s; 1334 1337 StringExp *es; 1335 size_t len = 1; 1336 int sz = tn->size(); 1338 if (t->nextOf()) 1339 t = t->nextOf()->toBasetype(); 1340 int sz = t->size(); 1341 1337 1342 dinteger_t v = e->toInteger(); 1338 1343 1344 size_t len = utf_codeLength(sz, v); 1339 1345 s = mem.malloc((len + 1) * sz); 1340 memcpy((unsigned char *)s, &v, sz); 1346 utf_encode(sz, s, v); 1341 1347 1342 1348 // Add terminating 0 1343 1349 memset((unsigned char *)s + len * sz, 0, sz); 1344 1350 1345 1351 es = new StringExp(loc, s, len); 1346 1352 es->sz = sz; 1347 1353 es->committed = 1; 1348 1354 e = es; 1349 1355 } 1350 1356 else 1351 1357 { // Create an ArrayLiteralExp 1352 1358 Expressions *elements = new Expressions(); 1353 1359 elements->push(e); 1354 1360 e = new ArrayLiteralExp(e->loc, elements); 1355 1361 } 1356 1362 e->type = type; 1357 1363 return e; 1358 1364 } 1359 1365 else if (e1->op == TOKstring && e2->op == TOKstring) 1360 1366 { … … 1382 1388 // Add terminating 0 1383 1389 memset((unsigned char *)s + len * sz, 0, sz); 1384 1390 1385 1391 es = new StringExp(loc, s, len); 1386 1392 es->sz = sz; 1387 1393 es->committed = es1->committed | es2->committed; 1388 1394 if (es1->committed) 1389 1395 t = es1->type; 1390 1396 else 1391 1397 t = es2->type; 1392 1398 es->type = type; 1393 1399 e = es; 1394 1400 } 1395 1401 else if (e1->op == TOKstring && e2->op == TOKint64) 1396 1402 { 1397 1403 // Concatenate the strings 1398 1404 void *s; 1399 1405 StringExp *es1 = (StringExp *)e1; 1400 1406 StringExp *es; 1401 1407 Type *t; 1402 size_t len = es1->len + 1; 1403 1408 int sz = es1->sz; 1404 1409 dinteger_t v = e2->toInteger(); 1405 1410 1411 size_t len = es1->len + utf_codeLength(sz, v); 1406 1412 s = mem.malloc((len + 1) * sz); 1407 1413 memcpy(s, es1->string, es1->len * sz); 1408 memcpy((unsigned char *)s + es1->len * sz, &v, sz); 1414 utf_encode(sz, (unsigned char *)s + (sz * es1->len), v); 1409 1415 1410 1416 // Add terminating 0 1411 1417 
memset((unsigned char *)s + len * sz, 0, sz); 1412 1418 1413 1419 es = new StringExp(loc, s, len); 1414 1420 es->sz = sz; 1415 1421 es->committed = es1->committed; 1416 1422 t = es1->type; 1417 1423 es->type = type; 1418 1424 e = es; 1419 1425 } 1420 1426 else if (e1->op == TOKint64 && e2->op == TOKstring) 1421 1427 { 1422 1428 // Concatenate the strings 1423 1429 void *s; 1424 1430 StringExp *es2 = (StringExp *)e2; 1425 1431 StringExp *es; 1426 1432 Type *t; 1427 1433 size_t len = 1 + es2->len; 1428 1434 int sz = es2->sz; … … 1438 1444 es = new StringExp(loc, s, len); 1439 1445 es->sz = sz; 1440 1446 es->committed = es2->committed; 1441 1447 t = es2->type; 1442 1448 es->type = type; 1443 1449 e = es; 1444 1450 } 1445 1451 else if (e1->op == TOKarrayliteral && e2->op == TOKarrayliteral && 1446 1452 e1->type->equals(e2->type)) 1447 1453 { 1448 1454 // Concatenate the arrays 1449 1455 ArrayLiteralExp *es1 = (ArrayLiteralExp *)e1; 1450 1456 ArrayLiteralExp *es2 = (ArrayLiteralExp *)e2; 1451 1457 1452 1458 es1 = new ArrayLiteralExp(es1->loc, (Expressions *)es1->elements->copy()); 1453 1459 es1->elements->insert(es1->elements->dim, es2->elements); 1454 1460 e = es1; 1455 1461 1456 1462 if (type->toBasetype()->ty == Tsarray) 1457 1463 { 1458 e->type = new TypeSArray(t1->next, new IntegerExp(loc, es1->elements->dim, Type::tindex)); 1464 e->type = new TypeSArray(t1->nextOf(), new IntegerExp(loc, es1->elements->dim, Type::tindex)); 1459 1465 e->type = e->type->semantic(loc, NULL); 1460 1466 } 1461 1467 else 1462 1468 e->type = type; 1463 1469 } 1464 1470 else if (e1->op == TOKarrayliteral && e2->op == TOKnull && 1465 1471 t1->nextOf()->equals(t2->nextOf())) 1466 1472 { 1467 1473 e = e1; 1468 1474 goto L3; 1469 1475 } 1470 1476 else if (e1->op == TOKnull && e2->op == TOKarrayliteral && 1471 1477 t1->nextOf()->equals(t2->nextOf())) 1472 1478 { 1473 1479 e = e2; 1474 1480 L3: 1475 1481 // Concatenate the array with null 1476 1482 ArrayLiteralExp *es = (ArrayLiteralExp *)e; 
1477 1483 1478 1484 es = new ArrayLiteralExp(es->loc, (Expressions *)es->elements->copy()); branches/dmd-1.x/src/utf.c
r493 r825 1 1 // utf.c 2 // Copyright (c) 2003-2010 by Digital Mars 2 // Copyright (c) 2003-2009 by Digital Mars 3 3 // All Rights Reserved 4 4 // written by Walter Bright 5 5 // http://www.digitalmars.com 6 6 // License for redistribution is by either the Artistic License 7 7 // in artistic.txt, or the GNU General Public License in gnu.txt. 8 8 // See the included readme.txt for details. 9 9 10 10 // Description of UTF-8 at: 11 11 // http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 12 12 13 13 #include <stdio.h> 14 #include <string.h> 14 15 #include <assert.h> 15 16 16 17 #include "utf.h" 17 18 18 19 int utf_isValidDchar(dchar_t c) 19 20 { 20 21 return c < 0xD800 || 21 22 (c > 0xDFFF && c <= 0x10FFFF && c != 0xFFFE && c != 0xFFFF); 23 } 24 25 static const unsigned char UTF8stride[256] = 26 { 27 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 28 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 29 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 31 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 32 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 33 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 34 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 35 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 36 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 37 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 38 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 39 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 40 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 41 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 42 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, 43 }; 44 45 /** 46 * stride() returns the length of a UTF-8 sequence starting at index i 47 * in string s. 48 * Returns: 49 * The number of bytes in the UTF-8 sequence or 50 * 0xFF meaning s[i] is not the start of of UTF-8 sequence. 
51 */ 52 53 unsigned stride(unsigned char* s, size_t i) 54 { 55 unsigned result = UTF8stride[s[i]]; 56 return result; 22 57 } 23 58 24 59 /******************************************** 25 60 * Decode a single UTF-8 character sequence. 26 61 * Returns: 27 62 * NULL success 28 63 * !=NULL error message string 29 64 */ 30 65 31 66 const char *utf_decodeChar(unsigned char *s, size_t len, size_t *pidx, dchar_t *presult) 32 67 { 33 68 dchar_t V; 34 69 size_t i = *pidx; 35 70 unsigned char u = s[i]; 36 71 37 72 //printf("utf_decodeChar(s = %02x, %02x, %02x len = %d)\n", u, s[1], s[2], len); 38 73 39 74 assert(i >= 0 && i < len); 40 75 41 76 if (u & 0x80) … … 176 211 } 177 212 else 178 213 i++; 179 214 } 180 215 else 181 216 { 182 217 i++; 183 218 } 184 219 185 220 assert(utf_isValidDchar(u)); 186 221 *pidx = i; 187 222 *presult = (dchar_t)u; 188 223 return NULL; 189 224 190 225 Lerr: 191 226 *presult = (dchar_t)s[i]; 192 227 *pidx = i + 1; 193 228 return msg; 194 229 } 195 230 231 void utf_encodeChar(unsigned char *s, dchar_t c) 232 { 233 if (c <= 0x7F) 234 { 235 s[0] = (char) c; 236 } 237 else if (c <= 0x7FF) 238 { 239 s[0] = (char)(0xC0 | (c >> 6)); 240 s[1] = (char)(0x80 | (c & 0x3F)); 241 } 242 else if (c <= 0xFFFF) 243 { 244 s[0] = (char)(0xE0 | (c >> 12)); 245 s[1] = (char)(0x80 | ((c >> 6) & 0x3F)); 246 s[2] = (char)(0x80 | (c & 0x3F)); 247 } 248 else if (c <= 0x10FFFF) 249 { 250 s[0] = (char)(0xF0 | (c >> 18)); 251 s[1] = (char)(0x80 | ((c >> 12) & 0x3F)); 252 s[2] = (char)(0x80 | ((c >> 6) & 0x3F)); 253 s[3] = (char)(0x80 | (c & 0x3F)); 254 } 255 else 256 assert(0); 257 } 258 259 void utf_encodeWchar(unsigned short *s, dchar_t c) 260 { 261 if (c <= 0xFFFF) 262 { 263 s[0] = (wchar_t) c; 264 } 265 else 266 { 267 s[0] = (wchar_t) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); 268 s[1] = (wchar_t) (((c - 0x10000) & 0x3FF) + 0xDC00); 269 } 270 } 271 272 273 /** 274 * Returns the code length of c in the encoding. 275 * The code is returned in character count, not in bytes. 
276 */ 277 278 int utf_codeLengthChar(dchar_t c) 279 { 280 return 281 c <= 0x7F ? 1 282 : c <= 0x7FF ? 2 283 : c <= 0xFFFF ? 3 284 : c <= 0x10FFFF ? 4 285 : (assert(false), 6); 286 } 287 288 int utf_codeLengthWchar(dchar_t c) 289 { 290 return c <= 0xFFFF ? 1 : 2; 291 } 292 293 /** 294 * Returns the code length of c in the encoding. 295 * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32. 296 * The code is returned in character count, not in bytes. 297 */ 298 int utf_codeLength(int sz, dchar_t c) 299 { 300 if (sz == 1) 301 return utf_codeLengthChar(c); 302 if (sz == 2) 303 return utf_codeLengthWchar(c); 304 assert(sz == 4); 305 return 1; 306 } 307 308 void utf_encode(int sz, void *s, dchar_t c) 309 { 310 if (sz == 1) 311 utf_encodeChar((unsigned char *)s, c); 312 else if (sz == 2) 313 utf_encodeWchar((unsigned short *)s, c); 314 else 315 { 316 assert(sz == 4); 317 memcpy((unsigned char *)s, &c, sz); 318 } 319 } 320 branches/dmd-1.x/src/utf.h
r493 r825 6 6 // http://www.digitalmars.com 7 7 // License for redistribution is by either the Artistic License 8 8 // in artistic.txt, or the GNU General Public License in gnu.txt. 9 9 // See the included readme.txt for details. 10 10 11 11 #ifndef DMD_UTF_H 12 12 #define DMD_UTF_H 13 13 14 14 15 15 typedef unsigned dchar_t; 16 16 17 17 int utf_isValidDchar(dchar_t c); 18 18 19 19 const char *utf_decodeChar(unsigned char *s, size_t len, size_t *pidx, dchar_t *presult); 20 20 const char *utf_decodeWchar(unsigned short *s, size_t len, size_t *pidx, dchar_t *presult); 21 21 22 22 const char *utf_validateString(unsigned char *s, size_t len); 23 23 24 24 extern int isUniAlpha(dchar_t); 25 25 26 void utf_encodeChar(unsigned char *s, dchar_t c); 27 void utf_encodeWchar(unsigned short *s, dchar_t c); 28 29 int utf_codeLengthChar(dchar_t c); 30 int utf_codeLengthWchar(dchar_t c); 31 32 int utf_codeLength(int sz, dchar_t c); 33 void utf_encode(int sz, void *s, dchar_t c); 34 26 35 #endif trunk/src/constfold.c
r670 r825 8 8 // in artistic.txt, or the GNU General Public License in gnu.txt. 9 9 // See the included readme.txt for details. 10 10 11 11 #include <stdio.h> 12 12 #include <stdlib.h> 13 13 #include <assert.h> 14 14 #include <math.h> 15 15 16 16 #if __DMC__ 17 17 #include <complex.h> 18 18 #endif 19 19 20 20 #include "rmem.h" 21 21 #include "root.h" 22 22 #include "port.h" 23 23 24 24 #include "mtype.h" 25 25 #include "expression.h" 26 26 #include "aggregate.h" 27 27 #include "declaration.h" 28 #include "utf.h" 28 29 29 30 #if __FreeBSD__ 30 31 #define fmodl fmod // hack for now, fix later 31 32 #endif 32 33 33 34 #define LOG 0 34 35 35 36 Expression *expType(Type *type, Expression *e) 36 37 { 37 38 if (type != e->type) 38 39 { 39 40 e = e->copy(); 40 41 e->type = type; 41 42 } 42 43 return e; 43 44 } 44 45 45 46 /* ================================== isConst() ============================== */ 46 47 47 48 int Expression::isConst() … … 1336 1337 e->type = type; 1337 1338 } 1338 1339 } 1339 1340 return e; 1340 1341 } 1341 1342 1342 1343 /* Also return EXP_CANT_INTERPRET if this fails 1343 1344 */ 1344 1345 Expression *Cat(Type *type, Expression *e1, Expression *e2) 1345 1346 { Expression *e = EXP_CANT_INTERPRET; 1346 1347 Loc loc = e1->loc; 1347 1348 Type *t; 1348 1349 Type *t1 = e1->type->toBasetype(); 1349 1350 Type *t2 = e2->type->toBasetype(); 1350 1351 1351 1352 //printf("Cat(e1 = %s, e2 = %s)\n", e1->toChars(), e2->toChars()); 1352 1353 //printf("\tt1 = %s, t2 = %s, type = %s\n", t1->toChars(), t2->toChars(), type->toChars()); 1353 1354 1354 1355 if (e1->op == TOKnull && (e2->op == TOKint64 || e2->op == TOKstructliteral)) 1355 1356 { e = e2; 1357 t = t1; 1356 1358 goto L2; 1357 1359 } 1358 1360 else if ((e1->op == TOKint64 || e1->op == TOKstructliteral) && e2->op == TOKnull) 1359 1361 { e = e1; 1362 t = t2; 1360 1363 L2: 1361 1364 Type *tn = e->type->toBasetype(); 1362 1365 if (tn->ty == Tchar || tn->ty == Twchar || tn->ty == Tdchar) 1363 1366 { 1364 1367 // 
Create a StringExp 1365 1368 void *s; 1366 1369 StringExp *es; 1367 size_t len = 1; 1368 int sz = tn->size(); 1370 if (t->nextOf()) 1371 t = t->nextOf()->toBasetype(); 1372 int sz = t->size(); 1373 1369 1374 dinteger_t v = e->toInteger(); 1370 1375 1376 size_t len = utf_codeLength(sz, v); 1371 1377 s = mem.malloc((len + 1) * sz); 1372 memcpy((unsigned char *)s, &v, sz); 1378 utf_encode(sz, s, v); 1373 1379 1374 1380 // Add terminating 0 1375 1381 memset((unsigned char *)s + len * sz, 0, sz); 1376 1382 1377 1383 es = new StringExp(loc, s, len); 1378 1384 es->sz = sz; 1379 1385 es->committed = 1; 1380 1386 e = es; 1381 1387 } 1382 1388 else 1383 1389 { // Create an ArrayLiteralExp 1384 1390 Expressions *elements = new Expressions(); 1385 1391 elements->push(e); 1386 1392 e = new ArrayLiteralExp(e->loc, elements); 1387 1393 } 1388 1394 e->type = type; 1389 1395 return e; 1390 1396 } 1391 1397 else if (e1->op == TOKstring && e2->op == TOKstring) 1392 1398 { … … 1442 1448 dinteger_t v = es2e->toInteger(); 1443 1449 memcpy((unsigned char *)s + (es1->len + i) * sz, &v, sz); 1444 1450 } 1445 1451 1446 1452 // Add terminating 0 1447 1453 memset((unsigned char *)s + len * sz, 0, sz); 1448 1454 1449 1455 StringExp *es = new StringExp(loc, s, len); 1450 1456 es->sz = sz; 1451 1457 es->committed = 0; //es1->committed; 1452 1458 es->type = type; 1453 1459 e = es; 1454 1460 } 1455 1461 else if (e1->op == TOKstring && e2->op == TOKint64) 1456 1462 { 1457 1463 // Concatenate the strings 1458 1464 void *s; 1459 1465 StringExp *es1 = (StringExp *)e1; 1460 1466 StringExp *es; 1461 1467 Type *t; 1462 size_t len = es1->len + 1; 1463 1468 int sz = es1->sz; 1464 1469 dinteger_t v = e2->toInteger(); 1465 1470 1471 size_t len = es1->len + utf_codeLength(sz, v); 1466 1472 s = mem.malloc((len + 1) * sz); 1467 1473 memcpy(s, es1->string, es1->len * sz); 1468 memcpy((unsigned char *)s + es1->len * sz, &v, sz); 1474 utf_encode(sz, (unsigned char *)s + (sz * es1->len), v); 1469 1475 1470 1476 // Add 
terminating 0 1471 1477 memset((unsigned char *)s + len * sz, 0, sz); 1472 1478 1473 1479 es = new StringExp(loc, s, len); 1474 1480 es->sz = sz; 1475 1481 es->committed = es1->committed; 1476 1482 t = es1->type; 1477 1483 es->type = type; 1478 1484 e = es; 1479 1485 } 1480 1486 else if (e1->op == TOKint64 && e2->op == TOKstring) 1481 1487 { 1482 1488 // Concatenate the strings 1483 1489 void *s; 1484 1490 StringExp *es2 = (StringExp *)e2; 1485 1491 StringExp *es; 1486 1492 Type *t; 1487 1493 size_t len = 1 + es2->len; 1488 1494 int sz = es2->sz; trunk/src/utf.c
r436 r825 1 1 // utf.c 2 2 // Copyright (c) 2003-2009 by Digital Mars 3 3 // All Rights Reserved 4 4 // written by Walter Bright 5 5 // http://www.digitalmars.com 6 6 // License for redistribution is by either the Artistic License 7 7 // in artistic.txt, or the GNU General Public License in gnu.txt. 8 8 // See the included readme.txt for details. 9 9 10 10 // Description of UTF-8 at: 11 11 // http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 12 12 13 13 #include <stdio.h> 14 #include <string.h> 14 15 #include <assert.h> 15 16 16 17 #include "utf.h" 17 18 18 19 int utf_isValidDchar(dchar_t c) 19 20 { 20 21 return c < 0xD800 || 21 22 (c > 0xDFFF && c <= 0x10FFFF && c != 0xFFFE && c != 0xFFFF); 22 23 } 23 24 24 25 static const unsigned char UTF8stride[256] = 25 26 { 26 27 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 27 28 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 28 29 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 29 30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 30 31 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 31 32 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 32 33 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 33 34 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, … … 210 211 } 211 212 else 212 213 i++; 213 214 } 214 215 else 215 216 { 216 217 i++; 217 218 } 218 219 219 220 assert(utf_isValidDchar(u)); 220 221 *pidx = i; 221 222 *presult = (dchar_t)u; 222 223 return NULL; 223 224 224 225 Lerr: 225 226 *presult = (dchar_t)s[i]; 226 227 *pidx = i + 1; 227 228 return msg; 228 229 } 229 230 231 void utf_encodeChar(unsigned char *s, dchar_t c) 232 { 233 if (c <= 0x7F) 234 { 235 s[0] = (char) c; 236 } 237 else if (c <= 0x7FF) 238 { 239 s[0] = (char)(0xC0 | (c >> 6)); 240 s[1] = (char)(0x80 | (c & 0x3F)); 241 } 242 else if (c <= 0xFFFF) 243 { 244 s[0] = (char)(0xE0 | (c >> 12)); 245 s[1] = (char)(0x80 | ((c >> 6) & 0x3F)); 246 s[2] = (char)(0x80 | (c & 0x3F)); 247 } 248 else if (c <= 0x10FFFF) 249 { 250 s[0] = (char)(0xF0 | (c >> 18)); 251 s[1] = (char)(0x80 | ((c >> 12) & 0x3F)); 252 s[2] = (char)(0x80 | ((c >> 6) & 0x3F)); 253 s[3] = (char)(0x80 | (c & 0x3F)); 254 
} 255 else 256 assert(0); 257 } 258 259 void utf_encodeWchar(unsigned short *s, dchar_t c) 260 { 261 if (c <= 0xFFFF) 262 { 263 s[0] = (wchar_t) c; 264 } 265 else 266 { 267 s[0] = (wchar_t) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); 268 s[1] = (wchar_t) (((c - 0x10000) & 0x3FF) + 0xDC00); 269 } 270 } 271 272 273 /** 274 * Returns the code length of c in the encoding. 275 * The code is returned in character count, not in bytes. 276 */ 277 278 int utf_codeLengthChar(dchar_t c) 279 { 280 return 281 c <= 0x7F ? 1 282 : c <= 0x7FF ? 2 283 : c <= 0xFFFF ? 3 284 : c <= 0x10FFFF ? 4 285 : (assert(false), 6); 286 } 287 288 int utf_codeLengthWchar(dchar_t c) 289 { 290 return c <= 0xFFFF ? 1 : 2; 291 } 292 293 /** 294 * Returns the code length of c in the encoding. 295 * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32. 296 * The code is returned in character count, not in bytes. 297 */ 298 int utf_codeLength(int sz, dchar_t c) 299 { 300 if (sz == 1) 301 return utf_codeLengthChar(c); 302 if (sz == 2) 303 return utf_codeLengthWchar(c); 304 assert(sz == 4); 305 return 1; 306 } 307 308 void utf_encode(int sz, void *s, dchar_t c) 309 { 310 if (sz == 1) 311 utf_encodeChar((unsigned char *)s, c); 312 else if (sz == 2) 313 utf_encodeWchar((unsigned short *)s, c); 314 else 315 { 316 assert(sz == 4); 317 memcpy((unsigned char *)s, &c, sz); 318 } 319 } 320 trunk/src/utf.h
r189 r825 1 1 // Compiler implementation of the D programming language 2 2 // utf.h 3 // Copyright (c) 2003-2008 by Digital Mars 3 // Copyright (c) 2003-2010 by Digital Mars 4 4 // All Rights Reserved 5 5 // written by Walter Bright 6 6 // http://www.digitalmars.com 7 7 // License for redistribution is by either the Artistic License 8 8 // in artistic.txt, or the GNU General Public License in gnu.txt. 9 9 // See the included readme.txt for details. 10 10 11 11 #ifndef DMD_UTF_H 12 12 #define DMD_UTF_H 13 13 14 14 15 15 typedef unsigned dchar_t; 16 16 17 17 int utf_isValidDchar(dchar_t c); 18 18 19 19 const char *utf_decodeChar(unsigned char *s, size_t len, size_t *pidx, dchar_t *presult); 20 20 const char *utf_decodeWchar(unsigned short *s, size_t len, size_t *pidx, dchar_t *presult); 21 21 22 22 const char *utf_validateString(unsigned char *s, size_t len); 23 23 24 24 extern int isUniAlpha(dchar_t); 25 25 26 void utf_encodeChar(unsigned char *s, dchar_t c); 27 void utf_encodeWchar(unsigned short *s, dchar_t c); 28 29 int utf_codeLengthChar(dchar_t c); 30 int utf_codeLengthWchar(dchar_t c); 31 32 int utf_codeLength(int sz, dchar_t c); 33 void utf_encode(int sz, void *s, dchar_t c); 34 26 35 #endif
