Changeset 834
- Timestamp:
- 08/10/08 14:26:50 (4 months ago)
- Files:
-
- branches/phobos-1.x/phobos/internal/arraybyte.d (modified) (14 diffs)
- branches/phobos-1.x/phobos/internal/arraydouble.d (modified) (26 diffs)
- branches/phobos-1.x/phobos/internal/arrayfloat.d (modified) (16 diffs)
- branches/phobos-1.x/phobos/internal/arrayint.d (modified) (45 diffs)
- branches/phobos-1.x/phobos/internal/arrayreal.d (modified) (3 diffs)
- branches/phobos-1.x/phobos/internal/arrayshort.d (modified) (41 diffs)
- branches/phobos-1.x/phobos/std/math.d (modified) (1 diff)
- branches/phobos-1.x/phobos/std/thread.d (modified) (2 diffs)
- branches/phobos-1.x/phobos/win32.mak (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/phobos-1.x/phobos/internal/arraybyte.d
r823 r834 32 32 } 33 33 34 version = log;34 //version = log; 35 35 36 36 bool disjoint(T)(T[] a, T[] b) … … 250 250 T[] a = new T[dim + j]; // aligned on 16 byte boundary 251 251 a = a[j .. dim + j]; // misalign for second iteration 252 T[] b = new T[dim]; 253 T[] c = new T[dim]; 252 T[] b = new T[dim + j]; 253 b = b[j .. dim + j]; 254 T[] c = new T[dim + j]; 255 c = c[j .. dim + j]; 254 256 255 257 for (int i = 0; i < dim; i++) … … 459 461 T[] a = new T[dim + j]; // aligned on 16 byte boundary 460 462 a = a[j .. dim + j]; // misalign for second iteration 461 T[] b = new T[dim]; 462 T[] c = new T[dim]; 463 T[] b = new T[dim + j]; 464 b = b[j .. dim + j]; 465 T[] c = new T[dim + j]; 466 c = c[j .. dim + j]; 463 467 464 468 for (int i = 0; i < dim; i++) … … 627 631 unittest 628 632 { 629 printf("_arrayExpSliceAddass SliceAssign_g unittest\n");633 printf("_arrayExpSliceAddass_g unittest\n"); 630 634 631 635 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) … … 638 642 T[] a = new T[dim + j]; // aligned on 16 byte boundary 639 643 a = a[j .. dim + j]; // misalign for second iteration 640 T[] b = new T[dim]; 641 T[] c = new T[dim]; 644 T[] b = new T[dim + j]; 645 b = b[j .. dim + j]; 646 T[] c = new T[dim + j]; 647 c = c[j .. dim + j]; 642 648 643 649 for (int i = 0; i < dim; i++) … … 822 828 unittest 823 829 { 824 printf("_arraySliceSliceAddass SliceAssign_g unittest\n");830 printf("_arraySliceSliceAddass_g unittest\n"); 825 831 826 832 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) … … 833 839 T[] a = new T[dim + j]; // aligned on 16 byte boundary 834 840 a = a[j .. dim + j]; // misalign for second iteration 835 T[] b = new T[dim]; 836 T[] c = new T[dim]; 841 T[] b = new T[dim + j]; 842 b = b[j .. dim + j]; 843 T[] c = new T[dim + j]; 844 c = c[j .. dim + j]; 837 845 838 846 for (int i = 0; i < dim; i++) … … 1060 1068 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1061 1069 a = a[j .. dim + j]; // misalign for second iteration 1062 T[] b = new T[dim]; 1063 T[] c = new T[dim]; 1070 T[] b = new T[dim + j]; 1071 b = b[j .. dim + j]; 1072 T[] c = new T[dim + j]; 1073 c = c[j .. dim + j]; 1064 1074 1065 1075 for (int i = 0; i < dim; i++) … … 1268 1278 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1269 1279 a = a[j .. dim + j]; // misalign for second iteration 1270 T[] b = new T[dim]; 1271 T[] c = new T[dim]; 1280 T[] b = new T[dim + j]; 1281 b = b[j .. dim + j]; 1282 T[] c = new T[dim + j]; 1283 c = c[j .. dim + j]; 1272 1284 1273 1285 for (int i = 0; i < dim; i++) … … 1473 1485 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1474 1486 a = a[j .. dim + j]; // misalign for second iteration 1475 T[] b = new T[dim]; 1476 T[] c = new T[dim]; 1487 T[] b = new T[dim + j]; 1488 b = b[j .. dim + j]; 1489 T[] c = new T[dim + j]; 1490 c = c[j .. dim + j]; 1477 1491 1478 1492 for (int i = 0; i < dim; i++) … … 1641 1655 unittest 1642 1656 { 1643 printf("_arrayExpSliceMinass SliceAssign_g unittest\n");1657 printf("_arrayExpSliceMinass_g unittest\n"); 1644 1658 1645 1659 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) … … 1652 1666 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1653 1667 a = a[j .. dim + j]; // misalign for second iteration 1654 T[] b = new T[dim]; 1655 T[] c = new T[dim]; 1668 T[] b = new T[dim + j]; 1669 b = b[j .. dim + j]; 1670 T[] c = new T[dim + j]; 1671 c = c[j .. dim + j]; 1656 1672 1657 1673 for (int i = 0; i < dim; i++) … … 1836 1852 unittest 1837 1853 { 1838 printf("_arraySliceSliceMinass SliceAssign_g unittest\n");1854 printf("_arraySliceSliceMinass_g unittest\n"); 1839 1855 1840 1856 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) … … 1847 1863 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1848 1864 a = a[j .. dim + j]; // misalign for second iteration 1849 T[] b = new T[dim]; 1850 T[] c = new T[dim]; 1865 T[] b = new T[dim + j]; 1866 b = b[j .. dim + j]; 1867 T[] c = new T[dim + j]; 1868 c = c[j .. dim + j]; 1851 1869 1852 1870 for (int i = 0; i < dim; i++) branches/phobos-1.x/phobos/internal/arraydouble.d
r823 r834 131 131 T[] a = new T[dim + j]; // aligned on 16 byte boundary 132 132 a = a[j .. dim + j]; // misalign for second iteration 133 T[] b = new T[dim]; 134 T[] c = new T[dim]; 133 T[] b = new T[dim + j]; 134 b = b[j .. dim + j]; 135 T[] c = new T[dim + j]; 136 c = c[j .. dim + j]; 135 137 136 138 for (int i = 0; i < dim; i++) … … 242 244 T[] a = new T[dim + j]; // aligned on 16 byte boundary 243 245 a = a[j .. dim + j]; // misalign for second iteration 244 T[] b = new T[dim]; 245 T[] c = new T[dim]; 246 T[] b = new T[dim + j]; 247 b = b[j .. dim + j]; 248 T[] c = new T[dim + j]; 249 c = c[j .. dim + j]; 246 250 247 251 for (int i = 0; i < dim; i++) … … 345 349 T[] a = new T[dim + j]; // aligned on 16 byte boundary 346 350 a = a[j .. dim + j]; // misalign for second iteration 347 T[] b = new T[dim]; 348 T[] c = new T[dim]; 351 T[] b = new T[dim + j]; 352 b = b[j .. dim + j]; 353 T[] c = new T[dim + j]; 354 c = c[j .. dim + j]; 349 355 350 356 for (int i = 0; i < dim; i++) … … 386 392 if (sse2() && a.length >= 8) 387 393 { 388 // align pointer 389 auto n = cast(T*)((cast(uint)aptr + 7) & ~7); 390 while (aptr < n) 391 *aptr++ += value; 392 n = cast(T*)((cast(uint)aend) & ~7); 394 auto n = cast(T*)((cast(uint)aend) & ~7); 393 395 if (aptr < n) 394 396 395 // Aligned case397 // Unaligned case 396 398 asm 397 399 { … … 403 405 align 8; 404 406 startsseloopa: 405 mov apd XMM0, [ESI];406 mov apd XMM1, [ESI+16];407 mov apd XMM2, [ESI+32];408 mov apd XMM3, [ESI+48];407 movupd XMM0, [ESI]; 408 movupd XMM1, [ESI+16]; 409 movupd XMM2, [ESI+32]; 410 movupd XMM3, [ESI+48]; 409 411 add ESI, 64; 410 412 addpd XMM0, XMM4; … … 412 414 addpd XMM2, XMM4; 413 415 addpd XMM3, XMM4; 414 mov apd [ESI+ 0-64], XMM0;415 mov apd [ESI+16-64], XMM1;416 mov apd [ESI+32-64], XMM2;417 mov apd [ESI+48-64], XMM3;416 movupd [ESI+ 0-64], XMM0; 417 movupd [ESI+16-64], XMM1; 418 movupd [ESI+32-64], XMM2; 419 movupd [ESI+48-64], XMM3; 418 420 cmp ESI, EDI; 419 421 jb startsseloopa; … … 442 444 T[] a = new T[dim + j]; // aligned on 16 byte boundary 443 445 a = a[j .. dim + j]; // misalign for second iteration 444 T[] b = new T[dim]; 445 T[] c = new T[dim]; 446 T[] b = new T[dim + j]; 447 b = b[j .. dim + j]; 448 T[] c = new T[dim + j]; 449 c = c[j .. dim + j]; 446 450 447 451 for (int i = 0; i < dim; i++) … … 547 551 T[] a = new T[dim + j]; // aligned on 16 byte boundary 548 552 a = a[j .. dim + j]; // misalign for second iteration 549 T[] b = new T[dim]; 550 T[] c = new T[dim]; 553 T[] b = new T[dim + j]; 554 b = b[j .. dim + j]; 555 T[] c = new T[dim + j]; 556 c = c[j .. dim + j]; 551 557 552 558 for (int i = 0; i < dim; i++) … … 650 656 T[] a = new T[dim + j]; // aligned on 16 byte boundary 651 657 a = a[j .. dim + j]; // misalign for second iteration 652 T[] b = new T[dim]; 653 T[] c = new T[dim]; 658 T[] b = new T[dim + j]; 659 b = b[j .. dim + j]; 660 T[] c = new T[dim + j]; 661 c = c[j .. dim + j]; 654 662 655 663 for (int i = 0; i < dim; i++) … … 756 764 T[] a = new T[dim + j]; // aligned on 16 byte boundary 757 765 a = a[j .. dim + j]; // misalign for second iteration 758 T[] b = new T[dim]; 759 T[] c = new T[dim]; 766 T[] b = new T[dim + j]; 767 b = b[j .. dim + j]; 768 T[] c = new T[dim + j]; 769 c = c[j .. dim + j]; 760 770 761 771 for (int i = 0; i < dim; i++) … … 797 807 if (sse2() && a.length >= 8) 798 808 { 799 // align pointer 800 auto n = cast(T*)((cast(uint)aptr + 7) & ~7); 801 while (aptr < n) 802 *aptr++ -= value; 803 n = cast(T*)((cast(uint)aend) & ~7); 809 auto n = cast(T*)((cast(uint)aend) & ~7); 804 810 if (aptr < n) 805 811 806 // Aligned case812 // Unaligned case 807 813 asm 808 814 { … … 814 820 align 8; 815 821 startsseloopa: 816 mov apd XMM0, [ESI];817 mov apd XMM1, [ESI+16];818 mov apd XMM2, [ESI+32];819 mov apd XMM3, [ESI+48];822 movupd XMM0, [ESI]; 823 movupd XMM1, [ESI+16]; 824 movupd XMM2, [ESI+32]; 825 movupd XMM3, [ESI+48]; 820 826 add ESI, 64; 821 827 subpd XMM0, XMM4; … … 823 829 subpd XMM2, XMM4; 824 830 subpd XMM3, XMM4; 825 mov apd [ESI+ 0-64], XMM0;826 mov apd [ESI+16-64], XMM1;827 mov apd [ESI+32-64], XMM2;828 mov apd [ESI+48-64], XMM3;831 movupd [ESI+ 0-64], XMM0; 832 movupd [ESI+16-64], XMM1; 833 movupd [ESI+32-64], XMM2; 834 movupd [ESI+48-64], XMM3; 829 835 cmp ESI, EDI; 830 836 jb startsseloopa; … … 843 849 unittest 844 850 { 845 printf("_arrayExpSlice minass_d unittest\n");851 printf("_arrayExpSliceMinass_d unittest\n"); 846 852 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) 847 853 { … … 853 859 T[] a = new T[dim + j]; // aligned on 16 byte boundary 854 860 a = a[j .. dim + j]; // misalign for second iteration 855 T[] b = new T[dim]; 856 T[] c = new T[dim]; 861 T[] b = new T[dim + j]; 862 b = b[j .. dim + j]; 863 T[] c = new T[dim + j]; 864 c = c[j .. dim + j]; 857 865 858 866 for (int i = 0; i < dim; i++) … … 958 966 T[] a = new T[dim + j]; // aligned on 16 byte boundary 959 967 a = a[j .. dim + j]; // misalign for second iteration 960 T[] b = new T[dim]; 961 T[] c = new T[dim]; 968 T[] b = new T[dim + j]; 969 b = b[j .. dim + j]; 970 T[] c = new T[dim + j]; 971 c = c[j .. dim + j]; 962 972 963 973 for (int i = 0; i < dim; i++) … … 1061 1071 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1062 1072 a = a[j .. dim + j]; // misalign for second iteration 1063 T[] b = new T[dim]; 1064 T[] c = new T[dim]; 1073 T[] b = new T[dim + j]; 1074 b = b[j .. dim + j]; 1075 T[] c = new T[dim + j]; 1076 c = c[j .. dim + j]; 1065 1077 1066 1078 for (int i = 0; i < dim; i++) … … 1171 1183 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1172 1184 a = a[j .. dim + j]; // misalign for second iteration 1173 T[] b = new T[dim]; 1174 T[] c = new T[dim]; 1185 T[] b = new T[dim + j]; 1186 b = b[j .. dim + j]; 1187 T[] c = new T[dim + j]; 1188 c = c[j .. dim + j]; 1175 1189 1176 1190 for (int i = 0; i < dim; i++) … … 1212 1226 if (sse2() && a.length >= 8) 1213 1227 { 1214 // align pointer 1215 auto n = cast(T*)((cast(uint)aptr + 7) & ~7); 1216 while (aptr < n) 1217 *aptr++ *= value; 1218 n = cast(T*)((cast(uint)aend) & ~7); 1228 auto n = cast(T*)((cast(uint)aend) & ~7); 1219 1229 if (aptr < n) 1220 1230 1221 // Aligned case1231 // Unaligned case 1222 1232 asm 1223 1233 { … … 1229 1239 align 8; 1230 1240 startsseloopa: 1231 mov apd XMM0, [ESI];1232 mov apd XMM1, [ESI+16];1233 mov apd XMM2, [ESI+32];1234 mov apd XMM3, [ESI+48];1241 movupd XMM0, [ESI]; 1242 movupd XMM1, [ESI+16]; 1243 movupd XMM2, [ESI+32]; 1244 movupd XMM3, [ESI+48]; 1235 1245 add ESI, 64; 1236 1246 mulpd XMM0, XMM4; … … 1238 1248 mulpd XMM2, XMM4; 1239 1249 mulpd XMM3, XMM4; 1240 mov apd [ESI+ 0-64], XMM0;1241 mov apd [ESI+16-64], XMM1;1242 mov apd [ESI+32-64], XMM2;1243 mov apd [ESI+48-64], XMM3;1250 movupd [ESI+ 0-64], XMM0; 1251 movupd [ESI+16-64], XMM1; 1252 movupd [ESI+32-64], XMM2; 1253 movupd [ESI+48-64], XMM3; 1244 1254 cmp ESI, EDI; 1245 1255 jb startsseloopa; … … 1268 1278 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1269 1279 a = a[j .. dim + j]; // misalign for second iteration 1270 T[] b = new T[dim]; 1271 T[] c = new T[dim]; 1280 T[] b = new T[dim + j]; 1281 b = b[j .. dim + j]; 1282 T[] c = new T[dim + j]; 1283 c = c[j .. dim + j]; 1272 1284 1273 1285 for (int i = 0; i < dim; i++) … … 1373 1385 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1374 1386 a = a[j .. dim + j]; // misalign for second iteration 1375 T[] b = new T[dim]; 1376 T[] c = new T[dim]; 1387 T[] b = new T[dim + j]; 1388 b = b[j .. dim + j]; 1389 T[] c = new T[dim + j]; 1390 c = c[j .. dim + j]; 1377 1391 1378 1392 for (int i = 0; i < dim; i++) … … 1490 1504 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1491 1505 a = a[j .. dim + j]; // misalign for second iteration 1492 T[] b = new T[dim]; 1493 T[] c = new T[dim]; 1506 T[] b = new T[dim + j]; 1507 b = b[j .. dim + j]; 1508 T[] c = new T[dim + j]; 1509 c = c[j .. dim + j]; 1494 1510 1495 1511 for (int i = 0; i < dim; i++) … … 1595 1611 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1596 1612 a = a[j .. dim + j]; // misalign for second iteration 1597 T[] b = new T[dim]; 1598 T[] c = new T[dim]; 1613 T[] b = new T[dim + j]; 1614 b = b[j .. dim + j]; 1615 T[] c = new T[dim + j]; 1616 c = c[j .. dim + j]; 1599 1617 1600 1618 for (int i = 0; i < dim; i++) … … 1620 1638 1621 1639 1640 /* ======================================================================== */ 1641 1642 /*********************** 1643 * Computes: 1644 * a[] -= b[] * value 1645 */ 1646 1647 T[] _arraySliceExpMulSliceMinass_d(T[] a, T value, T[] b) 1648 { 1649 return _arraySliceExpMulSliceAddass_d(a, -value, b); 1650 } 1651 1652 /*********************** 1653 * Computes: 1654 * a[] += b[] * value 1655 */ 1656 1657 T[] _arraySliceExpMulSliceAddass_d(T[] a, T value, T[] b) 1658 in 1659 { 1660 assert(a.length == b.length); 1661 assert(disjoint(a, b)); 1662 } 1663 body 1664 { 1665 auto aptr = a.ptr; 1666 auto aend = aptr + a.length; 1667 auto bptr = b.ptr; 1668 1669 // Handle remainder 1670 while (aptr < aend) 1671 *aptr++ += *bptr++ * value; 1672 1673 return a; 1674 } 1675 1676 unittest 1677 { 1678 printf("_arraySliceExpMulSliceAddass_d unittest\n"); 1679 1680 cpuid = 1; 1681 { 1682 version (log) printf(" cpuid %d\n", cpuid); 1683 1684 for (int j = 0; j < 1; j++) 1685 { 1686 const int dim = 67; 1687 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1688 a = a[j .. dim + j]; // misalign for second iteration 1689 T[] b = new T[dim + j]; 1690 b = b[j .. dim + j]; 1691 T[] c = new T[dim + j]; 1692 c = c[j .. dim + j]; 1693 1694 for (int i = 0; i < dim; i++) 1695 { a[i] = cast(T)i; 1696 b[i] = cast(T)(i + 7); 1697 c[i] = cast(T)(i * 2); 1698 } 1699 1700 b[] = c[]; 1701 c[] += a[] * 6; 1702 1703 for (int i = 0; i < dim; i++) 1704 { 1705 //printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); 1706 if (c[i] != cast(T)(b[i] + a[i] * 6)) 1707 { 1708 printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); 1709 assert(0); 1710 } 1711 } 1712 } 1713 } 1714 } 1715 branches/phobos-1.x/phobos/internal/arrayfloat.d
r823 r834 171 171 T[] a = new T[dim + j]; // aligned on 16 byte boundary 172 172 a = a[j .. dim + j]; // misalign for second iteration 173 T[] b = new T[dim]; 174 T[] c = new T[dim]; 173 T[] b = new T[dim + j]; 174 b = b[j .. dim + j]; 175 T[] c = new T[dim + j]; 176 c = c[j .. dim + j]; 175 177 176 178 for (int i = 0; i < dim; i++) … … 321 323 T[] a = new T[dim + j]; // aligned on 16 byte boundary 322 324 a = a[j .. dim + j]; // misalign for second iteration 323 T[] b = new T[dim]; 324 T[] c = new T[dim]; 325 T[] b = new T[dim + j]; 326 b = b[j .. dim + j]; 327 T[] c = new T[dim + j]; 328 c = c[j .. dim + j]; 325 329 326 330 for (int i = 0; i < dim; i++) … … 463 467 T[] a = new T[dim + j]; // aligned on 16 byte boundary 464 468 a = a[j .. dim + j]; // misalign for second iteration 465 T[] b = new T[dim]; 466 T[] c = new T[dim]; 469 T[] b = new T[dim + j]; 470 b = b[j .. dim + j]; 471 T[] c = new T[dim + j]; 472 c = c[j .. dim + j]; 467 473 468 474 for (int i = 0; i < dim; i++) … … 597 603 T[] a = new T[dim + j]; // aligned on 16 byte boundary 598 604 a = a[j .. dim + j]; // misalign for second iteration 599 T[] b = new T[dim]; 600 T[] c = new T[dim]; 605 T[] b = new T[dim + j]; 606 b = b[j .. dim + j]; 607 T[] c = new T[dim + j]; 608 c = c[j .. dim + j]; 601 609 602 610 for (int i = 0; i < dim; i++) … … 738 746 T[] a = new T[dim + j]; // aligned on 16 byte boundary 739 747 a = a[j .. dim + j]; // misalign for second iteration 740 T[] b = new T[dim]; 741 T[] c = new T[dim]; 748 T[] b = new T[dim + j]; 749 b = b[j .. dim + j]; 750 T[] c = new T[dim + j]; 751 c = c[j .. dim + j]; 742 752 743 753 for (int i = 0; i < dim; i++) … … 882 892 T[] a = new T[dim + j]; // aligned on 16 byte boundary 883 893 a = a[j .. dim + j]; // misalign for second iteration 884 T[] b = new T[dim]; 885 T[] c = new T[dim]; 894 T[] b = new T[dim + j]; 895 b = b[j .. dim + j]; 896 T[] c = new T[dim + j]; 897 c = c[j .. dim + j]; 886 898 887 899 for (int i = 0; i < dim; i++) … … 1028 1040 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1029 1041 a = a[j .. dim + j]; // misalign for second iteration 1030 T[] b = new T[dim]; 1031 T[] c = new T[dim]; 1042 T[] b = new T[dim + j]; 1043 b = b[j .. dim + j]; 1044 T[] c = new T[dim + j]; 1045 c = c[j .. dim + j]; 1032 1046 1033 1047 for (int i = 0; i < dim; i++) … … 1162 1176 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1163 1177 a = a[j .. dim + j]; // misalign for second iteration 1164 T[] b = new T[dim]; 1165 T[] c = new T[dim]; 1178 T[] b = new T[dim + j]; 1179 b = b[j .. dim + j]; 1180 T[] c = new T[dim + j]; 1181 c = c[j .. dim + j]; 1166 1182 1167 1183 for (int i = 0; i < dim; i++) … … 1303 1319 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1304 1320 a = a[j .. dim + j]; // misalign for second iteration 1305 T[] b = new T[dim]; 1306 T[] c = new T[dim]; 1321 T[] b = new T[dim + j]; 1322 b = b[j .. dim + j]; 1323 T[] c = new T[dim + j]; 1324 c = c[j .. dim + j]; 1307 1325 1308 1326 for (int i = 0; i < dim; i++) … … 1446 1464 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1447 1465 a = a[j .. dim + j]; // misalign for second iteration 1448 T[] b = new T[dim]; 1449 T[] c = new T[dim]; 1466 T[] b = new T[dim + j]; 1467 b = b[j .. dim + j]; 1468 T[] c = new T[dim + j]; 1469 c = c[j .. dim + j]; 1450 1470 1451 1471 for (int i = 0; i < dim; i++) … … 1595 1615 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1596 1616 a = a[j .. dim + j]; // misalign for second iteration 1597 T[] b = new T[dim]; 1598 T[] c = new T[dim]; 1617 T[] b = new T[dim + j]; 1618 b = b[j .. dim + j]; 1619 T[] c = new T[dim + j]; 1620 c = c[j .. dim + j]; 1599 1621 1600 1622 for (int i = 0; i < dim; i++) … … 1729 1751 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1730 1752 a = a[j .. dim + j]; // misalign for second iteration 1731 T[] b = new T[dim]; 1732 T[] c = new T[dim]; 1753 T[] b = new T[dim + j]; 1754 b = b[j .. dim + j]; 1755 T[] c = new T[dim + j]; 1756 c = c[j .. dim + j]; 1733 1757 1734 1758 for (int i = 0; i < dim; i++) … … 1870 1894 T[] a = new T[dim + j]; // aligned on 16 byte boundary 1871 1895 a = a[j .. dim + j]; // misalign for second iteration 1872 T[] b = new T[dim]; 1873 T[] c = new T[dim]; 1896 T[] b = new T[dim + j]; 1897 b = b[j .. dim + j]; 1898 T[] c = new T[dim + j]; 1899 c = c[j .. dim + j]; 1874 1900 1875 1901 for (int i = 0; i < dim; i++) … … 2026 2052 T[] a = new T[dim + j]; // aligned on 16 byte boundary 2027 2053 a = a[j .. dim + j]; // misalign for second iteration 2028 T[] b = new T[dim]; 2029 T[] c = new T[dim]; 2054 T[] b = new T[dim + j]; 2055 b = b[j .. dim + j]; 2056 T[] c = new T[dim + j]; 2057 c = c[j .. dim + j]; 2030 2058 2031 2059 for (int i = 0; i < dim; i++) … … 2172 2200 T[] a = new T[dim + j]; // aligned on 16 byte boundary 2173 2201 a = a[j .. dim + j]; // misalign for second iteration 2174 T[] b = new T[dim]; 2175 T[] c = new T[dim]; 2202 T[] b = new T[dim + j]; 2203 b = b[j .. dim + j]; 2204 T[] c = new T[dim + j]; 2205 c = c[j .. dim + j]; 2176 2206 2177 2207 for (int i = 0; i < dim; i++) … … 2197 2227 2198 2228 2229 /* ======================================================================== */ 2230 2231 /*********************** 2232 * Computes: 2233 * a[] -= b[] * value 2234 */ 2235 2236 T[] _arraySliceExpMulSliceMinass_f(T[] a, T value, T[] b) 2237 { 2238 return _arraySliceExpMulSliceAddass_f(a, -value, b); 2239 } 2240 2241 /*********************** 2242 * Computes: 2243 * a[] += b[] * value 2244 */ 2245 2246 T[] _arraySliceExpMulSliceAddass_f(T[] a, T value, T[] b) 2247 in 2248 { 2249 assert(a.length == b.length); 2250 assert(disjoint(a, b)); 2251 } 2252 body 2253 { 2254 auto aptr = a.ptr; 2255 auto aend = aptr + a.length; 2256 auto bptr = b.ptr; 2257 2258 // Handle remainder 2259 while (aptr < aend) 2260 *aptr++ += *bptr++ * value; 2261 2262 return a; 2263 } 2264 2265 unittest 2266 { 2267 printf("_arraySliceExpMulSliceAddass_f unittest\n"); 2268 2269 cpuid = 1; 2270 { 2271 version (log) printf(" cpuid %d\n", cpuid); 2272 2273 for (int j = 0; j < 1; j++) 2274 { 2275 const int dim = 67; 2276 T[] a = new T[dim + j]; // aligned on 16 byte boundary 2277 a = a[j .. dim + j]; // misalign for second iteration 2278 T[] b = new T[dim + j]; 2279 b = b[j .. dim + j]; 2280 T[] c = new T[dim + j]; 2281 c = c[j .. dim + j]; 2282 2283 for (int i = 0; i < dim; i++) 2284 { a[i] = cast(T)i; 2285 b[i] = cast(T)(i + 7); 2286 c[i] = cast(T)(i * 2); 2287 } 2288 2289 b[] = c[]; 2290 c[] += a[] * 6; 2291 2292 for (int i = 0; i < dim; i++) 2293 { 2294 //printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); 2295 if (c[i] != cast(T)(b[i] + a[i] * 6)) 2296 { 2297 printf("[%d]: %g ?= %g + %g * 6\n", i, c[i], b[i], a[i]); 2298 assert(0); 2299 } 2300 } 2301 } 2302 } 2303 } 2304 2305 branches/phobos-1.x/phobos/internal/arrayint.d
r823 r834 76 76 { 77 77 // SSE2 aligned version is 380% faster 78 if (s td.cpuid.sse2() && a.length >= 8)78 if (sse2() && a.length >= 8) 79 79 { 80 80 auto n = aptr + (a.length & ~7); … … 139 139 else 140 140 // MMX version is 298% faster 141 if ( std.cpuid.mmx() && a.length >= 4)141 if (mmx() && a.length >= 4) 142 142 { 143 143 auto n = aptr + (a.length & ~3); … … 207 207 } 208 208 209 unittest 210 { 211 printf("_arraySliceExpAddSliceAssign_i unittest\n"); 212 213 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) 214 { 215 version (log) printf(" cpuid %d\n", cpuid); 216 217 for (int j = 0; j < 2; j++) 218 { 219 const int dim = 67; 220 T[] a = new T[dim + j]; // aligned on 16 byte boundary 221 a = a[j .. dim + j]; // misalign for second iteration 222 T[] b = new T[dim + j]; 223 b = b[j .. dim + j]; 224 T[] c = new T[dim + j]; 225 c = c[j .. dim + j]; 226 227 for (int i = 0; i < dim; i++) 228 { a[i] = cast(T)i; 229 b[i] = cast(T)(i + 7); 230 c[i] = cast(T)(i * 2); 231 } 232 233 c[] = a[] + 6; 234 235 for (int i = 0; i < dim; i++) 236 { 237 if (c[i] != cast(T)(a[i] + 6)) 238 { 239 printf("[%d]: %d != %d + 6\n", i, c[i], a[i]); 240 assert(0); 241 } 242 } 243 } 244 } 245 } 246 247 209 248 /* ======================================================================== */ 210 249 … … 243 282 { 244 283 // SSE2 aligned version is 1710% faster 245 if (s td.cpuid.sse2() && a.length >= 8)284 if (sse2() && a.length >= 8) 246 285 { 247 286 auto n = aptr + (a.length & ~7); … … 310 349 else 311 350 // MMX version is 995% faster 312 if ( std.cpuid.mmx() && a.length >= 4)351 if (mmx() && a.length >= 4) 313 352 { 314 353 auto n = aptr + (a.length & ~3); … … 352 391 } 353 392 393 unittest 394 { 395 printf("_arraySliceSliceAddSliceAssign_i unittest\n"); 396 397 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) 398 { 399 version (log) printf(" cpuid %d\n", cpuid); 400 401 for (int j = 0; j < 2; j++) 402 { 403 const int dim = 67; 404 T[] a = new T[dim + j]; // aligned on 16 byte boundary 405 a = a[j .. dim + j]; // misalign for second iteration 406 T[] b = new T[dim + j]; 407 b = b[j .. dim + j]; 408 T[] c = new T[dim + j]; 409 c = c[j .. dim + j]; 410 411 for (int i = 0; i < dim; i++) 412 { a[i] = cast(T)i; 413 b[i] = cast(T)(i + 7); 414 c[i] = cast(T)(i * 2); 415 } 416 417 c[] = a[] + b[]; 418 419 for (int i = 0; i < dim; i++) 420 { 421 if (c[i] != cast(T)(a[i] + b[i])) 422 { 423 printf("[%d]: %d != %d + %d\n", i, c[i], a[i], b[i]); 424 assert(0); 425 } 426 } 427 } 428 } 429 } 430 431 354 432 /* ======================================================================== */ 355 433 … … 378 456 { 379 457 // SSE2 aligned version is 83% faster 380 if (s td.cpuid.sse2() && a.length >= 8)458 if (sse2() && a.length >= 8) 381 459 { 382 460 auto n = aptr + (a.length & ~7); … … 435 513 else 436 514 // MMX version is 81% faster 437 if ( std.cpuid.mmx() && a.length >= 4)515 if (mmx() && a.length >= 4) 438 516 { 439 517 auto n = aptr + (a.length & ~3); … … 497 575 } 498 576 577 unittest 578 { 579 printf("_arrayExpSliceAddass_i unittest\n"); 580 581 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) 582 { 583 version (log) printf(" cpuid %d\n", cpuid); 584 585 for (int j = 0; j < 2; j++) 586 { 587 const int dim = 67; 588 T[] a = new T[dim + j]; // aligned on 16 byte boundary 589 a = a[j .. dim + j]; // misalign for second iteration 590 T[] b = new T[dim + j]; 591 b = b[j .. dim + j]; 592 T[] c = new T[dim + j]; 593 c = c[j .. dim + j]; 594 595 for (int i = 0; i < dim; i++) 596 { a[i] = cast(T)i; 597 b[i] = cast(T)(i + 7); 598 c[i] = cast(T)(i * 2); 599 } 600 601 a[] = c[]; 602 a[] += 6; 603 604 for (int i = 0; i < dim; i++) 605 { 606 if (a[i] != cast(T)(c[i] + 6)) 607 { 608 printf("[%d]: %d != %d + 6\n", i, a[i], c[i]); 609 assert(0); 610 } 611 } 612 } 613 } 614 } 615 616 499 617 /* ======================================================================== */ 500 618 … … 530 648 { 531 649 // SSE2 aligned version is 695% faster 532 if (s td.cpuid.sse2() && a.length >= 8)650 if (sse2() && a.length >= 8) 533 651 { 534 652 auto n = aptr + (a.length & ~7); … … 591 709 else 592 710 // MMX version is 471% faster 593 if ( std.cpuid.mmx() && a.length >= 4)711 if (mmx() && a.length >= 4) 594 712 { 595 713 auto n = aptr + (a.length & ~3); … … 630 748 } 631 749 750 unittest 751 { 752 printf("_arraySliceSliceAddass_i unittest\n"); 753 754 for (cpuid = 0; cpuid < CPUID_MAX; cpuid++) 755 { 756 version (log) printf(" cpuid %d\n", cpuid); 757 758 for (int j = 0; j < 2; j++) 759 { 760 const int dim = 67; 761 T[] a = new T[dim + j]; // aligned on 16 byte boundary 762 a = a[j .. dim + j]; // misalign for second iteration 763 T[] b = new T[dim + j]; 764 b = b[j .. dim + j]; 765 T[] c = new T[dim + j]; 766 c = c[j .. dim + j]; 767 768 for (int i = 0; i < dim; i++) 769 { a[i] = cast(T)i; 770 b[i] = cast(T)(i + 7); 771 c[i] = cast(T)(i * 2); 772 } 773 774 b[] = c[]; 775 c[] += a[]; 776 777 for (int i = 0; i < dim; i++) 778 { 779 if (c[i] != cast(T)(b[i] + a[i])) 780 { 781 printf("[%d]: %d != %d + %d\n", i, c[i], b[i], a[i]); 782 assert(0); 783 } 784 } 785 } 786 } 787 } 788 789 632 790 /* ======================================================================== */ 633 791 … … 663 821 { 664 822 // SSE2 aligned version is 400% faster 665 if (s td.cpuid.sse2() && a.length >= 8)823 if (sse2() && a.length >= 8) 666 824 { 667 825 auto n = aptr + (a.length & ~7); … … 726 884 else 727 885 // MMX version is 315% faster 728 if ( std.cpuid.mmx() && a.length >= 4)886 if (mmx() && a.length >= 4) 729 887 { 730 888 auto n = aptr + (a.length & ~3); … … 794 952 } 795 953
