Note: This website is archived. For up-to-date information about D projects and development, please visit wiki.dlang.org.

Changeset 1552

Show
Ignore:
Timestamp:
05/24/10 04:27:25 (15 years ago)
Author:
rsinfu
Message:

Fixed bugzilla 3465: isIdeographic can be wrong in std.xml.

- Applied Michael Rynn's contribution to isChar(), isDigit() and isIdeographic(). Now these functions are hard-coded for performance. Profiling showed that this code was three times faster than the old one.
- IdeographicTable must be sorted for lookup().

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/docsrc/changelog.dd

    r1551 r1552  
    88    $(WHATSNEW 
    99    $(LI std.functional: toDelegate now accepts callable(function pointers, delegates and objects implement opCall) ) 
    1010    $(LI std.traits: Added templates to get compile-time information about functions.) 
    1111    $(LI std.typecons: Added tie and AutoImplement.) 
    1212    ) 
    1313    $(BUGSFIXED 
    1414    $(LI $(BUGZILLA 2738): Rebindable should work for interfaces.) 
    1515    $(LI $(BUGZILLA 2835): std.socket.TcpSocket doesn't actually connect) 
    1616    $(LI $(BUGZILLA 3088): std.xml.check() fails on xml comments) 
    1717    $(LI $(BUGZILLA 3200): std.xml doesn't follow spec for Tag.text) 
     18    $(LI $(BUGZILLA 3465): isIdeographic can be wrong in std.xml) 
    1819    $(LI $(BUGZILLA 3873): std.range.repeat should have popBack defined) 
    1920    $(LI $(BUGZILLA 3880): std.regex functions with const/immutable Regex object) 
    2021    $(LI $(BUGZILLA 4109): writeln doesn't work with empty static array) 
    2122    $(LI $(BUGZILLA 4202): Changset 1517 doesn't compile) 
    2223    $(LI $(BUGZILLA 4228): std.array.replace contains 2 bugs) 
    2324    $(LI $(BUGZILLA 4219): hasAliasing does not care about immutable) 
    2425    ) 
    2526) 
    2627 
    2728<div id=version> 
  • trunk/phobos/std/xml.d

    r1549 r1552  
    130130/** 
    131131 * Returns true if the character is a character according to the XML standard 
    132132 * 
    133133 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    134134 * 
    135135 * Params: 
    136136 *    c = the character to be tested 
    137137 */ 
    138138bool isChar(dchar c) // rule 2 
    139139{ 
    140     return lookup(CharTable,c); 
     140    if (c <= 0xD7FF) 
     141    { 
     142        if (c >= 0x20) 
     143            return true; 
     144        switch(c) 
     145        { 
     146        case 0xA: 
     147        case 0x9: 
     148        case 0xD: 
     149            return true; 
     150        default: 
     151            return false; 
     152        } 
     153    } 
     154    else if (0xE000 <= c && c <= 0x10FFFF) 
     155    { 
     156        if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF 
     157            return true; 
     158    } 
     159    return false; 
    141160} 
    142161 
    143162unittest 
    144163{ 
    145164//  const CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD, 
    146165//        0x10000,0x10FFFF]; 
    147166    assert(!isChar(cast(dchar)0x8)); 
    148167    assert( isChar(cast(dchar)0x9)); 
    149168    assert( isChar(cast(dchar)0xA)); 
    150169    assert(!isChar(cast(dchar)0xB)); 
     
    157176    assert( isChar(cast(dchar)0xD7FF)); 
    158177    assert(!isChar(cast(dchar)0xD800)); 
    159178    assert(!isChar(cast(dchar)0xDFFF)); 
    160179    assert( isChar(cast(dchar)0xE000)); 
    161180    assert( isChar(cast(dchar)0xFFFD)); 
    162181    assert(!isChar(cast(dchar)0xFFFE)); 
    163182    assert(!isChar(cast(dchar)0xFFFF)); 
    164183    assert( isChar(cast(dchar)0x10000)); 
    165184    assert( isChar(cast(dchar)0x10FFFF)); 
    166185    assert(!isChar(cast(dchar)0x110000)); 
     186 
     187    debug (stdxml_TestHardcodedChecks) 
     188    { 
     189        foreach (c; 0 .. dchar.max + 1) 
     190            assert(isChar(c) == lookup(CharTable, c)); 
     191    } 
    167192} 
    168193 
    169194/** 
    170195 * Returns true if the character is whitespace according to the XML standard 
    171196 * 
    172197 * Only the following characters are considered whitespace in XML - space, tab, 
    173198 * carriage return and linefeed 
    174199 * 
    175200 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    176201 * 
     
    185210/** 
    186211 * Returns true if the character is a digit according to the XML standard 
    187212 * 
    188213 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    189214 * 
    190215 * Params: 
    191216 *    c = the character to be tested 
    192217 */ 
    193218bool isDigit(dchar c) 
    194219{ 
    195     return lookup(DigitTable,c); 
     220    if (c <= 0x0039 && c >= 0x0030) 
     221        return true; 
     222    else 
     223        return lookup(DigitTable,c); 
     224} 
     225 
     226unittest 
     227{ 
     228    debug (stdxml_TestHardcodedChecks) 
     229    { 
     230        foreach (c; 0 .. dchar.max + 1) 
     231            assert(isDigit(c) == lookup(DigitTable, c)); 
     232    } 
    196233} 
    197234 
    198235/** 
    199236 * Returns true if the character is a letter according to the XML standard 
    200237 * 
    201238 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    202239 * 
    203240 * Params: 
    204241 *    c = the character to be tested 
    205242 */ 
     
    212249 * Returns true if the character is an ideographic character according to the 
    213250 * XML standard 
    214251 * 
    215252 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    216253 * 
    217254 * Params: 
    218255 *    c = the character to be tested 
    219256 */ 
    220257bool isIdeographic(dchar c) 
    221258{ 
    222     return lookup(IdeographicTable,c); 
     259    if (c == 0x3007) 
     260        return true; 
     261    if (c <= 0x3029 && c >= 0x3021 ) 
     262        return true; 
     263    if (c <= 0x9FA5 && c >= 0x4E00) 
     264        return true; 
     265    return false; 
     266} 
     267 
     268unittest 
     269{ 
     270    assert(isIdeographic('\u4E00')); 
     271    assert(isIdeographic('\u9FA5')); 
     272    assert(isIdeographic('\u3007')); 
     273    assert(isIdeographic('\u3021')); 
     274    assert(isIdeographic('\u3029')); 
     275 
     276    debug (stdxml_TestHardcodedChecks) 
     277    { 
     278        foreach (c; 0 .. dchar.max + 1) 
     279            assert(isIdeographic(c) == lookup(IdeographicTable, c)); 
     280    } 
    223281} 
    224282 
    225283/** 
    226284 * Returns true if the character is a base character according to the XML 
    227285 * standard 
    228286 * 
    229287 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 
    230288 * 
    231289 * Params: 
    232290 *    c = the character to be tested 
     
    28222880        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167, 
    28232881        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E, 
    28242882        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA, 
    28252883        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00, 
    28262884        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48, 
    28272885        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F, 
    28282886        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6, 
    28292887        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6, 
    28302888        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041, 
    28312889        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3]; 
    2832     immutable IdeographicTable=[0x4E00,0x9FA5,0x3007,0x3007,0x3021,0x3029]; 
     2890    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5]; 
    28332891    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486, 
    28342892        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2, 
    28352893        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF, 
    28362894        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C, 
    28372895        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983, 
    28382896        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8, 
    28392897        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C, 
    28402898        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D, 
    28412899        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9, 
    28422900        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,