Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

Changeset 2894

Show
Ignore:
Timestamp:
11/19/07 21:18:55 (1 year ago)
Author:
keinfarbton
Message:

added comment for title case

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/tango/text/Unicode.d

    r2809 r2894  
    1212        only 99 % complete, because it does not take into account Conditional 
    1313        case mappings. This means the Greek Letter Sigma will not be correctly 
    14         case mapped at the end of a Word, and the Locales Lithuanian, Turkish  
     14        case mapped at the end of a Word, and the Locales Lithuanian, Turkish 
    1515        and Azeri are not taken into account during Case Mappings. This means 
    1616        all in all around 12 Characters will not be mapped correctly under 
    1717        some circumstances. 
    18          
     18 
    1919        ICU4j also does not handle these cases at the moment. 
    20          
     20 
    2121        Unittests are written against output from ICU4j 
    22          
     22 
    2323        This Module tries to minimize Memory allocation and usage. You can 
    2424        always pass the output buffer that should be used to the case mapping 
     
    3636/** 
    3737 * Converts an Utf8 String to Upper case 
    38  *  
     38 * 
    3939 * Params: 
    4040 *     input = String to be case mapped 
     
    7171                working[oprod..produced] = (*s).upperCaseMapping; 
    7272                continue; 
    73             }           
    74         }  
     73            } 
     74        } 
    7575        // Make sure no relocation is made in the toUtf8 Method 
    7676        if(produced + 1 >= output.length) 
     
    8585/** 
    8686 * Converts an Utf8 String to Upper case 
    87  *  
     87 * 
    8888 * Params: 
    8989 *     input = String to be case mapped 
     
    122122                produced += res.length; 
    123123                continue; 
    124             }           
    125         }  
     124            } 
     125        } 
    126126        // Make sure no relocation is made in the toUtf8 Method 
    127127        if(produced + 4 >= output.length) 
     
    141141/** 
    142142 * Converts an Utf16 String to Upper case 
    143  *  
     143 * 
    144144 * Params: 
    145145 *     input = String to be case mapped 
     
    195195/** 
    196196 * Converts an Utf32 String to Upper case 
    197  *  
     197 * 
    198198 * Params: 
    199199 *     input = String to be case mapped 
     
    237237/** 
    238238 * Converts an Utf8 String to Lower case 
    239  *  
     239 * 
    240240 * Params: 
    241241 *     input = String to be case mapped 
     
    274274                produced += res.length; 
    275275                continue; 
    276             }           
    277         }  
     276            } 
     277        } 
    278278        // Make sure no relocation is made in the toUtf8 Method 
    279279        if(produced + 4 >= output.length) 
     
    293293/** 
    294294 * Converts an Utf16 String to Lower case 
    295  *  
     295 * 
    296296 * Params: 
    297297 *     input = String to be case mapped 
     
    348348/** 
    349349 * Converts an Utf32 String to Lower case 
    350  *  
     350 * 
    351351 * Params: 
    352352 *     input = String to be case mapped 
     
    390390 * Converts an Utf8 String to Folding case 
    391391 * Folding case is used for case-insensitive comparisons. 
    392  *  
     392 * 
    393393 * Params: 
    394394 *     input = String to be case mapped 
     
    439439 * Converts an Utf16 String to Folding case 
    440440 * Folding case is used for case-insensitive comparisons. 
    441  *  
     441 * 
    442442 * Params: 
    443443 *     input = String to be case mapped 
     
    487487 * Converts an Utf32 String to Folding case 
    488488 * Folding case is used for case-insensitive comparisons. 
    489  *  
     489 * 
    490490 * Params: 
    491491 *     input = String to be case mapped 
     
    523523 * Determines if a character is a digit. It returns true for decimal 
    524524 * digits only. 
    525  *  
     525 * 
    526526 * Params: 
    527527 *     ch = the character to be inspected 
     
    535535/** 
    536536 * Determines if a character is a letter. 
    537  *  
     537 * 
    538538 * Params: 
    539539 *     ch = the character to be inspected 
     
    552552 * Determines if a character is a letter or a 
    553553 * decimal digit. 
    554  *  
     554 * 
    555555 * Params: 
    556556 *     ch = the character to be inspected 
     
    558558bool isLetterOrDigit(int ch) { 
    559559    UnicodeData **d = (ch in unicodeData); 
    560     return (d !is null) && ((*d).generalCategory &  
     560    return (d !is null) && ((*d).generalCategory & 
    561561        ( UnicodeData.GeneralCategory.Lu 
    562562        | UnicodeData.GeneralCategory.Ll 
     
    579579/** 
    580580 * Determines if a character is a title case letter. 
     581 * For combined letters, only the first letter is upper case and the second is lower case. 
     582 * Some of these special characters can be found in the Croatian and Greek languages. 
     583 * See_Also: http://en.wikipedia.org/wiki/Capitalization 
    581584 * Params: 
    582585 *     ch = the character to be inspected 
     
    599602/** 
    600603 * Determines if a character is a Whitespace character. 
    601  * Whitespace characters are characters in the  
     604 * Whitespace characters are characters in the 
    602605 * General Categories Zs, Zl, Zp without the No Break 
    603606 * spaces plus the control characters out of the ASCII 
    604607 * range, that are used as spaces: 
    605608 * TAB VT LF FF CR FS GS RS US NL 
    606  *  
     609 * 
    607610 * WARNING: see also isSpace; that function may be 
    608611 *          closer to what you expect. 
    609  *  
     612 * 
    610613 * Params: 
    611614 *     ch = the character to be inspected 
     
    615618        return true; 
    616619    UnicodeData **d = (ch in unicodeData); 
    617     return (d !is null) && ((*d).generalCategory &  
     620    return (d !is null) && ((*d).generalCategory & 
    618621            ( UnicodeData.GeneralCategory.Zs 
    619622            | UnicodeData.GeneralCategory.Zl 
     
    621624            && ch != 0x00A0 // NBSP 
    622625            && ch != 0x202F // NARROW NBSP 
    623             && ch != 0xFEFF; // ZERO WIDTH NBSP  
     626            && ch != 0xFEFF; // ZERO WIDTH NBSP 
    624627} 
    625628 
     
    627630 * Determines if a character is a Space character as 
    628631 * specified in the Unicode Standard. 
    629  *  
     632 * 
    630633 * WARNING: see also isWhitespace; that function may be 
    631634 *          closer to what you expect. 
    632  *  
     635 * 
    633636 * Params: 
    634637 *     ch = the character to be inspected 
     
    636639bool isSpace(dchar ch) { 
    637640    UnicodeData **d = (ch in unicodeData); 
    638     return (d !is null) && ((*d).generalCategory &  
     641    return (d !is null) && ((*d).generalCategory & 
    639642            ( UnicodeData.GeneralCategory.Zs 
    640643            | UnicodeData.GeneralCategory.Zl 
     
    646649 * Determines if a character is a printable character as 
    647650 * specified in the Unicode Standard. 
    648  *  
    649  *  
     651 * 
     652 * 
    650653 * WARNING: see also isWhitespace; that function may be 
    651654 *          closer to what you expect. 
    652  *  
     655 * 
    653656 * Params: 
    654657 *     ch = the character to be inspected 
     
    656659bool isPrintable(dchar ch) { 
    657660    UnicodeData **d = (ch in unicodeData); 
    658     return (d !is null) && ((*d).generalCategory &  
     661    return (d !is null) && ((*d).generalCategory & 
    659662            ( UnicodeData.GeneralCategory.Cn 
    660663            | UnicodeData.GeneralCategory.Cc 
     
    667670    void main() {} 
    668671 
    669 debug (UnitTest) {        
     672debug (UnitTest) { 
    670673 
    671674unittest { 
    672      
    673      
     675 
     676 
    674677    // 1) No Buffer passed, no resize, no SpecialCase 
    675      
    676     char[] testString1utf8 = "\u00E4\u00F6\u00FC";  
     678 
     679    char[] testString1utf8 = "\u00E4\u00F6\u00FC"; 
    677680    wchar[] testString1utf16 = "\u00E4\u00F6\u00FC"; 
    678681    dchar[] testString1utf32 = "\u00E4\u00F6\u00FC"; 
     
    686689    dchar[] resultString1utf32 = toUpper(testString1utf32); 
    687690    assert(resultString1utf32 == refString1utf32); 
    688      
     691 
    689692    // 2) Buffer passed, no resize, no SpecialCase 
    690693    char[60] buffer1utf8; 
     
    715718    assert(resultString1utf32.ptr != buffer2utf32.ptr); 
    716719    assert(resultString1utf32 == refString1utf32); 
    717      
     720 
    718721    // 4) Buffer passed, resize necessary, extensive SpecialCase 
    719      
    720      
    721     char[] testString2utf8 = "\uFB03\uFB04\uFB05";  
     722 
     723 
     724    char[] testString2utf8 = "\uFB03\uFB04\uFB05"; 
    722725    wchar[] testString2utf16 = "\uFB03\uFB04\uFB05"; 
    723726    dchar[] testString2utf32 = "\uFB03\uFB04\uFB05"; 
     
    739742 
    740743unittest { 
    741      
    742      
     744 
     745 
    743746    // 1) No Buffer passed, no resize, no SpecialCase 
    744      
     747 
    745748    char[] testString1utf8 = "\u00C4\u00D6\u00DC"; 
    746749    wchar[] testString1utf16 = "\u00C4\u00D6\u00DC"; 
    747750    dchar[] testString1utf32 = "\u00C4\u00D6\u00DC"; 
    748     char[] refString1utf8 = "\u00E4\u00F6\u00FC";   
     751    char[] refString1utf8 = "\u00E4\u00F6\u00FC"; 
    749752    wchar[] refString1utf16 = "\u00E4\u00F6\u00FC"; 
    750753    dchar[] refString1utf32 = "\u00E4\u00F6\u00FC"; 
     
    755758    dchar[] resultString1utf32 = toLower(testString1utf32); 
    756759    assert(resultString1utf32 == refString1utf32); 
    757      
     760 
    758761    // 2) Buffer passed, no resize, no SpecialCase 
    759762    char[60] buffer1utf8; 
     
    784787    assert(resultString1utf32.ptr != buffer2utf32.ptr); 
    785788    assert(resultString1utf32 == refString1utf32); 
    786      
     789 
    787790    // 4) Buffer passed, resize necessary, extensive SpecialCase 
    788      
    789     char[] testString2utf8 = "\u0130\u0130\u0130";  
     791 
     792    char[] testString2utf8 = "\u0130\u0130\u0130"; 
    790793    wchar[] testString2utf16 = "\u0130\u0130\u0130"; 
    791794    dchar[] testString2utf32 = "\u0130\u0130\u0130";