 |
Changeset 2894
- Timestamp:
- 11/19/07 21:18:55
(1 year ago)
- Author:
- keinfarbton
- Message:
added comment for title case
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r2809 |
r2894 |
|
| 12 | 12 | only 99 % complete, because it does not take into account Conditional |
|---|
| 13 | 13 | case mappings. This means the Greek Letter Sigma will not be correctly |
|---|
| 14 | | case mapped at the end of a Word, and the Locales Lithuanian, Turkish |
|---|
| | 14 | case mapped at the end of a Word, and the Locales Lithuanian, Turkish |
|---|
| 15 | 15 | and Azeri are not taken into account during Case Mappings. This means |
|---|
| 16 | 16 | all in all around 12 Characters will not be mapped correctly under |
|---|
| 17 | 17 | some circumstances. |
|---|
| 18 | | |
|---|
| | 18 | |
|---|
| 19 | 19 | ICU4j also does not handle these cases at the moment. |
|---|
| 20 | | |
|---|
| | 20 | |
|---|
| 21 | 21 | Unittests are written against output from ICU4j |
|---|
| 22 | | |
|---|
| | 22 | |
|---|
| 23 | 23 | This Module tries to minimize Memory allocation and usage. You can |
|---|
| 24 | 24 | always pass the output buffer that should be used to the case mapping |
|---|
| … | … | |
| 36 | 36 | /** |
|---|
| 37 | 37 | * Converts an Utf8 String to Upper case |
|---|
| 38 | | * |
|---|
| | 38 | * |
|---|
| 39 | 39 | * Params: |
|---|
| 40 | 40 | * input = String to be case mapped |
|---|
| … | … | |
| 71 | 71 | working[oprod..produced] = (*s).upperCaseMapping; |
|---|
| 72 | 72 | continue; |
|---|
| 73 | | } |
|---|
| 74 | | } |
|---|
| | 73 | } |
|---|
| | 74 | } |
|---|
| 75 | 75 | // Make sure no relocation is made in the toUtf8 Method |
|---|
| 76 | 76 | if(produced + 1 >= output.length) |
|---|
| … | … | |
| 85 | 85 | /** |
|---|
| 86 | 86 | * Converts an Utf8 String to Upper case |
|---|
| 87 | | * |
|---|
| | 87 | * |
|---|
| 88 | 88 | * Params: |
|---|
| 89 | 89 | * input = String to be case mapped |
|---|
| … | … | |
| 122 | 122 | produced += res.length; |
|---|
| 123 | 123 | continue; |
|---|
| 124 | | } |
|---|
| 125 | | } |
|---|
| | 124 | } |
|---|
| | 125 | } |
|---|
| 126 | 126 | // Make sure no relocation is made in the toUtf8 Method |
|---|
| 127 | 127 | if(produced + 4 >= output.length) |
|---|
| … | … | |
| 141 | 141 | /** |
|---|
| 142 | 142 | * Converts an Utf16 String to Upper case |
|---|
| 143 | | * |
|---|
| | 143 | * |
|---|
| 144 | 144 | * Params: |
|---|
| 145 | 145 | * input = String to be case mapped |
|---|
| … | … | |
| 195 | 195 | /** |
|---|
| 196 | 196 | * Converts an Utf32 String to Upper case |
|---|
| 197 | | * |
|---|
| | 197 | * |
|---|
| 198 | 198 | * Params: |
|---|
| 199 | 199 | * input = String to be case mapped |
|---|
| … | … | |
| 237 | 237 | /** |
|---|
| 238 | 238 | * Converts an Utf8 String to Lower case |
|---|
| 239 | | * |
|---|
| | 239 | * |
|---|
| 240 | 240 | * Params: |
|---|
| 241 | 241 | * input = String to be case mapped |
|---|
| … | … | |
| 274 | 274 | produced += res.length; |
|---|
| 275 | 275 | continue; |
|---|
| 276 | | } |
|---|
| 277 | | } |
|---|
| | 276 | } |
|---|
| | 277 | } |
|---|
| 278 | 278 | // Make sure no relocation is made in the toUtf8 Method |
|---|
| 279 | 279 | if(produced + 4 >= output.length) |
|---|
| … | … | |
| 293 | 293 | /** |
|---|
| 294 | 294 | * Converts an Utf16 String to Lower case |
|---|
| 295 | | * |
|---|
| | 295 | * |
|---|
| 296 | 296 | * Params: |
|---|
| 297 | 297 | * input = String to be case mapped |
|---|
| … | … | |
| 348 | 348 | /** |
|---|
| 349 | 349 | * Converts an Utf32 String to Lower case |
|---|
| 350 | | * |
|---|
| | 350 | * |
|---|
| 351 | 351 | * Params: |
|---|
| 352 | 352 | * input = String to be case mapped |
|---|
| … | … | |
| 390 | 390 | * Converts an Utf8 String to Folding case |
|---|
| 391 | 391 | * Folding case is used for case-insensitive comparisons. |
|---|
| 392 | | * |
|---|
| | 392 | * |
|---|
| 393 | 393 | * Params: |
|---|
| 394 | 394 | * input = String to be case mapped |
|---|
| … | … | |
| 439 | 439 | * Converts an Utf16 String to Folding case |
|---|
| 440 | 440 | * Folding case is used for case-insensitive comparisons. |
|---|
| 441 | | * |
|---|
| | 441 | * |
|---|
| 442 | 442 | * Params: |
|---|
| 443 | 443 | * input = String to be case mapped |
|---|
| … | … | |
| 487 | 487 | * Converts an Utf32 String to Folding case |
|---|
| 488 | 488 | * Folding case is used for case-insensitive comparisons. |
|---|
| 489 | | * |
|---|
| | 489 | * |
|---|
| 490 | 490 | * Params: |
|---|
| 491 | 491 | * input = String to be case mapped |
|---|
| … | … | |
| 523 | 523 | * Determines if a character is a digit. It returns true for decimal |
|---|
| 524 | 524 | * digits only. |
|---|
| 525 | | * |
|---|
| | 525 | * |
|---|
| 526 | 526 | * Params: |
|---|
| 527 | 527 | * ch = the character to be inspected |
|---|
| … | … | |
| 535 | 535 | /** |
|---|
| 536 | 536 | * Determines if a character is a letter. |
|---|
| 537 | | * |
|---|
| | 537 | * |
|---|
| 538 | 538 | * Params: |
|---|
| 539 | 539 | * ch = the character to be inspected |
|---|
| … | … | |
| 552 | 552 | * Determines if a character is a letter or a |
|---|
| 553 | 553 | * decimal digit. |
|---|
| 554 | | * |
|---|
| | 554 | * |
|---|
| 555 | 555 | * Params: |
|---|
| 556 | 556 | * ch = the character to be inspected |
|---|
| … | … | |
| 558 | 558 | bool isLetterOrDigit(int ch) { |
|---|
| 559 | 559 | UnicodeData **d = (ch in unicodeData); |
|---|
| 560 | | return (d !is null) && ((*d).generalCategory & |
|---|
| | 560 | return (d !is null) && ((*d).generalCategory & |
|---|
| 561 | 561 | ( UnicodeData.GeneralCategory.Lu |
|---|
| 562 | 562 | | UnicodeData.GeneralCategory.Ll |
|---|
| … | … | |
| 579 | 579 | /** |
|---|
| 580 | 580 | * Determines if a character is a title case letter. |
|---|
| | 581 | * In case of combined letters, only the first is upper and the second is lower. |
|---|
| | 583 | * Some of these special characters can be found in the Croatian and Greek languages. |
|---|
| | 583 | * See_Also: http://en.wikipedia.org/wiki/Capitalization |
|---|
| 581 | 584 | * Params: |
|---|
| 582 | 585 | * ch = the character to be inspected |
|---|
| … | … | |
| 599 | 602 | /** |
|---|
| 600 | 603 | * Determines if a character is a Whitespace character. |
|---|
| 601 | | * Whitespace characters are characters in the |
|---|
| | 604 | * Whitespace characters are characters in the |
|---|
| 602 | 605 | * General Categories Zs, Zl, Zp without the No Break |
|---|
| 603 | 606 | * spaces plus the control characters out of the ASCII |
|---|
| 604 | 607 | * range, that are used as spaces: |
|---|
| 605 | 608 | * TAB VT LF FF CR FS GS RS US NL |
|---|
| 606 | | * |
|---|
| | 609 | * |
|---|
| 607 | 610 | * WARNING: look at isSpace, maybe that function does |
|---|
| 608 | 611 | * more what you expect. |
|---|
| 609 | | * |
|---|
| | 612 | * |
|---|
| 610 | 613 | * Params: |
|---|
| 611 | 614 | * ch = the character to be inspected |
|---|
| … | … | |
| 615 | 618 | return true; |
|---|
| 616 | 619 | UnicodeData **d = (ch in unicodeData); |
|---|
| 617 | | return (d !is null) && ((*d).generalCategory & |
|---|
| | 620 | return (d !is null) && ((*d).generalCategory & |
|---|
| 618 | 621 | ( UnicodeData.GeneralCategory.Zs |
|---|
| 619 | 622 | | UnicodeData.GeneralCategory.Zl |
|---|
| … | … | |
| 621 | 624 | && ch != 0x00A0 // NBSP |
|---|
| 622 | 625 | && ch != 0x202F // NARROW NBSP |
|---|
| 623 | | && ch != 0xFEFF; // ZERO WIDTH NBSP |
|---|
| | 626 | && ch != 0xFEFF; // ZERO WIDTH NBSP |
|---|
| 624 | 627 | } |
|---|
| 625 | 628 | |
|---|
| … | … | |
| 627 | 630 | * Determines if a character is a Space character as |
|---|
| 628 | 631 | * specified in the Unicode Standard. |
|---|
| 629 | | * |
|---|
| | 632 | * |
|---|
| 630 | 633 | * WARNING: look at isWhitespace, maybe that function does |
|---|
| 631 | 634 | * more what you expect. |
|---|
| 632 | | * |
|---|
| | 635 | * |
|---|
| 633 | 636 | * Params: |
|---|
| 634 | 637 | * ch = the character to be inspected |
|---|
| … | … | |
| 636 | 639 | bool isSpace(dchar ch) { |
|---|
| 637 | 640 | UnicodeData **d = (ch in unicodeData); |
|---|
| 638 | | return (d !is null) && ((*d).generalCategory & |
|---|
| | 641 | return (d !is null) && ((*d).generalCategory & |
|---|
| 639 | 642 | ( UnicodeData.GeneralCategory.Zs |
|---|
| 640 | 643 | | UnicodeData.GeneralCategory.Zl |
|---|
| … | … | |
| 646 | 649 | * Determines if a character is a printable character as |
|---|
| 647 | 650 | * specified in the Unicode Standard. |
|---|
| 648 | | * |
|---|
| 649 | | * |
|---|
| | 651 | * |
|---|
| | 652 | * |
|---|
| 650 | 653 | * WARNING: look at isWhitespace, maybe that function does |
|---|
| 651 | 654 | * more what you expect. |
|---|
| 652 | | * |
|---|
| | 655 | * |
|---|
| 653 | 656 | * Params: |
|---|
| 654 | 657 | * ch = the character to be inspected |
|---|
| … | … | |
| 656 | 659 | bool isPrintable(dchar ch) { |
|---|
| 657 | 660 | UnicodeData **d = (ch in unicodeData); |
|---|
| 658 | | return (d !is null) && ((*d).generalCategory & |
|---|
| | 661 | return (d !is null) && ((*d).generalCategory & |
|---|
| 659 | 662 | ( UnicodeData.GeneralCategory.Cn |
|---|
| 660 | 663 | | UnicodeData.GeneralCategory.Cc |
|---|
| … | … | |
| 667 | 670 | void main() {} |
|---|
| 668 | 671 | |
|---|
| 669 | | debug (UnitTest) { |
|---|
| | 672 | debug (UnitTest) { |
|---|
| 670 | 673 | |
|---|
| 671 | 674 | unittest { |
|---|
| 672 | | |
|---|
| 673 | | |
|---|
| | 675 | |
|---|
| | 676 | |
|---|
| 674 | 677 | // 1) No Buffer passed, no resize, no SpecialCase |
|---|
| 675 | | |
|---|
| 676 | | char[] testString1utf8 = "\u00E4\u00F6\u00FC"; |
|---|
| | 678 | |
|---|
| | 679 | char[] testString1utf8 = "\u00E4\u00F6\u00FC"; |
|---|
| 677 | 680 | wchar[] testString1utf16 = "\u00E4\u00F6\u00FC"; |
|---|
| 678 | 681 | dchar[] testString1utf32 = "\u00E4\u00F6\u00FC"; |
|---|
| … | … | |
| 686 | 689 | dchar[] resultString1utf32 = toUpper(testString1utf32); |
|---|
| 687 | 690 | assert(resultString1utf32 == refString1utf32); |
|---|
| 688 | | |
|---|
| | 691 | |
|---|
| 689 | 692 | // 2) Buffer passed, no resize, no SpecialCase |
|---|
| 690 | 693 | char[60] buffer1utf8; |
|---|
| … | … | |
| 715 | 718 | assert(resultString1utf32.ptr != buffer2utf32.ptr); |
|---|
| 716 | 719 | assert(resultString1utf32 == refString1utf32); |
|---|
| 717 | | |
|---|
| | 720 | |
|---|
| 718 | 721 | // 4) Buffer passed, resize necessary, extensive SpecialCase |
|---|
| 719 | | |
|---|
| 720 | | |
|---|
| 721 | | char[] testString2utf8 = "\uFB03\uFB04\uFB05"; |
|---|
| | 722 | |
|---|
| | 723 | |
|---|
| | 724 | char[] testString2utf8 = "\uFB03\uFB04\uFB05"; |
|---|
| 722 | 725 | wchar[] testString2utf16 = "\uFB03\uFB04\uFB05"; |
|---|
| 723 | 726 | dchar[] testString2utf32 = "\uFB03\uFB04\uFB05"; |
|---|
| … | … | |
| 739 | 742 | |
|---|
| 740 | 743 | unittest { |
|---|
| 741 | | |
|---|
| 742 | | |
|---|
| | 744 | |
|---|
| | 745 | |
|---|
| 743 | 746 | // 1) No Buffer passed, no resize, no SpecialCase |
|---|
| 744 | | |
|---|
| | 747 | |
|---|
| 745 | 748 | char[] testString1utf8 = "\u00C4\u00D6\u00DC"; |
|---|
| 746 | 749 | wchar[] testString1utf16 = "\u00C4\u00D6\u00DC"; |
|---|
| 747 | 750 | dchar[] testString1utf32 = "\u00C4\u00D6\u00DC"; |
|---|
| 748 | | char[] refString1utf8 = "\u00E4\u00F6\u00FC"; |
|---|
| | 751 | char[] refString1utf8 = "\u00E4\u00F6\u00FC"; |
|---|
| 749 | 752 | wchar[] refString1utf16 = "\u00E4\u00F6\u00FC"; |
|---|
| 750 | 753 | dchar[] refString1utf32 = "\u00E4\u00F6\u00FC"; |
|---|
| … | … | |
| 755 | 758 | dchar[] resultString1utf32 = toLower(testString1utf32); |
|---|
| 756 | 759 | assert(resultString1utf32 == refString1utf32); |
|---|
| 757 | | |
|---|
| | 760 | |
|---|
| 758 | 761 | // 2) Buffer passed, no resize, no SpecialCase |
|---|
| 759 | 762 | char[60] buffer1utf8; |
|---|
| … | … | |
| 784 | 787 | assert(resultString1utf32.ptr != buffer2utf32.ptr); |
|---|
| 785 | 788 | assert(resultString1utf32 == refString1utf32); |
|---|
| 786 | | |
|---|
| | 789 | |
|---|
| 787 | 790 | // 4) Buffer passed, resize necessary, extensive SpecialCase |
|---|
| 788 | | |
|---|
| 789 | | char[] testString2utf8 = "\u0130\u0130\u0130"; |
|---|
| | 791 | |
|---|
| | 792 | char[] testString2utf8 = "\u0130\u0130\u0130"; |
|---|
| 790 | 793 | wchar[] testString2utf16 = "\u0130\u0130\u0130"; |
|---|
| 791 | 794 | dchar[] testString2utf32 = "\u0130\u0130\u0130"; |
|---|
Download in other formats:
|
 |
 |
|
 |
Copyright © 2006-2008 Tango. All Rights Reserved. | Page Width:
Static or
Dynamic