/**
 * Returns the first code point of a UTF-8 string.
 *
 * The foreach over dchar decodes the leading UTF-8 sequence; the function
 * returns from inside the first iteration.
 *
 * Params:
 *   str = UTF-8 encoded text, must not be empty
 * Returns: the first decoded code point of str
 */
dchar getFirstCodepoint( char[] str ){
    foreach( dchar d; str ){
        return d;
    }
    // An empty string never enters the loop. Fail explicitly instead of
    // running off the end of a non-void function.
    assert( 0, "getFirstCodepoint: str is empty" );
}
|---|
/**
 * Converts a code point index into the matching UTF-8 byte index.
 *
 * Starting at byte 0, skips cpIndex code points. The width of each skipped
 * code point is taken from its lead byte only; the bytes that follow are not
 * validated.
 *
 * Params:
 *   str     = UTF-8 encoded text
 *   cpIndex = number of code points to skip; values <= 0 yield 0
 * Returns: byte offset reached after skipping cpIndex code points
 */
int codepointIndexToIndex( char[] str, int cpIndex ){
    int cps = cpIndex;
    int res = 0;
    while( cps > 0 ){
        cps--;
        char lead = str[res];
        if( lead < 0x80 ){
            // single byte (ASCII)
            res+=1;
        }
        else if( lead < 0xE0 ){
            // two-byte sequence
            res+=2;
        }
        else if( lead < 0xF0 ){
            // three-byte sequence. The former test "lead & 0xF0" was true for
            // every remaining byte, so four-byte sequences were counted as
            // three bytes.
            res+=3;
        }
        else{
            // four-byte sequence
            res+=4;
        }
    }
    return res;
}
|---|
/**
 * Converts a UTF-8 byte index into the matching code point index.
 *
 * Steps through str one code point at a time, using only the lead byte to
 * decide the width of each step, and counts the steps taken while the byte
 * position is still below index. The bytes after a lead byte are not
 * validated.
 *
 * Params:
 *   str   = UTF-8 encoded text
 *   index = byte offset into str; values <= 0 yield 0
 * Returns: number of code points that start before byte offset index
 */
int indexToCodepointIndex( char[] str, int index ){
    int i = 0;
    int res = 0;
    while( i < index ){
        char lead = str[i];
        if( lead < 0x80 ){
            // single byte (ASCII)
            i+=1;
        }
        else if( lead < 0xE0 ){
            // two-byte sequence
            i+=2;
        }
        else if( lead < 0xF0 ){
            // three-byte sequence. The former test "lead & 0xF0" was true for
            // every remaining byte, so four-byte sequences were stepped over
            // as three bytes and their last byte was counted as an extra
            // code point.
            i+=3;
        }
        else{
            // four-byte sequence
            i+=4;
        }
        res++;
    }
    return res;
}
|---|
| | 94 | |
|---|
/**
 * Returns the leading part of str that toString32 consumed while filling a
 * one-element dchar buffer.
 *
 * NOTE(review): this relies on toString32 reporting, through its third
 * argument, the number of input bytes it read - confirm against the Tango
 * Utf documentation.
 *
 * Params:
 *   str      = UTF-8 encoded text
 *   consumed = receives the byte count reported by toString32
 * Returns: the slice str[ 0 .. consumed ]
 */
char[] firstCodePointStr( char[] str, out int consumed ){
    dchar[1] buf;
    uint ate;
    // Only the consumed byte count is needed here; the decoded value was
    // previously stored in an unused local and is now simply discarded.
    str.toString32( buf, &ate );
    consumed = ate;
    return str[ 0 .. ate ];
}
|---|
| | 102 | |
|---|
/**
 * Convenience overload of firstCodePoint for callers that do not need the
 * consumed byte count.
 *
 * Params:
 *   str = UTF-8 encoded text
 * Returns: whatever the two-argument overload returns for str
 */
dchar firstCodePoint( char[] str ){
    // the two-argument overload requires an out parameter; its value is dropped
    int consumedIgnored;
    dchar cp = firstCodePoint( str, consumedIgnored );
    return cp;
}
|---|
/**
 * Decodes the first code point of str through toString32 with a one-element
 * output buffer.
 *
 * When nothing was consumed or nothing was produced, a diagnostic line with
 * the string length and its bytes in hex is written to Trace before the
 * assertions fire.
 *
 * Params:
 *   str      = UTF-8 encoded text
 *   consumed = receives the byte count reported by toString32
 * Returns: the single decoded code point
 */
dchar firstCodePoint( char[] str, out int consumed ){
    dchar[1] single;
    uint bytesRead;
    dchar[] decoded = str.toString32( single, &bytesRead );
    consumed = bytesRead;

    bool nothingDecoded = ( bytesRead is 0 ) || ( decoded.length is 0 );
    if( nothingDecoded ){
        Trace.formatln( "dwthelper.utils {}: str.length={} str={:X2}", __LINE__, str.length, cast(ubyte[])str );
    }

    assert( bytesRead > 0 );
    assert( decoded.length is 1 );
    return decoded[0];
}
|---|
| | 119 | |
|---|
/**
 * Converts a single code point to its char[] representation by passing a
 * one-element dchar array to tango.text.convert.Utf.toString.
 *
 * Params:
 *   key = the code point to convert
 * Returns: the result of Utf.toString for the one-element input
 */
char[] dcharToString( dchar key ){
    // view the parameter itself as a one-element dchar array
    dchar[] one = (&key)[ 0 .. 1 ];
    char[] utf8 = tango.text.convert.Utf.toString( one );
    return utf8;
}
|---|
| | 125 | |
|---|
/**
 * Counts the code points in str by decoding the whole string to UTF-32.
 *
 * A scratch buffer with one dchar per input byte is handed to toString32;
 * the function asserts that the reported consumed byte count equals
 * str.length.
 *
 * Params:
 *   str = UTF-8 encoded text
 * Returns: length of the dchar array returned by toString32
 */
int codepointCount( char[] str ){
    uint consumed;
    scope dchar[] scratch = new dchar[]( str.length );
    dchar[] decoded = tango.text.convert.Utf.toString32( str, scratch, &consumed );
    assert( consumed is str.length );
    return decoded.length;
}
|---|
| | 133 | |
|---|
| | 134 | alias tango.text.convert.Utf.toString16 toString16; |
|---|
| | 135 | alias tango.text.convert.Utf.toString toString; |
|---|
| | 136 | |
|---|
/**
 * Computes the byte distance from startIndex to the code point that lies
 * searchRelCp code points away.
 *
 * A positive searchRelCp walks forward, checking the lead byte and every
 * continuation byte of each sequence. A negative searchRelCp walks backward,
 * skipping continuation bytes (10xxxxxx) until a non-continuation byte is
 * found. Zero returns 0.
 *
 * Malformed or truncated input, and positions outside str, are reported
 * through tango.text.convert.Utf.onUnicodeError.
 * NOTE(review): the code assumes onUnicodeError does not return (throws) -
 * confirm against Tango.
 *
 * Params:
 *   str         = UTF-8 encoded text
 *   startIndex  = byte offset in str where the search starts
 *   searchRelCp = signed number of code points to move
 * Returns: signed byte offset relative to startIndex
 */
int getRelativeCodePointOffset( char[] str, int startIndex, int searchRelCp ){
    int i = startIndex;

    // Forward: consume one complete sequence per remaining code point.
    while( searchRelCp > 0 ){
        if( ( i < str.length )
                && ( str[i] & 0x80 ) is 0x00 )
        {
            // 0xxxxxxx
            i+=1;
        }
        else if( ( i+1 < str.length )
                && (( str[i+1] & 0xC0 ) is 0x80 )
                && (( str[i  ] & 0xE0 ) is 0xC0 ))
        {
            // 110xxxxx 10xxxxxx
            i+=2;
        }
        else if( ( i+2 < str.length )
                && (( str[i+2] & 0xC0 ) is 0x80 )
                && (( str[i+1] & 0xC0 ) is 0x80 )
                && (( str[i  ] & 0xF0 ) is 0xE0 ))
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            i+=3;
        }
        else if(( i+3 < str.length )
                && (( str[i+3] & 0xC0 ) is 0x80 )
                && (( str[i+2] & 0xC0 ) is 0x80 )
                && (( str[i+1] & 0xC0 ) is 0x80 )
                && (( str[i  ] & 0xF8 ) is 0xF0 ))
        {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            i+=4;
        }
        else{
            tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
        }
        searchRelCp--;
    }

    // Backward: step to the previous non-continuation byte per code point.
    while( searchRelCp < 0 ){
        do{
            i--;
            // i >= str.length happens when startIndex lies beyond the end of
            // str; report it instead of indexing outside the array.
            if( i < 0 || i >= str.length ){
                Trace.formatln( "dwthelper.utils getRelativeCodePointOffset {}: str={}, startIndex={}, searchRelCp={}", __LINE__, str, startIndex, searchRelCp );
                tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", i );
            }
        } while(( str[i] & 0xC0 ) is 0x80 );
        searchRelCp++;
    }

    return i - startIndex;
}
|---|
/**
 * Returns the code point located searchRelCp code points away from
 * startIndex.
 *
 * The byte offset is obtained from getRelativeCodePointOffset and stored in
 * relIndex; the code point is then decoded from the remainder of str that
 * starts at startIndex + relIndex.
 *
 * Params:
 *   str         = UTF-8 encoded text
 *   startIndex  = byte offset in str where the search starts
 *   searchRelCp = signed number of code points to move
 *   relIndex    = receives the byte offset relative to startIndex
 * Returns: the code point decoded at the resulting position
 */
dchar getRelativeCodePoint( char[] str, int startIndex, int searchRelCp, out int relIndex ){
    relIndex = getRelativeCodePointOffset( str, startIndex, searchRelCp );
    char[] tail = str[ startIndex+relIndex .. $ ];
    // the consumed byte count is not reported to the caller
    int consumedIgnored;
    return firstCodePoint( tail, consumedIgnored );
}
|---|
| | 194 | |
|---|
/**
 * Moves a byte offset back to the start of the UTF-8 sequence it points into.
 *
 * Offsets that are <= 0 or not below str.length are returned unchanged.
 * Otherwise the offset is decremented while it points at a continuation byte
 * (10xxxxxx).
 *
 * Params:
 *   str    = UTF-8 encoded text
 *   offset = byte offset into str
 * Returns: the adjusted offset, never below 0 for an offset that was inside str
 */
int utf8AdjustOffset( char[] str, int offset ){
    if( str.length <= offset || offset <= 0 ){
        return offset;
    }
    // The lower bound stops the scan at index 0 when str begins with
    // continuation bytes; without it the loop would read str[-1].
    while( offset > 0 && ( str[offset] & 0xC0 ) is 0x80 ){
        offset--;
    }
    return offset;
}
|---|
| | 204 | |
|---|
/**
 * Convenience overload of CharacterFirstToLower for callers that do not need
 * the consumed byte count.
 *
 * Params:
 *   str = UTF-8 encoded text
 * Returns: whatever the two-argument overload returns for str
 */
dchar CharacterFirstToLower( char[] str ){
    // the two-argument overload requires an out parameter; its value is dropped
    int consumedIgnored;
    dchar lowered = CharacterFirstToLower( str, consumedIgnored );
    return lowered;
}
|---|
/**
 * Decodes the first code point of str and passes it through
 * tango.text.Unicode.toLower.
 *
 * Params:
 *   str      = UTF-8 encoded text
 *   consumed = receives the byte count reported by firstCodePoint
 * Returns: the first element of the array returned by toLower
 */
dchar CharacterFirstToLower( char[] str, out int consumed ){
    dchar[1] single;
    single[0] = firstCodePoint( str, consumed );
    dchar[] lowered = tango.text.Unicode.toLower( single );
    return lowered[0];
}
|---|
| | 215 | |
|---|