/**
 * Returns the first code point of the UTF-8 string `str`.
 *
 * Iterating a char[] with a dchar loop variable decodes the string one code
 * point at a time; the function returns as soon as the first one is decoded.
 *
 * Params:
 *     str = UTF-8 encoded text; must contain at least one code point.
 *
 * Returns: the first decoded code point.
 *
 * An empty string has nothing to return. The original code simply fell off
 * the end of the function in that case; the failure is now spelled out so the
 * precondition is visible to readers and shows up with a message in
 * assert-enabled builds.
 */
dchar getFirstCodepoint( char[] str ){
    foreach( dchar d; str ){
        return d;
    }
    assert( false, "getFirstCodepoint: empty string has no code point" );
}
|---|
/**
 * Converts a code point index into the byte (code unit) index of that code
 * point inside the UTF-8 string `str`.
 *
 * The string is walked from the start; for each of the `cpIndex` code points
 * the lead byte alone decides how many bytes are skipped:
 *   - below 0x80 : 1 byte
 *   - below 0xE0 : 2 bytes
 *   - below 0xF0 : 3 bytes
 *   - otherwise  : 4 bytes
 *
 * Continuation bytes are not validated, and no explicit length check is made
 * beyond what indexing `str` itself does.
 *
 * Params:
 *     str     = UTF-8 encoded text.
 *     cpIndex = number of code points to skip; values <= 0 yield 0.
 *
 * Returns: the byte offset reached after skipping `cpIndex` code points.
 */
int codepointIndexToIndex( char[] str, int cpIndex ){
    int res = 0;
    for( int cps = cpIndex; cps > 0; cps-- ){
        char lead = str[res];
        if( lead < 0x80 ){
            res += 1;
        }
        else if( lead < 0xE0 ){
            res += 2;
        }
        // Was `lead & 0xF0`, which is non-zero for every byte that reaches
        // this point, so 4-byte sequences were stepped over as 3 bytes.
        else if( lead < 0xF0 ){
            res += 3;
        }
        else{
            res += 4;
        }
    }
    return res;
}
|---|
/**
 * Converts a byte (code unit) index into a code point index for the UTF-8
 * string `str`.
 *
 * The string is walked from the start, advancing by the sequence length that
 * the lead byte implies (1, 2, 3 or 4 bytes) and counting one code point per
 * step, until the byte position is no longer below `index`.
 *
 * Continuation bytes are not validated, and no explicit length check is made
 * beyond what indexing `str` itself does.
 *
 * Params:
 *     str   = UTF-8 encoded text.
 *     index = byte offset into `str`; values <= 0 yield 0.
 *
 * Returns: the number of code points that start before byte offset `index`.
 */
int indexToCodepointIndex( char[] str, int index ){
    int res = 0;
    for( int i = 0; i < index; res++ ){
        char lead = str[i];
        if( lead < 0x80 ){
            i += 1;
        }
        else if( lead < 0xE0 ){
            i += 2;
        }
        // Was `lead & 0xF0`, which is non-zero for every byte that reaches
        // this point, so 4-byte sequences advanced only 3 bytes and the
        // trailing byte was then counted as an extra code point.
        else if( lead < 0xF0 ){
            i += 3;
        }
        else{
            i += 4;
        }
    }
    return res;
}
|---|
| | 91 | |
|---|
/**
 * Returns the leading slice of `str` that was consumed when converting into
 * a one-element dchar buffer.
 *
 * Params:
 *     str      = UTF-8 encoded text.
 *     consumed = receives the number of bytes reported as consumed by
 *                toString32.
 *
 * Returns: `str[ 0 .. consumed ]` (a slice of the input, not a copy).
 *
 * NOTE(review): this relies on toString32 stopping once the one-element
 * buffer is full, so that the consumed count is the byte length of the first
 * code point - confirm against the Tango Utf documentation.
 */
char[] firstCodePointStr( char[] str, out int consumed ){
    uint used;
    dchar[1] oneCp;
    // Only the consumed-byte count is of interest; the decoded value is dropped.
    str.toString32( oneCp, &used );
    consumed = used;
    return str[ 0 .. used ];
}
|---|
| | 99 | |
|---|
/**
 * Convenience overload: decodes the first code point of `str` and discards
 * the consumed-byte count.
 *
 * Params:
 *     str = UTF-8 encoded text.
 *
 * Returns: whatever the two-argument firstCodePoint returns for `str`.
 */
dchar firstCodePoint( char[] str ){
    int unusedConsumed;
    return firstCodePoint( str, unusedConsumed );
}
|---|
/**
 * Decodes the first code point of `str`.
 *
 * The string is converted with toString32 into a one-element buffer. If
 * nothing was consumed or nothing was produced, a diagnostic line with the
 * string length and its bytes in hex is traced before the asserts fire.
 *
 * Params:
 *     str      = UTF-8 encoded text; expected to hold at least one code point.
 *     consumed = receives the number of bytes reported as consumed.
 *
 * Returns: the single decoded code point.
 */
dchar firstCodePoint( char[] str, out int consumed ){
    uint used;
    dchar[1] oneCp;
    dchar[] decoded = str.toString32( oneCp, &used );
    consumed = used;

    // Leave a trace of the offending input before the asserts below trip.
    if( !( used !is 0 && decoded.length !is 0 ) ){
        Trace.formatln( "dwthelper.utils {}: str.length={} str={:X2}", __LINE__, str.length, cast(ubyte[])str );
    }

    assert( used > 0 );
    assert( decoded.length is 1 );
    return decoded[0];
}
|---|
| | 116 | |
|---|
/**
 * Converts a single code point to its UTF-8 string form via Tango's
 * Utf.toString.
 *
 * Params:
 *     key = the code point to encode.
 *
 * Returns: the result of Utf.toString for a one-element dchar array
 *          containing `key`.
 */
char[] dcharToString( dchar key ){
    // View the by-value parameter as a one-element dchar array for the call.
    dchar[] single = (&key)[ 0 .. 1 ];
    return tango.text.convert.Utf.toString( single );
}
|---|
| | 122 | |
|---|
/**
 * Counts the code points in the UTF-8 string `str` by converting it to
 * UTF-32 and returning the length of the result.
 *
 * A scratch buffer of `str.length` dchars is allocated for the conversion
 * (a UTF-8 string never has more code points than bytes). The result length
 * is traced on every call, and the function asserts that the whole input was
 * consumed.
 *
 * Params:
 *     str = UTF-8 encoded text.
 *
 * Returns: the length of the UTF-32 conversion result.
 */
int codepointCount( char[] str ){
    uint used;
    scope dchar[] scratch = new dchar[]( str.length );
    dchar[] decoded = tango.text.convert.Utf.toString32( str, scratch, &used );
    Trace.formatln( "dwthelper.utils codepointCount {}: res.length={}", __LINE__, decoded.length );
    assert( used is str.length );
    return decoded.length;
}
|---|
| | 131 | |
|---|
| | 132 | alias tango.text.convert.Utf.toString16 toString16; |
|---|
| | 133 | alias tango.text.convert.Utf.toString toString; |
|---|
| | 134 | |
|---|
/**
 * Computes the byte offset, relative to `startIndex`, of the code point that
 * lies `searchRelCp` code points away in the UTF-8 string `str`.
 *
 * A positive `searchRelCp` walks forward, checking each sequence's lead byte
 * and continuation bytes (1 to 4 byte forms). A negative `searchRelCp` walks
 * backward, skipping continuation bytes (10xxxxxx) until a non-continuation
 * byte is reached. Zero leaves the position unchanged.
 *
 * Params:
 *     str         = UTF-8 encoded text.
 *     startIndex  = byte index the search starts from.
 *     searchRelCp = signed number of code points to move.
 *
 * Returns: the signed byte distance from `startIndex` to the position found.
 *
 * Malformed or truncated input, and walking backward past the start of the
 * string, are reported through Utf.onUnicodeError; the backward case also
 * traces the arguments first.
 */
int getRelativeCodePointOffset( char[] str, int startIndex, int searchRelCp ){
    int pos = startIndex;

    // Forward walk: runs only for a positive count. The order of the checks
    // (bounds, then trailing bytes, then lead byte) is kept as is.
    for( ; searchRelCp > 0; searchRelCp-- ){
        if( ( pos < str.length )
            && ( str[pos] & 0x80 ) is 0x00 )
        {
            pos += 1;
        }
        else if( ( pos+1 < str.length )
            && (( str[pos+1] & 0xC0 ) is 0x80 )
            && (( str[pos  ] & 0xE0 ) is 0xC0 ))
        {
            pos += 2;
        }
        else if( ( pos+2 < str.length )
            && (( str[pos+2] & 0xC0 ) is 0x80 )
            && (( str[pos+1] & 0xC0 ) is 0x80 )
            && (( str[pos  ] & 0xF0 ) is 0xE0 ))
        {
            pos += 3;
        }
        else if(( pos+3 < str.length )
            && (( str[pos+3] & 0xC0 ) is 0x80 )
            && (( str[pos+2] & 0xC0 ) is 0x80 )
            && (( str[pos+1] & 0xC0 ) is 0x80 )
            && (( str[pos  ] & 0xF8 ) is 0xF0 ))
        {
            pos += 4;
        }
        else{
            tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", pos );
        }
    }

    // Backward walk: runs only for a negative count. Each step moves left
    // until the byte under `pos` is not a continuation byte.
    for( ; searchRelCp < 0; searchRelCp++ ){
        do{
            pos--;
            if( pos < 0 ){
                Trace.formatln( "dwthelper.utils getRelativeCodePointOffset {}: str={}, startIndex={}, searchRelCp={}", __LINE__, str, startIndex, searchRelCp );
                tango.text.convert.Utf.onUnicodeError( "invalid utf8 input", pos );
            }
        } while(( str[pos] & 0xC0 ) is 0x80 );
    }

    return pos - startIndex;
}
|---|
/**
 * Returns the code point located `searchRelCp` code points away from byte
 * index `startIndex` in `str`.
 *
 * Params:
 *     str         = UTF-8 encoded text.
 *     startIndex  = byte index the search starts from.
 *     searchRelCp = signed number of code points to move.
 *     relIndex    = receives the byte offset, relative to `startIndex`, as
 *                   computed by getRelativeCodePointOffset.
 *
 * Returns: the first code point of `str[ startIndex+relIndex .. $ ]`.
 */
dchar getRelativeCodePoint( char[] str, int startIndex, int searchRelCp, out int relIndex ){
    relIndex = getRelativeCodePointOffset( str, startIndex, searchRelCp );
    char[] tail = str[ startIndex+relIndex .. $ ];
    int unusedConsumed;
    return firstCodePoint( tail, unusedConsumed );
}
|---|
| | 192 | |
|---|
/**
 * Moves a byte offset backward so that it does not point into the middle of
 * a UTF-8 sequence.
 *
 * Offsets that are <= 0 or at/after the end of `str` are returned unchanged.
 * Otherwise the offset is decremented while the byte under it is a
 * continuation byte (10xxxxxx).
 *
 * Params:
 *     str    = UTF-8 encoded text.
 *     offset = byte offset to adjust.
 *
 * Returns: the adjusted offset, never below 0 for an in-range input offset.
 */
int utf8AdjustOffset( char[] str, int offset ){
    if( str.length <= offset || offset <= 0 ){
        return offset;
    }
    // The `offset > 0` guard stops the scan at the first byte. Without it a
    // string that begins with continuation bytes (malformed input) pushed
    // the offset to -1 and indexed outside the array.
    while( offset > 0 && ( str[offset] & 0xC0 ) is 0x80 ){
        offset--;
    }
    return offset;
}
|---|
| | 202 | |
|---|
/**
 * Convenience overload: lower-cases the first code point of `str` and
 * discards the consumed-byte count.
 *
 * Params:
 *     str = UTF-8 encoded text.
 *
 * Returns: whatever the two-argument CharacterFirstToLower returns for `str`.
 */
dchar CharacterFirstToLower( char[] str ){
    int unusedConsumed;
    return CharacterFirstToLower( str, unusedConsumed );
}
|---|
/**
 * Decodes the first code point of `str` and returns its lower-case form.
 *
 * Params:
 *     str      = UTF-8 encoded text.
 *     consumed = receives the byte count reported by firstCodePoint.
 *
 * Returns: the first element of what tango.text.Unicode.toLower produces for
 *          the decoded code point.
 */
dchar CharacterFirstToLower( char[] str, out int consumed ){
    dchar[1] single;
    single[0] = firstCodePoint( str, consumed );
    return tango.text.Unicode.toLower( single )[0];
}
|---|
| | 213 | |
|---|