Ticket #1: decodeUTF.patch
| File decodeUTF.patch, 2.7 kB (added by keinfarbton, 2 years ago) |
|---|
-
descent.core/src/descent/core/dom/Lexer.java
old new 125 125 break; 126 126 127 127 default: 128 if ( (c & 0x80) !=0) { 128 if (c >= 0x80) { 129 129 int u = decodeUTF(); 130 130 if (u == PS || u == LS) { 131 131 scriptLine.setSourceRange(0, p); … … 948 948 do 949 949 { 950 950 c = input[++p]; 951 } while (c > 0 && Chars.isidchar(c) || ((c & 0x80) != 0&& UniAlpha.isUniAlpha(decodeUTF()))); 951 } while (c > 0 && Chars.isidchar(c) || ((c >= 0x80) && UniAlpha.isUniAlpha(decodeUTF()))); 952 952 sv = stringtable.update(input, t.ptr, p - t.ptr); 953 953 id = (Identifier) sv.ptrvalue; 954 954 if (id == null) … … 1235 1235 c -= 'a' - 10; 1236 1236 else if (c >= 'A' && c <= 'F') 1237 1237 c -= 'A' - 10; 1238 else if ( (c & 0x80) !=0) 1238 else if (c >= 0x80) 1239 1239 { p--; 1240 1240 int u = decodeUTF(); 1241 1241 p++; … … 1320 1320 } 1321 1321 1322 1322 default: 1323 if ( (c & 0x80) !=0) 1323 if (c >= 0x80) 1324 1324 { 1325 1325 p--; 1326 1326 c = decodeUTF(); … … 1385 1385 } 1386 1386 1387 1387 default: 1388 if ( (c & 0x80) !=0) { 1388 if (c >= 0x80) { 1389 1389 p--; 1390 1390 c = decodeUTF(); 1391 1391 p++; … … 2157 2157 break; 2158 2158 2159 2159 default: 2160 if ( (c & 0x80) !=0) { 2160 if (c >= 0x80) { 2161 2161 int u = decodeUTF(); 2162 2162 if (u == PS || u == LS) { 2163 2163 error( … … 2181 2181 continue; 2182 2182 2183 2183 default: 2184 if ( (input[p] & 0x80) !=0) { 2184 if (input[p] >= 0x80) { 2185 2185 int u = decodeUTF(); 2186 2186 if (u == PS || u == LS) { 2187 2187 pragma.setSourceRange(start, p - start); … … 2206 2206 } 2207 2207 2208 2208 private int decodeUTF() { 2209 int[] u = new int[] { 0 }; 2210 int s = p; 2211 int len; 2212 int[] idx; 2213 String msg = null; 2214 2215 // Check length of remaining string up to 6 UTF-8 characters 2216 for (len = 1; len < 6 && len < end; len++) 2217 ; 2218 2219 idx = new int[] { 0 }; 2220 msg = Utf.decodeChar(input, s, len, idx, u); 2221 p += idx[0] - 1; 2222 if (msg != null) 2223 { 2224 error(msg, IProblem.InvalidUtf8Sequence, linnum, p, 1); 2225 } 2226 return u[0]; 
2209 try { 2210 // decode one codepoint, starting at the index p 2211 int result = Character.codePointAt(input, p); 2212 // increase p with the count of chars for the decoded codepoint. 2213 p = Character.offsetByCodePoints(input, 0, input.length, p, 1); 2214 return result; 2215 } catch (Exception e) { 2216 // a problem while decoding the codepoint occurred => invalid input 2217 error("invalid input sequence", IProblem.InvalidUtf8Sequence, linnum, p, 1); 2218 return 0; 2219 } 2227 2220 } 2228 2221 2229 2222 /*
