| 1 |
/************************************************************************************************** |
|---|
| 2 |
* copyright: Copyright (c) 2007-2008 Diggory Hardy. |
|---|
| 3 |
* |
|---|
| 4 |
* author: Diggory Hardy, diggory.hardy@gmail.com |
|---|
| 5 |
* |
|---|
| 6 |
* license: BSD style: $(LICENSE) |
|---|
| 7 |
* |
|---|
| 8 |
* This contains templates for converting a char[] to various data-types. |
|---|
| 9 |
* |
|---|
| 10 |
* parseTo is roughly the inverse of $(B parseFrom) and should read any data output by $(B parseFrom). |
|---|
| 11 |
* |
|---|
| 12 |
* This module basically implements the following templated function for most basic D types: |
|---|
| 13 |
* bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char. |
|---|
| 14 |
* It also supports arrays and associative arrays of any supported type (including of other arrays) |
|---|
| 15 |
* and has special handling for strings (char[]) and binary (ubyte[]) data-types. |
|---|
| 16 |
* ----------------------------- |
|---|
| 17 |
* T parseTo(T) (char[] source); |
|---|
| 18 |
* ----------------------------- |
|---|
| 19 |
* |
|---|
| 20 |
* $(I source) is the string to parse, and data of the templated type that is read from the string |
|---|
| 21 |
* is returned. See the examples to get a better idea of its use. |
|---|
| 22 |
* |
|---|
| 23 |
* Syntax: |
|---|
| 24 |
* The syntax for parsing $(I source) is mostly the same used by D without any prefixes/suffixes |
|---|
| 25 |
* (except 0x, 0b & 0o base specifiers). Also a special ubyte[] syntax is supported; see examples. |
|---|
| 26 |
* The following escape sequences are supported for strings and characters: \' \" \\ |
|---|
| 27 |
* \a \b \f \n \r \t \v . Associative array literals use the same syntax as D, described here: |
|---|
| 28 |
* $(LINK http://www.digitalmars.com/d/2.0/expression.html#AssocArrayLiteral). All whitespace is |
|---|
| 29 |
* ignored (except of course within strings). |
|---|
| 30 |
* |
|---|
| 31 |
* There are also some public utility functions with their own documentation. |
|---|
| 32 |
* |
|---|
| 33 |
* Throws: |
|---|
| 34 |
* On errors, a ParseException or a UnicodeException (both extend TextException) is thrown with a |
|---|
| 35 |
* suitable message. No other exceptions should be thrown. |
|---|
| 36 |
* |
|---|
| 37 |
* Remarks: |
|---|
| 38 |
* There is currently no support for reading wchar/dchar strings. There are, however, unicode |
|---|
| 39 |
* conversions for converting UTF-8 to UTF-16/32. Be careful if converting on a char-by-char basis; |
|---|
| 40 |
* such conversions cannot be used for non-ascii characters. |
|---|
| 41 |
* |
|---|
| 42 |
* Examples: |
|---|
| 43 |
* ------------------------------------------------------------------------------------------------ |
|---|
| 44 |
* // Basic examples: |
|---|
| 45 |
* ulong a = parseTo!(ulong) ("20350"); |
|---|
| 46 |
* float d = parseTo!(float) (" 1.2e-9 "); |
|---|
| 47 |
* int[] b = parseTo!(int[]) ("[0,1,2,3]"); |
|---|
| 48 |
* |
|---|
| 49 |
* // String and char[] syntax: |
|---|
| 50 |
* char[] c = parseTo!(char[]) ("\"A string\""); |
|---|
| 51 |
* char[] e = parseTo!(char[]) ("['a','n','o','t','h','e','r', ' ' ,'s','t','r','i','n','g']"); |
|---|
| 52 |
* |
|---|
| 53 |
* // These can be used interchangeably; here's a more complex example of an associative array: |
|---|
| 54 |
* bool[char[]] f = parseTo!(bool[char[]]) ("[ \"one\":true, ['t','w','o']:false, \"three\":1, \"four\":000 ]"); |
|---|
| 55 |
* |
|---|
| 56 |
* // There is also a special notation for ubyte[] types: |
|---|
| 57 |
* // The digits following 0x must be in pairs and each specify one ubyte. |
|---|
| 58 |
* assert ( parseTo!(ubyte[]) (`0x01F2AC`) == parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) ); |
|---|
| 59 |
* |
|---|
| 60 |
* // There's no limit to the complexity! |
|---|
| 61 |
* char[char[][][][char]][bool] z = ...; // don't expect me to write this! |
|---|
| 62 |
* ------------------------------------------------------------------------------------------------ |
|---|
| 63 |
*************************************************************************************************/ |
|---|
| 64 |
|
|---|
| 65 |
module tango.scrapple.text.convert.parseTo; |
|---|
| 66 |
|
|---|
| 67 |
// tango imports |
|---|
| 68 |
import tango.core.Exception : TextException, UnicodeException; |
|---|
| 69 |
import cInt = tango.text.convert.Integer; |
|---|
| 70 |
import cFloat = tango.text.convert.Float; |
|---|
| 71 |
import Utf = tango.text.convert.Utf; |
|---|
| 72 |
import Util = tango.text.Util; |
|---|
| 73 |
|
|---|
| 74 |
/**
 * Exception thrown by this module when source text cannot be parsed.
 *
 * Derives from TextException, so callers can catch either type.
 */
class ParseException : TextException
{
    /// Creates the exception; msg is handed to the TextException constructor unchanged.
    this (char[] msg) { super (msg); }
}
|---|
| 84 |
|
|---|
| 85 |
|
|---|
| 86 |
//BEGIN parseTo templates |
|---|
| 87 |
|
|---|
| 88 |
// Associative arrays |
|---|
| 89 |
|
|---|
| 90 |
const char[] AA_ERR = "Invalid associative array: ";

/** Parses an associative-array literal of the form $(B [ KEY : DATA, ... ]).
 *
 * The source is trimmed, its enclosing brackets are checked, and the contents are divided into
 * KEY:DATA pairs with split. Each pair is cut at the first ':' found outside a quoted string or
 * character; the two halves are parsed with parseTo!(S) and parseTo!(T) respectively.
 *
 * Throws:
 *  ParseException if the brackets are missing, a quoted section of a key is unterminated or ends
 *  in an unfinished escape sequence, a pair has no ':' separator, or nothing follows the ':'.
 *  Exceptions thrown while parsing the key or data are propagated.
 */
T[S] parseTo(T : T[S], S) (char[] src) {
    src = Util.trim(src);
    if (src.length < 2 || src[0] != '[' || src[$-1] != ']')
        throw new ParseException (AA_ERR ~ "not [ ... ]");  // bad braces.

    T[S] ret;
    foreach (char[] pair; split (src[1..$-1])) {
        uint i = 0;
        while (i < pair.length) {   // advance to the ':'
            char c = pair[i];
            if (c == ':') break;
            if (c == '\'' || c == '"') {    // string or character: skip to the matching quote
                ++i;
                while (i < pair.length && pair[i] != c) {
                    if (pair[i] == '\\') {
                        // Need room for the escaped character plus the closing quote.
                        if (i+2 >= pair.length) throw new ParseException (AA_ERR ~ "unfinished escape sequence within string/char");
                        ++i;    // escape seq.
                    }
                    ++i;
                }
                // Ran off the end without finding the closing quote.
                if (i == pair.length) {
                    throw new ParseException (AA_ERR ~ "unterminated string/char within KEY");
                }
            }
            ++i;
        }
        // No ':' was found anywhere in this pair.
        if (i == pair.length) {
            throw new ParseException (AA_ERR ~ "encountered [ ... KEY] (missing :DATA)");
        }
        // A ':' was found but only whitespace (or nothing) follows it.
        char[] data = pair[i+1..$];
        if (Util.trim (data).length == 0) {
            throw new ParseException (AA_ERR ~ "encountered [ ... KEY:] (missing DATA)");
        }
        ret[parseTo!(S) (pair[0..i])] = parseTo!(T) (data);
    }
    return ret;
}
|---|
| 124 |
debug (UnitTest) unittest {
    // Parse an associative array whose values use both string notations.
    char[][char] parsed   = parseTo!(char[][char]) (`['a':"animal", 'b':['b','u','s']]`);
    char[][char] expected = ['a':cast(char[])"animal", 'b':['b','u','s']];

    //FIXME: when the compiler's fixed, replace the three checks below with
    // just assert (parsed == expected), which currently fails (compiler bug);
    // calling rehash on both arrays first doesn't make a difference.
    assert (parsed.length == expected.length);
    assert (parsed.keys   == expected.keys);
    assert (parsed.values == expected.values);
}
|---|
| 136 |
|
|---|
| 137 |
|
|---|
| 138 |
// Arrays |
|---|
| 139 |
|
|---|
| 140 |
/** Parses a generic array literal $(B [x, ..., z]) into a T[].
 *
 * Throws: ParseException if the trimmed source is not enclosed in brackets.
 */
T[] parseTo(T : T[]) (char[] src) {
    src = Util.trim(src);
    bool bracketed = src.length >= 2 && src[0] == '[' && src[$-1] == ']';
    if (!bracketed)
        throw new ParseException ("Invalid array: not [x, ..., z]");
    return toArray!(T[]) (src);
}
|---|
| 145 |
|
|---|
| 146 |
// String (array special case) |
|---|
| 147 |
/** Parses a string, given either as a double-quoted literal with escape sequences or as a
 * bracketed array of character literals.
 *
 * Throws: ParseException if the source is neither form, if a quoted string ends in a lone
 *  backslash, or if an escape sequence is not supported by replaceEscapedChar.
 */
T parseTo(T : char[]) (char[] src) {
    src = Util.trim(src);
    bool hasEnds = src.length >= 2;

    if (hasEnds && src[0] == '"' && src[$-1] == '"') {
        char[] text = src[1..$-1];
        T result;
        result.length = text.length;    // upper bound; escapes only ever shrink the output
        uint written = 0;
        uint pos = 0;
        while (pos < text.length) {
            if (text[pos] != '\\') {
                // ordinary character: copy through unchanged
                result[written++] = text[pos++];
                continue;
            }
            ++pos;  // step over the backslash
            if (pos == text.length) throw new ParseException ("Invalid string: ends \\\" !");    // the closing quote was escaped
            result[written++] = replaceEscapedChar (text[pos++]);   // throws if it's invalid
        }
        return result[0..written];
    }

    if (hasEnds && src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);

    throw new ParseException ("Invalid string: not quoted (\"*\") or char array (['a',...,'c'])");
}
|---|
| 174 |
// Unicode conversions for strings: |
|---|
| 175 |
/// Parses a string as UTF-8 and converts the result to UTF-16.
T parseTo(T : wchar[]) (char[] src) {
    char[] utf8 = parseTo!(char[]) (src);
    // The conversion may throw a UnicodeException; it is deliberately left to propagate.
    return Utf.toString16 (utf8);
}
|---|
| 179 |
/// Parses a string as UTF-8 and converts the result to UTF-32.
T parseTo(T : dchar[]) (char[] src) {
    char[] utf8 = parseTo!(char[]) (src);
    // The conversion may throw a UnicodeException; it is deliberately left to propagate.
    return Utf.toString32 (utf8);
}
|---|
| 183 |
|
|---|
| 184 |
// Binary (array special case) |
|---|
| 185 |
/** Parses binary data, given either as a standard array literal or in the special form
 * $(B 0x) followed by pairs of hex digits, each pair giving one ubyte.
 *
 * Throws: ParseException if the source is neither form, if the number of hex digits is odd, or
 *  if a digit is not valid hexadecimal.
 */
T parseTo(T : ubyte[]) (char[] src) {
    src = Util.trim(src);

    if (src.length >= 2) {
        // Standard case: [a, b, ...]
        if (src[0] == '[' && src[$-1] == ']') return toArray!(T) (src);

        // Special case: a run of hex digits
        if (src[0..2] == "0x") {
            char[] digits = src[2..$];

            // Must be in pairs:
            if (digits.length % 2 == 1) throw new ParseException ("Invalid binary: odd number of chars");

            T bytes;
            bytes.length = digits.length / 2;   // exact

            uint pos = 0;   // advanced by readHexChar
            for (uint n = 0; n < bytes.length; ++n) {
                ubyte high = readHexChar (digits, pos);
                ubyte low  = readHexChar (digits, pos);
                bytes[n] = cast(ubyte) ((high << 4) | low);
            }
            return bytes;
        }
    }
    throw new ParseException ("Invalid ubyte[]: not an array and doesn't start 0x");
}
|---|
| 208 |
|
|---|
| 209 |
debug (UnitTest) unittest { |
|---|
| 210 |
assert (parseTo!(double[]) (`[1.0,1.0e-10]`) == [1.0, 1.0e-10]); // generic array stuff |
|---|
| 211 |
assert (parseTo!(double[]) (`[ ]`) == cast(double[]) []); // empty array |
|---|
| 212 |
|
|---|
| 213 |
// char[] and char conversions, with commas, escape sequences and multichar UTF8 characters: |
|---|
| 214 |
assert (parseTo!(char[][]) (`[ ".\"", [',','\''] ,"!\bâ¬" ]`) == [ ".\"".dup, [',','\''] ,"!\bâ¬" ]); |
|---|
| 215 |
|
|---|
| 216 |
// wchar[] and dchar[] conversions: |
|---|
| 217 |
// The characters were pretty-much pulled at random from unicode tables. |
|---|
| 218 |
// The last few cause some weird (display only) effects in my editor. |
|---|
| 219 |
assert (parseTo!(wchar[]) ("\"Test string: ¶αØà€ |
|---|
| 220 |
àžáæ\"") == "Test string: ¶αØà€ |
|---|
| 221 |
àžáæ"w); |
|---|
| 222 |
assert (parseTo!(dchar[]) ("\"Test string: ¶αØà€ |
|---|
| 223 |
àžáæ\"") == "Test string: ¶αØà€ |
|---|
| 224 |
àžáæ"d); |
|---|
| 225 |
|
|---|
| 226 |
assert (parseTo!(ubyte[]) (`0x01F2AC`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] special notation |
|---|
| 227 |
assert (parseTo!(ubyte[]) (`[01 ,0xF2, 0xAC]`) == cast(ubyte[]) [0x01, 0xF2, 0xAC]); // ubyte[] std notation |
|---|
| 228 |
} |
|---|
| 229 |
|
|---|
| 230 |
|
|---|
| 231 |
// Basic types |
|---|
| 232 |
|
|---|
| 233 |
// Char |
|---|
| 234 |
/** Parses a single-quoted character literal: either one plain character ($(B 'c')) or one
 * escape sequence ($(B '\n')).
 *
 * Throws:
 *  ParseException if the source is not quoted, holds an unfinished or unsupported escape
 *  sequence, or holds more than one character. UnicodeException if the content starts with a
 *  non-ASCII byte, i.e. looks like a multibyte UTF-8 character, which cannot fit in one char.
 */
T parseTo(T : char) (char[] src) {
    src = Util.trim(src);
    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
        throw new ParseException ("Invalid char: not quoted (e.g. 'c')");

    if (src.length == 3) {
        if (src[1] != '\\') return src[1];  // plain, non-escaped character
        // A lone backslash between the quotes: the escape has no character after it.
        throw new ParseException ("Invalid char: unfinished escape sequence");
    }

    // Only treat the content as an escape when it really starts with a backslash;
    // otherwise two ordinary characters such as 'ab' would be read as '\b'.
    if (src.length == 4 && src[1] == '\\') return replaceEscapedChar (src[2]);

    // Report errors. The high bit is set on every byte of a multibyte UTF-8 sequence and on no
    // ASCII character, so it distinguishes the likely "non-ASCII character" mistake.
    if (src[1] & 0x80u) throw new UnicodeException ("Invalid char: too long (non-ASCII UTF-8 characters cannot be read as a single character)", 1);
    throw new ParseException ("Invalid char: too long");
}
|---|
| 246 |
/* Basic unicode conversions for wide-chars. |
|---|
| 247 |
* NOTE: c > 127 signals the start of a multibyte UTF-8 sequence which must be converted for |
|---|
| 248 |
* UTF-16/32. But since we don't know what the next char is we can't do the conversion. */ |
|---|
| 249 |
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted from a single UTF-8 char";

/// Parses a character literal as a char and widens it to wchar; only ASCII values are accepted.
T parseTo(T : wchar) (char[] src) {
    char c = parseTo!(char) (src);
    // Anything above 127 is part of a multibyte sequence and cannot be converted on its own.
    if (c > 127u) throw new UnicodeException (WIDE_CHAR_ERROR, 1);
    return cast(wchar) c;
}
|---|
| 255 |
/// Parses a character literal as a char and widens it to dchar; only ASCII values are accepted.
T parseTo(T : dchar) (char[] src) {
    char c = parseTo!(char) (src);
    // Anything above 127 is part of a multibyte sequence and cannot be converted on its own.
    if (c > 127u) throw new UnicodeException (WIDE_CHAR_ERROR, 1);
    return cast(dchar) c;
}
|---|
| 260 |
debug (UnitTest) unittest {
    // Plain ASCII characters widen to wchar and dchar.
    assert (parseTo!(dchar) ("'X'") == 'X');
    assert (parseTo!(wchar) ("'X'") == 'X');

    // An escaped single quote inside a character literal.
    assert (parseTo!(char) ("\'\\\'\'") == '\'');
}
|---|
| 265 |
|
|---|
| 266 |
// Bool |
|---|
| 267 |
/** Parses a bool: the words $(B true) and $(B false), or a number that evaluates to 0 or 1
 * (any number of leading zeros is allowed).
 *
 * Throws: ParseException for anything else, including an empty string.
 */
T parseTo(T : bool) (char[] src) {
    src = Util.trim(src);
    if (src == "false") return false;
    if (src == "true") return true;

    // Count the leading zeros, then look at what is left over.
    uint zeros = 0;
    while (zeros < src.length && src[zeros] == '0') ++zeros;
    char[] rest = src[zeros..$];

    if (rest.length == 0) {
        if (zeros > 0) return false;    // nothing but zeros
    }
    else if (rest == "1") return true;  // optional zeros followed by a single 1

    throw new ParseException ("Invalid bool: not true or false and doesn't evaluate to 0 or 1");
}
|---|
| 277 |
debug (UnitTest) unittest {
    // Keywords and numeric forms (with leading zeros) may be mixed within one array.
    bool[] parsed = parseTo!(bool[]) (`[true,false,01,00]`);
    assert (parsed == cast(bool[]) [1,0,1,0]);
}
|---|
| 280 |
|
|---|
| 281 |
// Ints |
|---|
| 282 |
// Every integer overload forwards to toTInt, which does the parsing and range checking for T.

// Signed:
T parseTo(T : byte)   (char[] src) { return toTInt!(T) (src); }
T parseTo(T : short)  (char[] src) { return toTInt!(T) (src); }
T parseTo(T : int)    (char[] src) { return toTInt!(T) (src); }
T parseTo(T : long)   (char[] src) { return toTInt!(T) (src); }

// Unsigned:
T parseTo(T : ubyte)  (char[] src) { return toTInt!(T) (src); }
T parseTo(T : ushort) (char[] src) { return toTInt!(T) (src); }
T parseTo(T : uint)   (char[] src) { return toTInt!(T) (src); }
T parseTo(T : ulong)  (char[] src) { return toTInt!(T) (src); }
|---|
| 306 |
debug (UnitTest) unittest {
    // Binary, octal, hex and decimal notations in one array.
    // annoyingly, octal syntax differs from D (blame tango):
    uint[] expected = [0b0100u,0724,0xFa59c,0xFFFFFFFF,0];
    assert (parseTo!(uint[]) ("[0b0100,0o724,0xFa59c,0xFFFFFFFF,0]") == expected);

    // Negative value in a signed type.
    assert (parseTo!(byte) ("-5") == cast(byte) -5);
}
|---|
| 311 |
|
|---|
| 312 |
// Floats |
|---|
| 313 |
// Every floating-point overload forwards to toTFloat, which does the parsing for T.
T parseTo(T : float)  (char[] src) { return toTFloat!(T) (src); }
T parseTo(T : double) (char[] src) { return toTFloat!(T) (src); }
T parseTo(T : real)   (char[] src) { return toTFloat!(T) (src); }
|---|
| 322 |
debug (UnitTest) unittest {
    // One check per floating-point type: real, double, float.
    assert (parseTo!(real) ("5.24e-269") == cast(real) 5.24e-269);
    assert (parseTo!(double) ("-1e25") == -1e25);
    assert (parseTo!(float) ("0.0") == 0.0f);
}
|---|
| 327 |
//END parseTo templates |
|---|
| 328 |
|
|---|
| 329 |
//BEGIN Utility funcs |
|---|
| 330 |
/** Trims whitespace from both ends of a string, then checks for and removes the enclosing
 * array brackets: []
 *
 * Throws:
 *  ParseException if the first and last non-whitespace characters are not '[' and ']'.
 *
 * Returns:
 *  The text between the brackets (whitespace outside the brackets is dropped, whitespace inside
 *  them is kept). Useful for passing to split.
 */
char[] stripBrackets (char[] src) {
    char[] trimmed = Util.trim(src);
    bool bracketed = trimmed.length >= 2 && trimmed[0] == '[' && trimmed[$-1] == ']';
    if (!bracketed)
        throw new ParseException ("Invalid bracketed string: not [...]");
    return trimmed[1..$-1];
}
|---|
| 343 |
|
|---|
| 344 |
/** Splits a string into substrings separated by '$(B ,)' with support for characters and strings
 * containing escape sequences and for embedded arrays ($(B [...])).
 *
 * Commas inside quoted strings/characters or inside embedded brackets do not split.
 *
 * Params:
 *  src A string to separate on commas. Where used for parsing arrays, the brackets enclosing
 *      the array should be removed before calling this function (stripBrackets can do this).
 *
 * Returns:
 *  An array of substrings within src, excluding commas. Whitespace is not stripped and
 *  empty strings may get returned. An empty (or all-whitespace) src gives an empty array.
 *
 * Throws:
 *  ParseException if a ']' is found with no matching '['. An unterminated quote is not an error
 *  here: the remainder of src becomes the final piece, to be rejected by whatever parses it.
 *
 * Remarks:
 *  This function is primarily intended as a utility function for use by the templates
 *  parsing arrays and associative arrays, but it may be useful in other cases too. Hence the
 *  fact no brackets are stripped from src.
 */
char[][] split (char[] src) {
    src = Util.trim (src);
    if (src == "") return [];       // empty array: no elements when no data

    uint depth = 0;         // surface depth (embedded arrays)
    char[][] ret;
    // Unlikely to need a longer array. The +1 guarantees a non-zero length, without which
    // the doubling below could never grow the array for inputs shorter than 3 characters.
    ret.length = src.length / 3 + 1;
    uint k = 0;             // current split piece
    uint i = 0, j = 0;      // current read location, start of current piece

    while (i < src.length) {
        char c = src[i];
        if (c == '\'' || c == '"') {    // string or character
            ++i;
            while (i < src.length && src[i] != c) {
                if (src[i] == '\\') ++i;    // escape seq.
                ++i;
            }   // May leave i at or beyond src.length if there is no terminal quote.
        }
        else if (c == '[') ++depth;
        else if (c == ']') {
            if (depth) --depth;
            else throw new ParseException ("Invalid array literal: closes before end of data item.");
        }
        else if (c == ',' && depth == 0) {      // only if not an embedded array
            if (ret.length <= k) ret.length = ret.length * 2;
            ret[k++] = src[j..i];       // add this piece and increment k
            j = i + 1;
        }
        ++i;
    }
    // An unterminated quote can push i past the end of src; clamp it so the final slice is
    // valid and the malformed piece is reported by the parser it is handed to.
    if (i > src.length) i = src.length;
    if (ret.length <= k) ret.length = k + 1;
    ret[k] = src[j..i];     // add final piece (i >= j)
    return ret[0..k+1];
}
|---|
| 395 |
|
|---|
| 396 |
/* Templated read-int function to read (un)signed 1-8 byte integers.
 *
 * Actually a reimplementation of tango.text.convert.Integer toLong and parse functions.
 *
 * The magnitude is read into a ulong and then range-checked against TInt. The whole of the
 * signed range is accepted, including TInt.min, whose magnitude is TInt.max + 1. A negative
 * sign on an unsigned type is always out of range.
 *
 * Throws: ParseException if there are no digits, if a character cannot be parsed as part of
 *  the integer, or if the value does not fit in TInt.
 */
private TInt toTInt(TInt) (char[] src) {
    const char[] INT_OUT_OF_RANGE = "Integer out of range";
    bool sign;
    uint radix, ate, ate2;

    // Trim off whitespace.
    // NOTE: Cannot use tango.text.convert.Integer.trim to trim leading whitespace since it doesn't
    // treat new-lines, etc. as whitespace which for our purposes is whitespace.
    src = Util.trim (src);

    ate = cInt.trim (src, sign, radix);
    if (ate == src.length) throw new ParseException ("Invalid integer: no digits");
    ulong val = cInt.convert (src[ate..$], radix, &ate2);
    ate += ate2;

    if (ate < src.length)
        throw new ParseException ("Invalid integer at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");

    if (sign) {
        static if (TInt.min == 0) {
            // Unsigned target: no negative input is representable.
            throw new ParseException (INT_OUT_OF_RANGE);
        } else {
            // The most negative value has magnitude TInt.max + 1; computed in ulong so it
            // cannot overflow even for long.
            if (val > cast(ulong) TInt.max + 1) throw new ParseException (INT_OUT_OF_RANGE);
            // Negate in unsigned arithmetic; truncating to TInt gives the two's-complement value.
            return cast(TInt) (0 - val);
        }
    }
    if (val > TInt.max) throw new ParseException (INT_OUT_OF_RANGE);
    return cast(TInt) val;
}
|---|
| 426 |
|
|---|
| 427 |
/* Basically a reimplementation of tango.text.convert.Float.toFloat which strips whitespace
 * from both ends before rejecting input that is not consumed entirely by the number.
 *
 * Throws: ParseException if the trimmed source is empty or if parsing stops before the end of
 *  the source (e.g. trailing garbage or no number at all).
 */
private TFloat toTFloat(TFloat) (char[] src) {
    // NOTE: As for toTInt(), this needs to strip leading as well as trailing whitespace.
    src = Util.trim (src);
    if (src == "") throw new ParseException ("Invalid float: no digits");
    uint ate;   // number of characters consumed by the parser

    TFloat x = cFloat.parse (src, &ate);
    // Anything left unread means the text is not (just) a number; report where parsing stopped.
    if (ate < src.length)
        throw new ParseException ("Invalid float at marked character: \"" ~ src[0..ate] ~ "'" ~ src[ate] ~ "'" ~ src[ate+1..$] ~ "\"");
    return x;
}
|---|
| 438 |
|
|---|
| 439 |
/* Maps the character following a backslash to the character the escape sequence stands for.
 *
 * Supported escape sequences are the following subset of those supported by D:
 * \" \' \\ \a \b \f \n \r \t \v
 *
 * Throws: ParseException for any other character.
 */
private char replaceEscapedChar (char c)
{
    switch (c) {
        case '\"': return '\"';
        case '\'': return '\'';
        case '\\': return '\\';
        case 'a':  return '\a';
        case 'b':  return '\b';
        case 'f':  return '\f';
        case 'n':  return '\n';
        case 'r':  return '\r';
        case 't':  return '\t';
        case 'v':  return '\v';
        default:   break;   // unsupported: reported below
    }

    throw new ParseException ("Invalid escape sequence: \\"~c);
}
|---|
| 482 |
|
|---|
| 483 |
// Reads one hex char ([0-9A-Fa-f]) at src[pos], returns its value (0-15) and advances pos by one.
// Throws a ParseException for any other character, leaving pos unchanged.
// Doesn't check src.length.
private ubyte readHexChar (char[] src, inout uint pos) {
    char digit = src[pos];
    ubyte value;

    if (digit >= '0' && digit <= '9')
        value = digit - '0';
    else if (digit >= 'A' && digit <= 'F')
        value = digit - 'A' + 10;
    else if (digit >= 'a' && digit <= 'f')
        value = digit - 'a' + 10;
    else
        throw new ParseException ("Invalid hex digit.");

    ++pos;
    return value;
}
|---|
| 493 |
|
|---|
| 494 |
// Generic array reader: splits the bracketed contents on commas and parses each piece as a T.
// Assumes input is of form "[xxxxx]" (i.e. first and last chars are '[', ']' and length >= 2).
private T[] toArray(T : T[]) (char[] src) {
    char[][] pieces = split (src[1..$-1]);

    T[] result = new T[16];     // start with some room to avoid unnecessary allocations
    uint count = 0;
    for (; count < pieces.length; ++count) {
        if (count == result.length) result.length = result.length * 2;  // full: double it
        result[count] = parseTo!(T) (pieces[count]);
    }
    return result[0..count];    // only the part actually filled
}
|---|
| 506 |
|
|---|
| 507 |
debug (UnitTest) { |
|---|
| 508 |
import tango.io.Console; |
|---|
| 509 |
|
|---|
| 510 |
unittest { |
|---|
| 511 |
Cout ("Running unittest: parseTo ...").flush; |
|---|
| 512 |
|
|---|
| 513 |
assert (parseTo!(char[]) ("\"\\a |
|---|