| 1 |
/************************************************************************************************** |
|---|
| 2 |
* copyright: Copyright (c) 2007-2008 Diggory Hardy. |
|---|
| 3 |
* |
|---|
| 4 |
* author: Diggory Hardy, diggory.hardy@gmail.com |
|---|
| 5 |
* |
|---|
| 6 |
* license: BSD style: $(LICENSE) |
|---|
| 7 |
* |
|---|
| 8 |
* This contains templates for converting various data-types to a char[]. |
|---|
| 9 |
* |
|---|
| 10 |
* parseFrom is roughly the inverse of $(B parseTo). |
|---|
| 11 |
* |
|---|
| 12 |
* This module basically implements the following templated function for most basic D types: |
|---|
| 13 |
* bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar, |
|---|
| 14 |
* dchar. |
|---|
| 15 |
* It also supports arrays of any supported type (including of other arrays) and has special |
|---|
| 16 |
* handling for strings (char[]) and binary (ubyte[]) data-types. |
|---|
| 17 |
* ----------------------------- |
|---|
| 18 |
* char[] parseFrom(T) (T value); |
|---|
| 19 |
* ----------------------------- |
|---|
| 20 |
* |
|---|
| 21 |
* $(I value) is the value to convert; it is converted to a string and returned. |
|---|
| 22 |
* |
|---|
| 23 |
* Syntax: |
|---|
| 24 |
* The syntax is the same as parseTo; but since this module only generates formatted output |
|---|
| 25 |
* knowing the syntax shouldn't be necessary. There is currently no way to specify options like |
|---|
| 26 |
* output base for ints, precision of floats, or |
|---|
| 27 |
* whether to write char[] or ubyte[] types as arrays or in their more compact forms. |
|---|
| 28 |
* |
|---|
| 29 |
* Throws: |
|---|
| 30 |
* On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other |
|---|
| 31 |
* exceptions should be thrown. |
|---|
| 32 |
* |
|---|
| 33 |
* Remarks: |
|---|
| 34 |
* There is currently no support for outputting wchar/dchar strings. There are, however, unicode |
|---|
| 35 |
* conversions for converting UTF-16/32 to UTF-8. Be warned though that many wchar/dchar characters |
|---|
| 36 |
* (any that are non-ascii) will not fit in a single char and an exception will be thrown. |
|---|
| 37 |
* |
|---|
| 38 |
* Examples: |
|---|
| 39 |
* ------------------------------------------------------------------------------------------------ |
|---|
| 40 |
* // Examples are printed via Cout. |
|---|
| 41 |
* |
|---|
| 42 |
* // Basic examples: FIXME: test these outputs are correct! |
|---|
| 43 |
* Cout (parseFrom!(byte) (-13)).newline; // -13 |
|---|
| 44 |
* Cout (parseFrom!(real) (2.56e11)).newline; // 2.55999999999999990000e+11 |
|---|
| 45 |
* Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline; // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000] |
|---|
| 46 |
* Cout (parseFrom!(bool[]) ([true,false,false])).newline; // [true,false,false] |
|---|
| 47 |
* |
|---|
| 48 |
* // String and ubyte[] special syntaxes (always used): |
|---|
| 49 |
* Cout (parseFrom!(char[]) ("A string.")).newline; // "A string." (with quotes) |
|---|
| 50 |
* Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline; // 0x05f110 |
|---|
| 51 |
* |
|---|
| 52 |
* // Associative arrays: |
|---|
| 53 |
* Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline; // [0:"zero",1:"one",-1:"negative one"] |
|---|
| 54 |
* |
|---|
| 55 |
* // No limit on complexity... |
|---|
| 56 |
* char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...); |
|---|
| 57 |
* ------------------------------------------------------------------------------------------------ |
|---|
| 58 |
*************************************************************************************************/ |
|---|
| 59 |
|
|---|
| 60 |
module tango.scrapple.text.convert.parseFrom; |
|---|
| 61 |
|
|---|
| 62 |
// tango imports |
|---|
| 63 |
import tango.core.Exception : UnicodeException, IllegalArgumentException; |
|---|
| 64 |
import cInt = tango.text.convert.Integer; |
|---|
| 65 |
import cFloat = tango.text.convert.Float; |
|---|
| 66 |
import Utf = tango.text.convert.Utf; |
|---|
| 67 |
import Util = tango.text.Util; |
|---|
| 68 |
|
|---|
| 69 |
//BEGIN parseFrom templates |
|---|
| 70 |
/* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output. |
|---|
| 71 |
* Unnecessary for mergetag though. |
|---|
| 72 |
*/ |
|---|
| 73 |
|
|---|
| 74 |
// Associative arrays |
|---|
| 75 |
|
|---|
| 76 |
/** Formats an associative array as "[key:value,key:value,...]".
 *
 * Each key and each value is formatted by the matching parseFrom specialisation.
 * An empty array is written as "[]". Entries appear in the order foreach visits
 * them, which this function does not control.
 *
 * Params:
 *   val = the associative array to format
 *
 * Returns: a slice of a freshly allocated buffer holding the formatted text.
 *
 * Throws: whatever the parseFrom specialisations for S and T throw.
 */
char[] parseFrom(T : T[S], S) (T[S] val) {
    char[] ret;
    // A guess, including values themselves and [,:] elements (must be at least 2,
    // so that "[]" fits and so that doubling below always makes progress).
    ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
    ret[0] = '[';
    size_t i = 1;                       // index of the next free slot in ret
    foreach (S k, T v; val) {
        char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v);
        i += s.length;
        // Grow until s AND the separator written after it fit. A single doubling
        // is not enough when one entry is longer than the whole current buffer,
        // so keep doubling rather than growing once.
        while (i+1 >= ret.length) ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;                    // special case (empty array) - not overwriting a comma
    ret[i-1] = ']';                     // replaces last comma
    return ret[0..i];
}
|---|
| 93 |
debug (UnitTest) unittest {
    // Checks associative-array formatting with char keys and char[] values.
    char[] X = parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']]);
    // The order in which foreach visits associative-array entries is not something
    // parseFrom controls, so accept either ordering of the two entries.
    char[] Y1 = `['a':"animal",'b':"bus"]`;
    char[] Y2 = `['b':"bus",'a':"animal"]`;
    assert (X == Y1 || X == Y2);
}
|---|
| 98 |
|
|---|
| 99 |
|
|---|
| 100 |
// Arrays |
|---|
| 101 |
|
|---|
| 102 |
/** Formats a dynamic array as "[elem,elem,...]".
 *
 * Each element is formatted by the parseFrom specialisation for T, so nested
 * arrays are handled recursively. An empty array is written as "[]".
 *
 * Params:
 *   val = the array to format
 *
 * Returns: a slice of a freshly allocated buffer holding the formatted text.
 *
 * Throws: whatever the parseFrom specialisation for T throws.
 */
char[] parseFrom(T : T[]) (T[] val) {
    char[] ret;
    // A guess, including commas and brackets (must be at least 2, so that "[]"
    // fits and so that doubling below always makes progress).
    ret.length = val.length * (defLength!(T) + 1) + 2;
    ret[0] = '[';
    size_t i = 1;                       // index of the next free slot in ret
    foreach (T x; val) {
        char[] s = parseFrom!(T) (x);
        i += s.length;
        // Grow until s AND the comma written after it fit. A single doubling is
        // not enough when one element is longer than the whole current buffer
        // (e.g. a long string element), so keep doubling rather than growing once.
        while (i+1 >= ret.length) ret.length = ret.length * 2;
        ret[i-s.length .. i] = s;
        ret[i++] = ',';
    }
    if (i == 1) ++i;                    // special case (empty array) - not overwriting a comma
    ret[i-1] = ']';                     // replaces last comma
    return ret[0..i];
}
|---|
| 119 |
|
|---|
| 120 |
// Strings (array special case) |
|---|
| 121 |
/** Formats a string as a double-quoted literal.
 *
 * Every character for which isEscapableChar returns true is written as a
 * backslash followed by the character given by replaceEscapableChar; all other
 * characters are copied unchanged.
 *
 * Params:
 *   val = the string to format
 *
 * Returns: a slice of a freshly allocated buffer: quote, escaped content, quote.
 */
char[] parseFrom(T : char[]) (T val) {
    // Worst case: every character needs two output characters, plus the two quotes.
    char[] buf = new char[val.length * 2 + 2];
    uint pos = 0;                       // next free slot in buf
    buf[pos++] = '"';
    foreach (char c; val) {
        if (isEscapableChar (c)) {
            buf[pos++] = '\\';
            buf[pos++] = replaceEscapableChar (c);
        } else {
            buf[pos++] = c;
        }
    }
    buf[pos++] = '"';
    return buf[0..pos];
}
|---|
| 142 |
// Unicode conversions for strings: |
|---|
| 143 |
/// Formats a dchar[] string: converts it with Utf.toString, then formats the
/// result with the char[] specialisation.
char[] parseFrom(T : dchar[]) (T val) {
    // The conversion may throw a UnicodeException; it is deliberately left to propagate.
    char[] utf8 = Utf.toString (val);
    return parseFrom!(char[]) (utf8);
}
|---|
| 147 |
/// Formats a wchar[] string: converts it with Utf.toString, then formats the
/// result with the char[] specialisation.
char[] parseFrom(T : wchar[]) (T val) {
    // The conversion may throw a UnicodeException; it is deliberately left to propagate.
    char[] utf8 = Utf.toString (val);
    return parseFrom!(char[]) (utf8);
}
|---|
| 151 |
|
|---|
| 152 |
// Binary (array special case) |
|---|
| 153 |
/** Formats binary data as "0x" followed by two lower-case hex digits per byte.
 *
 * Params:
 *   val = the bytes to format
 *
 * Returns: a freshly allocated string of exactly 2 + 2 * val.length characters.
 */
char[] parseFrom(T : ubyte[]) (T val) {
    static const char[16] hexChars = "0123456789abcdef";

    char[] text = new char[2 + 2 * val.length];     // exact length
    text[0] = '0';
    text[1] = 'x';

    foreach (n, ubyte b; val) {
        text[2 + 2*n]     = hexChars[b / 16];       // high nibble
        text[2 + 2*n + 1] = hexChars[b % 16];       // low nibble
    }
    return text;
}
|---|
| 166 |
|
|---|
| 167 |
debug (UnitTest) unittest {
    // generic array stuff:
    assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
    assert (parseFrom!(double[]) (cast(double[]) []) == `[]`);    // empty array

    // char[] conversions, with commas, escape sequences and a multi-byte UTF-8 character.
    // The non-ASCII character (U+20AC) is written as an escape so the test does not
    // depend on the encoding this source file is stored or displayed in.
    assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b\u20AC" ])
            == (`[".\"",",\'","!\b` ~ "\u20AC" ~ `"]`));

    // wchar[] and dchar[] conversions:
    // A handful of non-ASCII characters from assorted unicode blocks, all of which
    // fit in a single wchar. They are written as \u escapes for the same reason as above;
    // raw multi-byte characters here are easily corrupted by editors and viewers.
    assert (parseFrom!(wchar[]) ("Test string: \u00B6\u03B1\u061F\u0905\u0E01\u1200\u6400"w)
            == "\"Test string: \u00B6\u03B1\u061F\u0905\u0E01\u1200\u6400\"");
    assert (parseFrom!(dchar[]) ("Test string: \u00B6\u03B1\u061F\u0905\u0E01\u1200\u6400"d)
            == "\"Test string: \u00B6\u03B1\u061F\u0905\u0E01\u1200\u6400\"");

    assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);    // ubyte[] special notation
}
|---|
| 187 |
|
|---|
| 188 |
|
|---|
| 189 |
// Basic types |
|---|
| 190 |
|
|---|
| 191 |
// Char |
|---|
| 192 |
/** Formats a single char as a single-quoted character literal.
 *
 * A character for which isEscapableChar returns true is written as a backslash
 * followed by the character given by replaceEscapableChar.
 *
 * NOTE: values above 127 are not valid single-char UTF-8, but they are passed
 * through unchanged because the caller may recombine them with other chars later.
 *
 * Params:
 *   val = the character to format
 *
 * Returns: a slice (3 or 4 characters long) of a freshly allocated buffer.
 */
char[] parseFrom(T : char) (T val) {
    // A dynamic buffer is used since a reference to a static array cannot be returned.
    char[] buf = new char[4];           // max length for an escaped char
    uint end = 0;                       // next free slot in buf

    buf[end++] = '\'';
    if (isEscapableChar (val)) {
        buf[end++] = '\\';
        buf[end++] = replaceEscapableChar (val);
    } else {
        buf[end++] = val;
    }
    buf[end++] = '\'';
    return buf[0..end];
}
|---|
| 212 |
// Basic unicode conversions for wide-chars. |
|---|
| 213 |
// NOTE: any other wide-chars will not fit in a single UTF-8 encoded char. |
|---|
| 214 |
/// Message used when a wide character cannot be represented by a single char.
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";

/// Formats a single wchar. Only values up to 127 are accepted; they are formatted
/// by the char specialisation. Any other value throws a UnicodeException.
char[] parseFrom(T : wchar) (T val) {
    if (val > 127u)
        throw new UnicodeException (WIDE_CHAR_ERROR, 0);
    return parseFrom!(char) (cast(char) val);   // this char can be converted
}
|---|
| 219 |
/// Formats a single dchar. Only values up to 127 are accepted; they are formatted
/// by the char specialisation. Any other value throws a UnicodeException.
char[] parseFrom(T : dchar) (T val) {
    if (val > 127u)
        throw new UnicodeException (WIDE_CHAR_ERROR, 0);
    return parseFrom!(char) (cast(char) val);   // this char can be converted
}
|---|
| 223 |
debug (UnitTest) unittest {
    // A single quote must come out escaped: quote, backslash, quote, quote.
    char[] escapedQuote = parseFrom!(char) ('\'');
    assert (escapedQuote == `'\''`);

    // ASCII wide characters are formatted exactly like a plain char.
    char[] fromWchar = parseFrom!(wchar) ('X');
    char[] fromDchar = parseFrom!(dchar) ('X');
    assert (fromWchar == "'X'");
    assert (fromDchar == "'X'");
}
|---|
| 228 |
|
|---|
| 229 |
// Bool |
|---|
| 230 |
/// Formats a bool as the text "true" or "false".
char[] parseFrom(T : bool) (T val) {
    char[] text = "false";
    if (val) text = "true";
    return text;
}
|---|
| 234 |
// too simple to need a unittest |
|---|
| 235 |
|
|---|
| 236 |
// Signed ints |
|---|
| 237 |
/// Formats a byte as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : byte) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 240 |
/// Formats a short as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : short) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 243 |
/// Formats an int as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : int) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 246 |
/// Formats a long as text by passing it to formatLong.
char[] parseFrom(T : long) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 249 |
// Unsigned ints |
|---|
| 250 |
/// Formats a ubyte as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : ubyte) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 253 |
/// Formats a ushort as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : ushort) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 256 |
/// Formats a uint as text by widening it to long and passing it to formatLong.
char[] parseFrom(T : uint) (T val) {
    long widened = val;
    return formatLong (widened);
}
|---|
| 259 |
/** Formats a ulong as text via formatLong.
 *
 * Values above long.max are not supported, because formatLong takes a long.
 *
 * Throws: IllegalArgumentException if val is greater than long.max.
 */
char[] parseFrom(T : ulong) (T val) {
    if (val <= cast(ulong) long.max)
        return formatLong (val);
    throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
}
|---|
| 264 |
debug (UnitTest) unittest {
    // A negative value in the smallest signed type.
    char[] negative = parseFrom!(byte) (cast(byte) -5);
    assert (negative == "-5");

    // Binary, octal and hex input literals; output is always decimal.
    // annoyingly, octal syntax differs from D (blame tango):
    uint[] mixedBases = [0b0100u,0724,0xFa59c,0xFFFFFFFF,0];
    char[] formatted = parseFrom!(uint[]) (mixedBases);
    assert (formatted == "[4,468,1025436,4294967295,0]");
}
|---|
| 269 |
|
|---|
| 270 |
// Floats |
|---|
| 271 |
/* Old calculation (not used): |
|---|
| 272 |
t.dig+2+4+3 // should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */ |
|---|
| 273 |
/// Formats a float with cFloat.format, passing T.dig+2 and 1 as the formatting arguments.
char[] parseFrom(T : float) (T val) {
    // 32 chars is the minimum buffer size allowed by an assert in format.
    // From old C++ tests, T.dig+2 gives best(?) accuracy.
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
|---|
| 277 |
/// Formats a double with cFloat.format, passing T.dig+2 and 1 as the formatting arguments.
char[] parseFrom(T : double) (T val) {
    // Same 32-char output buffer as the float specialisation.
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
|---|
| 281 |
/// Formats a real with cFloat.format, passing T.dig+2 and 1 as the formatting arguments.
char[] parseFrom(T : real) (T val) {
    // Same 32-char output buffer as the float specialisation.
    return cFloat.format (new char[32], val, T.dig+2, 1);
}
|---|
| 285 |
debug (UnitTest) unittest {
    // NOTE: these numbers are not particularly meaningful.
    char[] zero = parseFrom!(float) (0.0f);
    assert (zero == "0.00000000");

    char[] bigNegative = parseFrom!(double) (-1e25);
    assert (bigNegative == "-1.00000000000000000e+25");

    char[] huge = parseFrom!(real) (cast(real) 4.918e300);
    assert (huge == "4.91800000000000000000e+300");
}
|---|
| 291 |
//END parseFrom templates |
|---|
| 292 |
|
|---|
| 293 |
//BEGIN Length templates |
|---|
| 294 |
/* This template provides the initial length for strings for formatting various types. These strings |
|---|
| 295 |
* can be expanded; this value is intended to cover 90% of cases or so. |
|---|
| 296 |
* |
|---|
| 297 |
* NOTE: This template was intended to provide specialisations for different types. |
|---|
| 298 |
* This one value should do reasonably well for most types. |
|---|
| 299 |
*/ |
|---|
| 300 |
private {
    // Default estimate of the formatted length, used for any type without a
    // more specific estimate below.
    template defLength(T) {
        const uint defLength = 20;
    }

    // A char formats to at most 4 characters (quote, backslash, char, quote).
    template defLength(T : char) {
        const uint defLength = 4;
    }

    // A bool formats to at most 5 characters ("false").
    template defLength(T : bool) {
        const uint defLength = 5;
    }
}
|---|
| 305 |
//END Length templates |
|---|
| 306 |
|
|---|
| 307 |
//BEGIN Utility funcs |
|---|
| 308 |
/// Converts a long to text with cInt.toString, using the Signed style and the Throw flag.
private char[] formatLong (long val) {
    // The conversion may throw an IllegalArgumentException; it is deliberately
    // left to propagate to the caller.
    char[] text = cInt.toString (val, cInt.Style.Signed, cInt.Flags.Throw);
    return text;
}
|---|
| 312 |
/// Returns true if c must be written as an escape sequence: any character in the
/// range '\a' to '\r' inclusive, a double quote, a single quote or a backslash.
private bool isEscapableChar (char c) {
    if (c >= '\a' && c <= '\r')
        return true;

    switch (c) {
        case '\"', '\'', '\\':
            return true;
        default:
            return false;
    }
}
|---|
| 315 |
// Throws on unsupported escape sequences; however this should never actually happen within parseFrom. |
|---|
| 316 |
/** Maps an escapable character to the character that follows the backslash in
 * its escape sequence (e.g. a newline maps to 'n').
 *
 * Params:
 *   c = a character for which an escape sequence is defined
 *
 * Returns: the second character of the escape sequence for c.
 *
 * Throws: IllegalArgumentException if c has no escape sequence here; this
 * should never actually happen within parseFrom.
 */
private char replaceEscapableChar (char c) {
    switch (c) {
        case '\a': return 'a';
        case '\b': return 'b';
        case '\t': return 't';
        case '\n': return 'n';
        case '\v': return 'v';
        case '\f': return 'f';
        case '\r': return 'r';
        case '\"': return '\"';
        case '\'': return '\'';
        case '\\': return '\\';
        default:   break;
    }

    // No escape sequence matched:
    throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)");
}
|---|
| 355 |
|
|---|
| 356 |
debug (UnitTest) {
    import tango.io.Console;

    // Prints a progress message and checks that every supported escape sequence
    // is produced correctly by the char[] specialisation.
    unittest {
        Cout ("Running unittest: parseFrom ...").flush;

        char[] allEscapes = "\a\b\t\n\v\f\r\"\'\\";
        char[] formatted  = parseFrom!(char[]) (allEscapes);
        assert (formatted == `"\a\b\t\n\v\f\r\"\'\\"`);

        Cout (" complete").newline;
    }
}
|---|
| 367 |
//END Utility funcs |
|---|