root/trunk/tango/scrapple/text/convert/parseFrom.d

Revision 55, 13.0 kB (checked in by flithm, 6 months ago)

Update parseTo and parseFrom

Line 
1 /**************************************************************************************************
2  * copyright: Copyright (c) 2007-2008 Diggory Hardy.
3  *
4  * author: Diggory Hardy, diggory.hardy@gmail.com
5  *
6  * license: BSD style: $(LICENSE)
7  *
8  * This contains templates for converting various data-types to a char[].
9  *
10  * parseFrom is roughly the inverse of $(B parseTo).
11  *
12  * This module basically implements the following templated function for most basic D types:
13  * bool, byte, short, int, long, ubyte, ushort, uint, ulong, float, double, real, char, wchar,
14  * dchar.
15  * It also supports arrays of any supported type (including of other arrays) and has special
16  * handling for strings (char[]) and binary (ubyte[]) data-types.
17  * -----------------------------
18  * char[] parseFrom(T) (T value);
19  * -----------------------------
20  *
21  * $(I value) is the value to convert; it is converted to a string and returned.
22  *
23  * Syntax:
24  * The syntax is the same as parseTo; but since this module only generates formatted output
25  * knowing the syntax shouldn't be necessary. There is currently no way to specify options like
26  * output base for ints, precision of floats, or
27  * whether to write char[] or ubyte[] types as arrays or in their more compact forms.
28  *
29  * Throws:
30  * On errors, an exception is thrown (UnicodeException or IllegalArgumentException). No other
31  * exceptions should be thrown.
32  *
33  * Remarks:
34  * There is currently no support for producing wchar[]/dchar[] output. wchar[] and dchar[] inputs
35  * are, however, converted from UTF-16/32 to UTF-8 before being formatted. Be warned that a single
36  * wchar or dchar value which is non-ASCII cannot fit in one char, so an exception is thrown for it.
37  *
38  * Examples:
39  * ------------------------------------------------------------------------------------------------
40  * // Examples are printed via Cout.
41  *
42  * // Basic examples: FIXME: test these outputs are correct!
43  * Cout (parseFrom!(byte) (-13)).newline;                       // -13
44  * Cout (parseFrom!(real) (2.56e11)).newline;                   // 2.55999999999999990000e+11
45  * Cout (parseFrom!(double[]) ([0.0, 1.0, 2.0, 3.0])).newline;  // [0.00000000000000000,1.00000000000000000,2.00000000000000000,3.00000000000000000]
46  * Cout (parseFrom!(bool[]) ([true,false,false])).newline;      // [true,false,false]
47  *
48  * // String and ubyte[] special syntaxes (always used):
49  * Cout (parseFrom!(char[]) ("A string.")).newline;             // "A string." (with quotes)
50  * Cout (parseFrom!(ubyte[]) (cast(ubyte[]) [5u, 0xF1u, 0x10u])).newline;   // 0x05f110
51  *
52  * // Associative arrays:
53  * Cout (parseFrom!(char[][byte]) ([-1:"negative one"[], 0:"zero", 1:"one"])).newline;  // [0:"zero",1:"one",-1:"negative one"]
54  *
55  * // No limit on complexity...
56  * char[] somethingComplicated = parseFrom!(real[][][bool[int[][]]]) (...);
57  * ------------------------------------------------------------------------------------------------
58  *************************************************************************************************/
59
60 module tango.scrapple.text.convert.parseFrom;
61
62 // tango imports
63 import tango.core.Exception : UnicodeException, IllegalArgumentException;
64 import cInt = tango.text.convert.Integer;
65 import cFloat = tango.text.convert.Float;
66 import Utf = tango.text.convert.Utf;
67 import Util = tango.text.Util;
68
69 //BEGIN parseFrom templates
70 /* Idea: could extend parseFrom with a second parameter, containing flags for things like base to output.
71  * Unnecessary for mergetag though.
72 */
73
74 // Associative arrays
75
/**
 * Formats an associative array as [key:value,key:value,...].
 *
 * Every key and every value is formatted by the parseFrom overload for its own type, so nested
 * types work to any depth. An empty array produces "[]". Entries are written in the order in
 * which foreach visits them.
 *
 * Params:
 *  val = the associative array to format
 *
 * Returns: a slice (of a freshly allocated buffer) holding the formatted text.
 */
char[] parseFrom(T : T[S], S) (T[S] val) {
    char[] ret;
    // Initial guess: per entry the key, the value, ':' and ','; plus the brackets.
    // The "+ 2" guarantees a length of at least 2, which the empty case relies on.
    ret.length = val.length * (defLength!(T) + defLength!(S) + 2) + 2;
    ret[0] = '[';
    size_t i = 1;       // next free index in ret
    foreach (S k, T v; val) {
        char[] s = parseFrom!(S) (k) ~ ":" ~ parseFrom!(T) (v);
        // Room is needed for the entry and for the separator written after it. A single
        // doubling is not always enough (one long entry can exceed twice the guess), so keep
        // doubling until the entry fits.
        size_t needed = i + s.length + 1;
        if (needed > ret.length) {
            size_t cap = ret.length;
            while (cap < needed) cap *= 2;
            ret.length = cap;
        }
        ret[i .. i + s.length] = s;
        i += s.length;
        ret[i++] = ',';
    }
    if (i == 1) ++i;    // empty array: there is no trailing comma to overwrite
    ret[i-1] = ']';     // replaces the last comma
    return ret[0..i];
}
debug (UnitTest) unittest {
    // Char keys are written quoted and char[] values as strings, whichever literal form
    // (string literal or char array literal) the value was built from.
    assert (parseFrom!(char[][char]) (['a':cast(char[])"animal", 'b':['b','u','s']])
            == `['a':"animal",'b':"bus"]`);
}
98
99
100 // Arrays
101
/**
 * Formats an array as [element,element,...].
 *
 * Every element is formatted by the parseFrom overload for its own type, so arrays of arrays
 * work to any depth. An empty array produces "[]".
 *
 * Params:
 *  val = the array to format
 *
 * Returns: a slice (of a freshly allocated buffer) holding the formatted text.
 */
char[] parseFrom(T : T[]) (T[] val) {
    char[] ret;
    // Initial guess: per element its text and a comma; plus the brackets.
    // The "+ 2" guarantees a length of at least 2, which the empty case relies on.
    ret.length = val.length * (defLength!(T) + 1) + 2;
    ret[0] = '[';
    size_t i = 1;       // next free index in ret
    foreach (T x; val) {
        char[] s = parseFrom!(T) (x);
        // Room is needed for the element and for the separator written after it. A single
        // doubling is not always enough (one long element can exceed twice the guess), so keep
        // doubling until the element fits.
        size_t needed = i + s.length + 1;
        if (needed > ret.length) {
            size_t cap = ret.length;
            while (cap < needed) cap *= 2;
            ret.length = cap;
        }
        ret[i .. i + s.length] = s;
        i += s.length;
        ret[i++] = ',';
    }
    if (i == 1) ++i;    // empty array: there is no trailing comma to overwrite
    ret[i-1] = ']';     // replaces the last comma
    return ret[0..i];
}
119
120 // Strings (array special case)
/**
 * Formats a string as a double-quoted literal, writing each escapable character as a backslash
 * followed by its escape letter.
 *
 * Params:
 *  val = the text to quote
 *
 * Returns: a slice (of a freshly allocated buffer) holding the quoted text.
 */
char[] parseFrom(T : char[]) (T val) {
    // Worst case: every character is escaped (two output chars each), plus the two quotes.
    char[] buf = new char[val.length * 2 + 2];
    uint pos = 0;
    buf[pos++] = '"';
    foreach (char c; val) {
        if (isEscapableChar (c)) {
            buf[pos++] = '\\';
            buf[pos++] = replaceEscapableChar (c);
        } else {
            buf[pos++] = c;
        }
    }
    buf[pos++] = '"';
    return buf[0..pos];
}
142 // Unicode conversions for strings:
143 char[] parseFrom(T : dchar[]) (T val) {
144     // dchar[] text: convert with Utf.toString, then format with the char[] overload. The conversion may throw a UnicodeException; it is deliberately not caught and rethrown:
145     return parseFrom!(char[]) (Utf.toString (val));
146 }
147 char[] parseFrom(T : wchar[]) (T val) {
148     // wchar[] text: convert with Utf.toString, then format with the char[] overload. The conversion may throw a UnicodeException; it is deliberately not caught and rethrown:
149     return parseFrom!(char[]) (Utf.toString (val));
150 }
151
152 // Binary (array special case)
/**
 * Formats binary data in the compact form 0x followed by two lower-case hex digits per byte.
 *
 * Params:
 *  val = the bytes to format
 *
 * Returns: a freshly allocated string; just "0x" when val is empty.
 */
char[] parseFrom(T : ubyte[]) (T val) {
    static const char[16] hexDigits = "0123456789abcdef";

    // The "0x" prefix plus exactly two digits per byte.
    char[] text = new char[2 + 2 * val.length];
    text[0] = '0';
    text[1] = 'x';
    for (uint n = 0; n < val.length; ++n) {
        ubyte b = val[n];
        text[2 + 2*n]     = hexDigits[b / 16];  // high nibble
        text[2 + 2*n + 1] = hexDigits[b % 16];  // low nibble
    }
    return text;
}
166
167 debug (UnitTest) unittest {
168     // Generic arrays: elements are comma-separated inside square brackets.
169     assert (parseFrom!(double[]) ([1.0, 1.0e-10]) == `[1.00000000000000000,0.10000000000000000e-09]`);
170     assert (parseFrom!(double[]) (cast(double[]) []) == `[]`);      // empty array
171    
172     // char[] conversions, with commas, escape sequences and multichar UTF8 characters:
173     assert (parseFrom!(char[][]) ([ ".\""[], [',','\''] ,"!\b€" ]) == `[".\"",",\'","!\b€"]`);
174    
175     // wchar[] and dchar[] conversions:
176     // The characters were pretty-much pulled at random from unicode tables.
177     // The last few cause some weird (display only) effects in my editor.
178     assert (parseFrom!(wchar[]) ("Test string: ¶α؟à€
179 àžáˆ€æ€"w) == "\"Test string: ¶α؟à€
180 àžáˆ€æ€\"");
181     assert (parseFrom!(dchar[]) ("Test string: ¶α؟à€
182 àžáˆ€æ€"d) == "\"Test string: ¶α؟à€
183 àžáˆ€æ€\"");
184    
185     assert (parseFrom!(ubyte[]) (cast(ubyte[]) [0x01, 0xF2, 0xAC]) == `0x01f2ac`);  // ubyte[] special notation
186 }
187
188
189 // Basic types
190
191 // Char
/**
 * Formats a single char as a single-quoted literal, escaping it with a backslash if needed.
 *
 * NOTE: a value above 127 is not a valid UTF-8 sequence on its own. It is passed through
 * unchanged because the caller may recombine it with other chars later.
 *
 * Params:
 *  val = the character to format
 *
 * Returns: a slice of a freshly allocated 4-char buffer: 3 chars long for a plain character,
 * 4 for an escaped one.
 */
char[] parseFrom(T : char) (T val) {
    // A static array cannot be returned by reference, so allocate the maximum length needed
    // (quote, backslash, character, quote) and slice off what was used.
    char[] buf = new char[4];
    uint end = 0;
    buf[end++] = '\'';
    if (isEscapableChar (val)) {
        buf[end++] = '\\';
        buf[end++] = replaceEscapableChar (val);
    } else {
        buf[end++] = val;
    }
    buf[end++] = '\'';
    return buf[0..end];
}
212 // Basic unicode conversions for wide-chars.
213 // NOTE: any other wide-chars will not fit in a single UTF-8 encoded char.
// Message of the exception thrown when a wide character does not fit in a single char.
const char[] WIDE_CHAR_ERROR = "Error: unicode non-ascii character cannot be converted to a single UTF-8 char";

/**
 * Formats a single wchar like a char, provided its value is at most 127.
 *
 * Throws: UnicodeException for any value above 127.
 */
char[] parseFrom(T : wchar) (T val) {
    if (val > 127u)
        throw new UnicodeException (WIDE_CHAR_ERROR, 0);
    // The value fits in one char, so format it with the char overload.
    return parseFrom!(char) (cast(char) val);
}
/**
 * Formats a single dchar like a char, provided its value is at most 127.
 *
 * Throws: UnicodeException for any value above 127.
 */
char[] parseFrom(T : dchar) (T val) {
    if (val > 127u)
        throw new UnicodeException (WIDE_CHAR_ERROR, 0);
    // The value fits in one char, so format it with the char overload.
    return parseFrom!(char) (cast(char) val);
}
223 debug (UnitTest) unittest {
224     assert (parseFrom!(char) ('\'') == "\'\\\'\'");   // a quote is escaped: the result is the four chars ' \ ' '
225     assert (parseFrom!(wchar) ('X') == "'X'");         // ASCII wide chars are formatted like a char
226     assert (parseFrom!(dchar) ('X') == "'X'");
227 }
228
229 // Bool
/**
 * Formats a bool as the word "true" or "false".
 */
char[] parseFrom(T : bool) (T val) {
    char[] word = "false";
    if (val) word = "true";
    return word;
}
234 // too simple to need a unittest
235
236 // Signed ints
237 char[] parseFrom(T : byte) (T val) {   // Signed integers: each overload passes its value to formatLong, which takes a long.
238     return formatLong (val);
239 }
240 char[] parseFrom(T : short) (T val) {
241     return formatLong (val);
242 }
243 char[] parseFrom(T : int) (T val) {
244     return formatLong (val);
245 }
246 char[] parseFrom(T : long) (T val) {
247     return formatLong (val);
248 }
249 // Unsigned ints
250 char[] parseFrom(T : ubyte) (T val) {  // Unsigned integers up to uint: every value fits in the long that formatLong takes.
251     return formatLong (val);
252 }
253 char[] parseFrom(T : ushort) (T val) {
254     return formatLong (val);
255 }
256 char[] parseFrom(T : uint) (T val) {
257     return formatLong (val);
258 }
/**
 * Formats a ulong as decimal text.
 *
 * Throws: IllegalArgumentException when the value is greater than long.max, because the value
 * is formatted through formatLong, which takes a long.
 */
char[] parseFrom(T : ulong) (T val) {
    if (val <= cast(ulong) long.max)
        return formatLong (val);
    throw new IllegalArgumentException ("No handling available for ulong where value > long.max");
}
264 debug (UnitTest) unittest {
265     assert (parseFrom!(byte) (cast(byte) -5) == "-5");
266     // annoyingly, octal syntax differs from D (blame tango); here 0724 is a D octal literal, i.e. 468:
267     assert (parseFrom!(uint[]) ([0b0100u,0724,0xFa59c,0xFFFFFFFF,0]) == "[4,468,1025436,4294967295,0]");
268 }
269
270 // Floats
271 /* Old calculation (not used):
272 t.dig+2+4+3 // should be sufficient length (mant + (neg, dot, e, exp neg) + exp (3,4,5 for float,double,real resp.)) */
// Floating-point overloads. Each one formats into a fresh 32-char buffer (the minimum allowed by
// the assert in cFloat.format) using T.dig+2 decimals, which gave the best(?) accuracy in old
// C++ tests.
char[] parseFrom(T : float) (T val) {
    const int decimals = T.dig + 2;
    char[] buffer = new char[32];
    return cFloat.format (buffer, val, decimals, 1);
}
char[] parseFrom(T : double) (T val) {
    const int decimals = T.dig + 2;
    char[] buffer = new char[32];
    return cFloat.format (buffer, val, decimals, 1);
}
char[] parseFrom(T : real) (T val) {
    const int decimals = T.dig + 2;
    char[] buffer = new char[32];
    return cFloat.format (buffer, val, decimals, 1);
}
285 debug (UnitTest) unittest {
286     // NOTE: these numbers are not particularly meaningful; they pin the exact text produced for one value of each float type.
287     assert (parseFrom!(float) (0.0f) == "0.00000000");
288     assert (parseFrom!(double) (-1e25) == "-1.00000000000000000e+25");
289     assert (parseFrom!(real) (cast(real) 4.918e300) == "4.91800000000000000000e+300");
290 }
291 //END parseFrom templates
292
293 //BEGIN Length templates
294 /* This template provides the initial length for strings for formatting various types. These strings
295  * can be expanded; this value is intended to cover 90% of cases or so.
296  *
297  * NOTE: This template was intended to provide specialisations for different types.
298  * This one value should do reasonably well for most types.
299  */
300 private {
301     template defLength(T)        { const uint defLength = 20; }   // general case: initial guess used when sizing array output buffers
302     template defLength(T : char) { const uint defLength = 4;  }   // matches the 4-char maximum of a formatted char (quotes plus an escaped character)
303     template defLength(T : bool) { const uint defLength = 5;  }   // "false" is five characters
304 }
305 //END Length templates
306
307 //BEGIN Utility funcs
308 private char[] formatLong (long val) {   // Shared helper for the integer overloads: formats val via cInt.toString with Style.Signed and Flags.Throw.
309     // May throw an IllegalArgumentException; don't bother catching and rethrowing:
310     return cInt.toString (val, cInt.Style.Signed, cInt.Flags.Throw);
311 }
// True for the characters written as a backslash escape sequence: the two quote characters, the
// backslash itself, and the control characters \a \b \t \n \v \f \r.
private bool isEscapableChar (char c) {
    switch (c) {
        case '\"', '\'', '\\':
            return true;
        default:
            // \a .. \r is one contiguous run of character codes (7 .. 13).
            return c >= '\a' && c <= '\r';
    }
}
315 // Throws on unsupported escape sequences; however this should never actually happen within parseFrom.
// Maps an escapable character to the character that follows the backslash in its escape
// sequence (for example a newline becomes 'n'; quotes and the backslash map to themselves).
// Throws an IllegalArgumentException for any other character; this should never actually happen
// within parseFrom.
private char replaceEscapableChar (char c) {
    switch (c) {
        case '\a': return 'a';
        case '\b': return 'b';
        case '\t': return 't';
        case '\n': return 'n';
        case '\v': return 'v';
        case '\f': return 'f';
        case '\r': return 'r';
        case '\"': return '\"';
        case '\'': return '\'';
        case '\\': return '\\';
        default:   break;
    }

    // Not an escapable character:
    throw new IllegalArgumentException ("Character is not escapable (internal parseFrom error)");
}
355
356 debug (UnitTest) {
357     import tango.io.Console;   // provides Cout, used for the progress messages below
358    
359     unittest {
360         Cout ("Running unittest: parseFrom ...").flush;
361        
362         assert (parseFrom!(char[]) ("\a\b\t\n\v\f\r\"\'\\") == "\"\\a\\b\\t\\n\\v\\f\\r\\\"\\\'\\\\\"");   // every escapable character becomes a backslash sequence
363        
364         Cout (" complete").newline;
365     }
366 }
367 //END Utility funcs
Note: See TracBrowser for help on using the browser.