Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

Ticket #282: utf_test.d

File utf_test.d, 26.8 kB (added by Deewiant, 13 years ago)

UTF conversion testing module

Line 
1 // A UTF conversion testing module largely based on the Unicode, Inc. test
2 // harness for "ConvertUTF.c", "harness.c".
3 // Both can be found at http://unicode.org/Public/PROGRAMS/CVTUTF/
4
5 // Written in the D programming language and tests both the Phobos and Tango libraries.
6
7 // Changes include output verbosity, lack of fatal errors (if something fails,
8 // only tests that depend on it to have worked are skipped), and renaming (due
9 // to partial reordering) of the tests.
10
11 /+
12  + The original test01() had to be majorly changed. A large part of it depended
13  + on testing isLegalUTF8, a function which took a UTF-8 sequence and the length
14  + of the code point encoded by that sequence, by passing one less than the
15  + actual length and testing the result.
16  +
17  + I believe this was equivalent to testing whether a sequence followed by any
18  + valid continuation byte was considered valid. For instance, the first such
19  + case in the testData array, [0xf0 0x93 0xb2 0xa1] was tested with a length of
20  + 3, which would mean that [0xf0 0x93 0xb2 (any in the range 0x80-0xbf)] would
21  + have validated.
22
23  + Since Tango doesn't have any equivalents to the isLegalUTF8 and
24  + isLegalUTF8Sequence functions, testData is amended with the correct UTF-16
25  + and UTF-32 encodings, and each entry is instead round-tripped through them:
26  + like the original test02(), but checking each intermediate value for
27  + correctness and also trying sequences with more than one code point. The
28  + length of the sequence is thus left out of the struct.
29  +
30  + Plenty of additions were also done to the testData array to make the test
31  + fairly comprehensive.
32  +/
33
34 // By Matti "Deewiant" Niemenmaa, 2007-02-14 and 2007-02-17.
35
36 version (Tango) { // Tango build: formatter, integer conversion and UTF converters come from Tango
37     import tango.io.Stdout;
38     import tango.text.convert.Format;
39     static import tango.text.convert.Integer;
40     import tango.text.convert.Utf;
41
42     alias tango.text.convert.Integer.toUtf8 toString; // same name the Phobos branch imports from std.string
43
44     // can't alias expressions (yet)
45     void output(char[] s) { // prints one line; the Phobos branch aliases writefln to this name instead
46         Stdout.println(s);
47     }
48 } else { // Phobos build
49     import std.stdio;
50     import std.string : format, toString;
51     import std.utf;
52
53     alias toUTF8  toUtf8; // expose the Phobos converters under the Tango spellings that main() calls
54     alias toUTF16 toUtf16;
55     alias toUTF32 toUtf32;
56
57     alias writefln output; // line printer used for the "Test N passed." messages
58 }
59
60 bool[5] failed; // one flag per test group (0 through 4); main() sets failed[n] when group n reports a failure
61
62 struct TestData { // one Test 0 vector; initialised positionally in testData, so field order matters
63     bool valid;    // expected outcome: true if converting utf8 should succeed, false if it should error
64     char [] utf8;  // the UTF-8 input sequence under test
65     wchar[] utf16; // expected UTF-16 encoding of utf8; only compared when valid is true
66     dchar[] utf32; // expected UTF-32 encoding of utf8; only compared when valid is true
67 }
68
69 const TestData[] testData = [ // vectors iterated by Test 0 in main(); invalid entries omit utf16/utf32
70     { true,  [0x7a],                   [0x__7a],         [0x____7a] }, // well-formed sequences of one to four bytes
71     { true,  [0xc2, 0xac],             [0x__ac],         [0x____ac] },
72     { true,  [0xdf, 0xb2],             [0x_7f2],         [0x___7f2] },
73     { true,  [0xe0, 0xa1, 0x81],       [0x_841],         [0x___841] },
74     { true,  [0xe1, 0xac, 0x90],       [0x1b10],         [0x__1b10] },
75     { true,  [0xf0, 0x93, 0xb2, 0xa1], [0xd80f, 0xdca1], [0x_13ca1] },
76     { true,  [0xf1, 0x87, 0x9a, 0xb0], [0xd8dd, 0xdeb0], [0x_476b0] },
77     { true,  [0xf3, 0x88, 0x9b, 0xad], [0xdae1, 0xdeed], [0x_c86ed] },
78     { true,  [0xf4, 0x82, 0x89, 0xbf], [0xdbc8, 0xde7f], [0x10227f] },
79     { false, [0x82, 0x00, 0x00]                                     }, // malformed sequences, expected to be rejected
80     { false, [0xf8, 0xac]                                           },
81     { false, [0xe1, 0xfc, 0xff]                                     },
82     { false, [0xc2, 0xfc, 0x00]                                     },
83     { false, [0xe1, 0xc2, 0x81]                                     },
84     { false, [0xc2, 0xc1]                                           },
85     { false, [0xe0, 0x9f, 0x00]                                     },
86     { false, [0xf0, 0x93, 0xb2, 0xc1]                               },
87     { true,  [0xed, 0x9f, 0xbf],       [0xd7ff],         [0x__d7ff] }, // U+D7FF: last code point below the surrogate range
88     { true,  [0xee, 0x80, 0x80],       [0xe000],         [0x__e000] }, // U+E000: first code point above the surrogate range
89     { false, [0xed, 0xa0, 0x80]                                     }, // would decode to the surrogate U+D800
90     { false, [0xed, 0xbf, 0xbf]                                     }, // would decode to the surrogate U+DFFF
91     { false, [0xf0, 0x93, 0xb2, 0xc3]                               },
92
93     // from Markus Kuhn's UTF-8 decoder capability and stress test
94     // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
95
96     { true,  [0xef, 0xbb, 0xbf],       [0xfeff],          [0x__feff] }, // Byte Order Mark
97     { true,  [0x00],                   [0x__00],          [0x____00] }, // lowest sequences for each length
98     { true,  [0xc2, 0x80],             [0x__80],          [0x____80] },
99     { true,  [0xe0, 0xa0, 0x80],       [0x_800],          [0x___800] },
100     { true,  [0xf0, 0x90, 0x80, 0x80], [0xd800, 0xdc00],  [0x_10000] },
101     { false, [0xf8, 0x88, 0x80, 0x80, 0x80],                         }, // 5 and 6 byte are no longer valid
102     { false, [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]                    },
103     { true,  [0x7f],                   [0x__7f],          [0x____7f] }, // highest sequences for each length
104     { true,  [0xdf, 0xbf],             [0x_7ff],          [0x___7ff] },
105     { true,  [0xef, 0xbf, 0xbf],       [0xffff],          [0x__ffff] },
106     { true,  [0xf4, 0x8f, 0xbf, 0xbf], [0xdbff, 0xdfff],  [0x10ffff] }, // stress test is old: U+1fffff is no longer valid, test U+10ffff instead
107     { false, [0xf4, 0x90, 0x80, 0x80]                                }, // U+110000
108     { false, [0x80] }, { false, [0x81] }, { false, [0x82] }, { false, [0x83] }, // continuation bytes
109     { false, [0x84] }, { false, [0x85] }, { false, [0x86] }, { false, [0x87] },
110     { false, [0x88] }, { false, [0x89] }, { false, [0x8a] }, { false, [0x8b] },
111     { false, [0x8c] }, { false, [0x8d] }, { false, [0x8e] }, { false, [0x8f] },
112     { false, [0x90] }, { false, [0x91] }, { false, [0x92] }, { false, [0x93] },
113     { false, [0x94] }, { false, [0x95] }, { false, [0x96] }, { false, [0x97] },
114     { false, [0x98] }, { false, [0x99] }, { false, [0x9a] }, { false, [0x9b] },
115     { false, [0x9c] }, { false, [0x9d] }, { false, [0x9e] }, { false, [0x9f] },
116     { false, [0xa0] }, { false, [0xa1] }, { false, [0xa2] }, { false, [0xa3] },
117     { false, [0xa4] }, { false, [0xa5] }, { false, [0xa6] }, { false, [0xa7] },
118     { false, [0xa8] }, { false, [0xa9] }, { false, [0xaa] }, { false, [0xab] },
119     { false, [0xac] }, { false, [0xad] }, { false, [0xae] }, { false, [0xaf] },
120     { false, [0xb0] }, { false, [0xb1] }, { false, [0xb2] }, { false, [0xb3] },
121     { false, [0xb4] }, { false, [0xb5] }, { false, [0xb6] }, { false, [0xb7] },
122     { false, [0xb8] }, { false, [0xb9] }, { false, [0xba] }, { false, [0xbb] },
123     { false, [0xbc] }, { false, [0xbd] }, { false, [0xbe] }, { false, [0xbf] },
124
125     // in the below, some checks are false for the additional reason that the
126     // sequences have to do with 5 or 6 byte sequences
127
128     { false, [0xc0] }, { false, [0xc1] }, { false, [0xc2] }, { false, [0xc3] }, // lonely first bytes of greater than 1-byte sequences
129     { false, [0xc4] }, { false, [0xc5] }, { false, [0xc6] }, { false, [0xc7] },
130     { false, [0xc8] }, { false, [0xc9] }, { false, [0xca] }, { false, [0xcb] },
131     { false, [0xcc] }, { false, [0xcd] }, { false, [0xce] }, { false, [0xcf] },
132     { false, [0xd0] }, { false, [0xd1] }, { false, [0xd2] }, { false, [0xd3] },
133     { false, [0xd4] }, { false, [0xd5] }, { false, [0xd6] }, { false, [0xd7] },
134     { false, [0xd8] }, { false, [0xd9] }, { false, [0xda] }, { false, [0xdb] },
135     { false, [0xdc] }, { false, [0xdd] }, { false, [0xde] }, { false, [0xdf] },
136     { false, [0xe0] }, { false, [0xe1] }, { false, [0xe2] }, { false, [0xe3] },
137     { false, [0xe4] }, { false, [0xe5] }, { false, [0xe6] }, { false, [0xe7] },
138     { false, [0xe8] }, { false, [0xe9] }, { false, [0xea] }, { false, [0xeb] },
139     { false, [0xec] }, { false, [0xed] }, { false, [0xee] }, { false, [0xef] },
140     { false, [0xf0] }, { false, [0xf1] }, { false, [0xf2] }, { false, [0xf3] },
141     { false, [0xf4] }, { false, [0xf5] }, { false, [0xf6] }, { false, [0xf7] },
142     { false, [0xf8] }, { false, [0xf9] }, { false, [0xfa] }, { false, [0xfb] },
143     { false, [0xfc] }, { false, [0xfd] },
144
145     { false, [0xe0, 0x80]                         }, // lowest sequences with last continuation byte missing
146     { false, [0xf0, 0x80, 0x80]                   },
147     { false, [0xf8, 0x80, 0x80, 0x80]             },
148     { false, [0xfc, 0x80, 0x80, 0x80, 0x80]       },
149     { false, [0xef, 0xbf]                         }, // highest sequences with last continuation byte missing
150     { false, [0xf7, 0xbf, 0xbf]                   },
151     { false, [0xfb, 0xbf, 0xbf, 0xbf]             },
152     { false, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf]       },
153     { false, [0xfe]                               }, // 0xfe and 0xff cannot appear in UTF-8
154     { false, [0xff]                               },
155     { false, [0x20, 0xfe, 0x20, 0xff]             },
156     { false, [0xc0, 0xaf]                         }, // overlong representations of 0x2f
157     { false, [0xe0, 0x80, 0xaf]                   },
158     { false, [0xf0, 0x80, 0x80, 0xaf]             },
159     { false, [0xf8, 0x80, 0x80, 0x80, 0xaf]       },
160     { false, [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf] },
161     { false, [0xc0, 0x80]                         }, // lowest overlong representations
162     { false, [0xe0, 0x80, 0x80]                   },
163     { false, [0xf0, 0x80, 0x80, 0x80]             },
164     { false, [0xf8, 0x80, 0x80, 0x80, 0x80]       },
165     { false, [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80] },
166     { false, [0xc1, 0xbf]                         }, // highest overlong representations
167     { false, [0xe0, 0x9f, 0xbf]                   },
168     { false, [0xf0, 0x8f, 0xbf, 0xbf]             },
169     { false, [0xf8, 0x87, 0xbf, 0xbf, 0xbf]       },
170     { false, [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf] },
171     { false, [0xed, 0xa0, 0x80]                   }, // single UTF-16 surrogates
172     { false, [0xed, 0xad, 0xbf]                   },
173     { false, [0xed, 0xae, 0x80]                   },
174     { false, [0xed, 0xaf, 0xbf]                   },
175     { false, [0xed, 0xb0, 0x80]                   },
176     { false, [0xed, 0xbe, 0x80]                   },
177     { false, [0xed, 0xbf, 0xbf]                   },
178     { false, [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80] }, // paired UTF-16 surrogates
179     { false, [0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf] },
180     { false, [0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80] },
181     { false, [0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf] },
182     { false, [0xed, 0xae, 0x80, 0xed, 0xb0, 0x80] },
183     { false, [0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf] },
184     { false, [0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80] },
185     { false, [0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf] },
186
187     // Some text in various languages: tests conversion of strings with more than just one code point
188
189     // "Good night" in Finnish
190     { true,
191         [0x48, 0x79, 0x76, 0xc3, 0xa4, 0xc3, 0xa4, 0x20, 0x79, 0xc3, 0xb6, 0x74, 0xc3, 0xa4],
192         [0x48, 0x79, 0x76, 0xe4,       0xe4,       0x20, 0x79, 0xf6,       0x74, 0xe4      ],
193         [0x48, 0x79, 0x76, 0xe4,       0xe4,       0x20, 0x79, 0xf6,       0x74, 0xe4      ]
194     },
195
196     // "I don't understand" in Japanese (Hiragana)
197     { true,
198         [ 0xe3, 0x82, 0x8f, 0xe3, 0x81, 0x8b, 0xe3, 0x82, 0x8a, 0xe3, 0x81, 0xbe, 0xe3, 0x81, 0x9b, 0xe3, 0x82, 0x93 ],
199         [ 0x308f,           0x304b,           0x308a,           0x307e,           0x305b,           0x3093 ],
200         [ 0x308f,           0x304b,           0x308a,           0x307e,           0x305b,           0x3093 ]
201     },
202
203     // the International Phonetic Alphabet representation of the above
204     { true,
205         [ 0xc9, 0xb0, 0x61, 0xe2, 0x86, 0x91, 0x6b, 0x61, 0xc9, 0xba, 0x69, 0x6d, 0x61, 0x73, 0x65, 0xe2, 0x86, 0x93, 0xc9, 0xb4],
206         [ 0x_270,     0x61, 0x2191,           0x6b, 0x61, 0x_27a,     0x69, 0x6d, 0x61, 0x73, 0x65, 0x2193,           0x_274 ],
207         [ 0x_270,     0x61, 0x2191,           0x6b, 0x61, 0x_27a,     0x69, 0x6d, 0x61, 0x73, 0x65, 0x2193,           0x_274 ]
208     },
209
210     // the Axiom of infinity from Zermelo-Fraenkel set theory
211     // one symbol per line
212     { true,
213     [
214         0xe2, 0x88, 0x83,       // there exists
215         0xf0, 0x9d, 0x90, 0x8d, // mathematical bold capital N
216         0x3a,                   // :
217         0xe2, 0x88, 0x85,       // empty set
218         0xe2, 0x88, 0x88,       // element of
219         0xf0, 0x9d, 0x90, 0x8d, // N
220         0xe2, 0x88, 0xa7,       // logical and
221         0x28,                   // (
222         0xe2, 0x88, 0x80,       // for all
223         0xf0, 0x9d, 0x91, 0xa5, // mathematical italic small X
224         0x3a,                   // such that
225         0xf0, 0x9d, 0x91, 0xa5, // x
226         0xe2, 0x88, 0x88,       // element of
227         0xf0, 0x9d, 0x90, 0x8d, // N
228         0xe2, 0x87, 0x92,       // rightwards double arrow
229         0xf0, 0x9d, 0x91, 0xa5, // x
230         0xe2, 0x88, 0xaa,       // union
231         0x7b,                   // {
232         0xf0, 0x9d, 0x91, 0xa5, // x
233         0x7d,                   // }
234         0xe2, 0x88, 0x88,       // element of
235         0xf0, 0x9d, 0x90, 0x8d, // N
236         0x29                    // )
237     ], [
238         0x2203,
239         0xd835, 0xdc0d,
240         0x__3a,
241         0x2205,
242         0x2208,
243         0xd835, 0xdc0d,
244         0x2227,
245         0x__28,
246         0x2200,
247         0xd835, 0xdc65,
248         0x__3a,
249         0xd835, 0xdc65,
250         0x2208,
251         0xd835, 0xdc0d,
252         0x21d2,
253         0xd835, 0xdc65,
254         0x222a,
255         0x__7b,
256         0xd835, 0xdc65,
257         0x__7d,
258         0x2208,
259         0xd835, 0xdc0d,
260         0x__29
261     ], [
262         0x__2203,
263         0x_1d40d,
264         0x____3a,
265         0x__2205,
266         0x__2208,
267         0x_1d40d,
268         0x__2227,
269         0x____28,
270         0x__2200,
271         0x_1d465,
272         0x____3a,
273         0x_1d465,
274         0x__2208,
275         0x_1d40d,
276         0x__21d2,
277         0x_1d465,
278         0x__222a,
279         0x____7b,
280         0x_1d465,
281         0x____7d,
282         0x__2208,
283         0x_1d40d,
284         0x____29
285     ]}
286 ];
287
// Runs the five UTF conversion test groups (0 through 4) against whichever
// library the file was compiled for (Tango or Phobos). Every failure prints a
// diagnostic line and sets the matching entry of the module-level `failed`
// array; a summary of which groups failed is printed at the end.
//
//   Test 0: converts every testData entry from UTF-8 and, for valid entries,
//           checks the intermediate values of the round trips.
//   Test 1: every surrogate U+D800..U+DFFF must be rejected.
//   Test 2: round trip UTF-32 -> UTF-16 -> UTF-8 -> UTF-16 -> UTF-32 for every
//           non-surrogate code point.
//   Test 3: round trip UTF-32 -> UTF-8 -> UTF-32 for the same code points.
//   Test 4: a value above dchar.max must be rejected.
void main() {
    dchar[] d;
    wchar[] w;
     char[] c;

    ///////////////
    // TEST 0
    ///////////////

    foreach (i, td; testData) {
        // Renders a UTF-8 sequence as space-separated hex bytes for diagnostics.
        static char[] toByteString(char[] s) {
            char[] r;
            foreach (b; s) {
                version (Tango) r ~= Formatter("0x{0:X} ", cast(ubyte)b);
                else            r ~= format("0x%X ", b);
            }
            // Strip the trailing space; an empty input has nothing to strip.
            if (r.length == 0)
                return r;
            return r[0..$-1];
        }

        // Failure state of THIS entry only. Using the sticky failed[0] to
        // decide what to skip would make one early failure suppress the
        // round-trip checks of every later entry.
        bool entryFailed = false;
        void fail() {
            failed[0] = true;
            entryFailed = true;
        }

        version (Tango) {
            check(
                works({w = toUtf16(td.utf8);}), td.valid,
                Formatter("0-{0:d3}a", i),
                Formatter("UTF-16 conversion of the UTF-8 sequence {0}", toByteString(td.utf8)),
                &fail
            );
            check(
                works({d = toUtf32(td.utf8);}), td.valid,
                Formatter("0-{0:d3}b", i),
                Formatter("UTF-32 conversion of the UTF-8 sequence {0}", toByteString(td.utf8)),
                &fail
            );
        } else {
            check(
                works({w = toUtf16(td.utf8);}), td.valid,
                format("0-%03da", i),
                format("UTF-16 conversion of the UTF-8 sequence %s", toByteString(td.utf8)),
                &fail
            );
            check(
                works({d = toUtf32(td.utf8);}), td.valid,
                format("0-%03db", i),
                format("UTF-32 conversion of the UTF-8 sequence %s", toByteString(td.utf8)),
                &fail
            );
        }

        // Invalid entries only need to be rejected; nothing to round-trip.
        if (!td.valid)
            continue;

        // w and d are only meaningful if both conversions above succeeded.
        if (entryFailed)
            continue;

        if (w != td.utf16) {
            fail();
            version (Tango) Stdout.formatln("UTF-16 conversion of the UTF-8 sequence {0} didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("UTF-16 conversion of the UTF-8 sequence %s didn't error, but gave the wrong result.", toByteString(td.utf8));
        }
        if (d != td.utf32) {
            fail();
            version (Tango) Stdout.formatln("UTF-32 conversion of the UTF-8 sequence {0} didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("UTF-32 conversion of the UTF-8 sequence %s didn't error, but gave the wrong result.", toByteString(td.utf8));
        }

        if (entryFailed)
            continue;

        // UTF-16 -> UTF-8
        version (Tango) check(
            works({c = toUtf8(w);}), true,
            Formatter("0-{0:d3}c", i),
            Formatter("converting the UTF-8 sequence {0} back from UTF-16 to UTF-8", toByteString(td.utf8)),
            &fail
        );
        else check(
            works({c = toUtf8(w);}), true,
            format("0-%03dc", i),
            format("converting the UTF-8 sequence %s back from UTF-16 to UTF-8", toByteString(td.utf8)),
            &fail
        );

        if (entryFailed)
            continue;

        if (c != td.utf8) {
            fail();
            version (Tango) Stdout.formatln("Converting the UTF-8 sequence {0} back from UTF-16 to UTF-8 didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("Converting the UTF-8 sequence %s back from UTF-16 to UTF-8 didn't error, but gave the wrong result.", toByteString(td.utf8));
        }

        // UTF-32 -> UTF-8
        version (Tango) check(
            works({c = toUtf8(d);}), true,
            Formatter("0-{0:d3}d", i),
            Formatter("converting the UTF-8 sequence {0} back from UTF-32 to UTF-8", toByteString(td.utf8)),
            &fail
        );
        else check(
            works({c = toUtf8(d);}), true,
            format("0-%03dd", i),
            format("converting the UTF-8 sequence %s back from UTF-32 to UTF-8", toByteString(td.utf8)),
            &fail
        );

        if (entryFailed)
            continue;

        if (c != td.utf8) {
            fail();
            version (Tango) Stdout.formatln("Converting the UTF-8 sequence {0} back from UTF-32 to UTF-8 didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("Converting the UTF-8 sequence %s back from UTF-32 to UTF-8 didn't error, but gave the wrong result.", toByteString(td.utf8));
        }

        // UTF-16 -> UTF-32
        version (Tango) check(
            works({d = toUtf32(w);}), true,
            Formatter("0-{0:d3}e", i),
            Formatter("converting the UTF-8 sequence {0} from UTF-16 to UTF-32", toByteString(td.utf8)),
            &fail
        );
        else check(
            works({d = toUtf32(w);}), true,
            format("0-%03de", i),
            format("converting the UTF-8 sequence %s from UTF-16 to UTF-32", toByteString(td.utf8)),
            &fail
        );

        if (entryFailed)
            continue;

        if (d != td.utf32) {
            fail();
            version (Tango) Stdout.formatln("Converting the UTF-8 sequence {0} from UTF-16 to UTF-32 didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("Converting the UTF-8 sequence %s from UTF-16 to UTF-32 didn't error, but gave the wrong result.", toByteString(td.utf8));

            // d feeds the next conversion, so a wrong d makes it pointless.
            continue;
        }

        // UTF-32 -> UTF-16
        version (Tango) check(
            works({w = toUtf16(d);}), true,
            Formatter("0-{0:d3}f", i),
            Formatter("converting the UTF-8 sequence {0} from UTF-32 to UTF-16", toByteString(td.utf8)),
            &fail
        );
        else check(
            works({w = toUtf16(d);}), true,
            format("0-%03df", i),
            format("converting the UTF-8 sequence %s from UTF-32 to UTF-16", toByteString(td.utf8)),
            &fail
        );

        if (entryFailed)
            continue;

        if (w != td.utf16) {
            fail();
            version (Tango) Stdout.formatln("Converting the UTF-8 sequence {0} from UTF-32 to UTF-16 didn't error, but gave the wrong result.", toByteString(td.utf8));
            else            writefln("Converting the UTF-8 sequence %s from UTF-32 to UTF-16 didn't error, but gave the wrong result.", toByteString(td.utf8));
        }
    }

    if (!failed[0])
        output("Test 0 passed.");

    ///////////////
    // TEST 1
    ///////////////

    const dchar
        HI_SURROGATE_BEG = cast(dchar)0xd800,
        HI_SURROGATE_END = cast(dchar)0xdbff,
        //unneeded: LO_SURROGATE_BEG = cast(dchar)0xdc00,
        LO_SURROGATE_END = cast(dchar)0xdfff;

    // Each counter holds the number of surrogates a conversion wrongly
    // accepted; all six are expected to stay at zero.
    ushort
        failed16High, failed16Low,
        failed8High, failed8Low,
        failed16_8High, failed16_8Low;

    d.length = 1;

    for (d[0] = HI_SURROGATE_BEG; d[0] <= LO_SURROGATE_END; ++d[0]) {
        if (works(toUtf16(d))) {
            if (d[0] <= HI_SURROGATE_END)
                ++failed16High;
            else
                ++failed16Low;
        }

        if (works(toUtf8(d))) {
            if (d[0] <= HI_SURROGATE_END)
                ++failed8High;
            else
                ++failed8Low;
        }

        if (works(toUtf8( [ cast(wchar)d[0] ] ))) {
            if (d[0] <= HI_SURROGATE_END)
                ++failed16_8High;
            else
                ++failed16_8Low;
        }

    }
    // All six counters take part: the UTF-16 -> UTF-8 results (1-4 and 1-5)
    // must fail the test as well, not just the UTF-32 ones.
    if (failed16High || failed16Low || failed8High || failed8Low || failed16_8High || failed16_8Low) {
        failed[1] = true;

        // Labels a sub-test from its count of wrongly accepted surrogates.
        char[] failStr(ushort count) {
            if (count)
                return "failed";
            else
                return "passed";
        }

        version (Tango)
        Stdout
        .formatln("Test 1-0 {1}: {0:d}/1024 successes (expected 0) in converting UTF-32 high surrogates (U+D800 to U+DBFF) to UTF-16.", failed16High,   failStr(failed16High))
        .formatln("Test 1-1 {1}: {0:d}/1024 successes (expected 0) in converting UTF-32 low surrogates (U+DC00 to U+DFFF) to UTF-16.",  failed16Low,    failStr(failed16Low))
        .formatln("Test 1-2 {1}: {0:d}/1024 successes (expected 0) in converting UTF-32 high surrogates (U+D800 to U+DBFF) to UTF-8.",  failed8High,    failStr(failed8High))
        .formatln("Test 1-3 {1}: {0:d}/1024 successes (expected 0) in converting UTF-32 low surrogates (U+DC00 to U+DFFF) to UTF-8.",   failed8Low,     failStr(failed8Low))
        .formatln("Test 1-4 {1}: {0:d}/1024 successes (expected 0) in converting UTF-16 high surrogates (U+D800 to U+DBFF) to UTF-8.",  failed16_8High, failStr(failed16_8High))
        .formatln("Test 1-5 {1}: {0:d}/1024 successes (expected 0) in converting UTF-16 low surrogates (U+DC00 to U+DFFF) to UTF-8.",   failed16_8Low,  failStr(failed16_8Low))
        ;

        else
        writefln(
            "Test 1-0 %s: %d/1024 successes (expected 0) in converting UTF-32 high surrogates (U+D800 to U+DBFF) to UTF-16.\n", failStr(failed16High),   failed16High,
            "Test 1-1 %s: %d/1024 successes (expected 0) in converting UTF-32 low surrogates (U+DC00 to U+DFFF) to UTF-16.\n",  failStr(failed16Low),    failed16Low,
            "Test 1-2 %s: %d/1024 successes (expected 0) in converting UTF-32 high surrogates (U+D800 to U+DBFF) to UTF-8.\n",  failStr(failed8High),    failed8High,
            "Test 1-3 %s: %d/1024 successes (expected 0) in converting UTF-32 low surrogates (U+DC00 to U+DFFF) to UTF-8.\n",   failStr(failed8Low),     failed8Low,
            "Test 1-4 %s: %d/1024 successes (expected 0) in converting UTF-16 high surrogates (U+D800 to U+DBFF) to UTF-8.\n",  failStr(failed16_8High), failed16_8High,
            "Test 1-5 %s: %d/1024 successes (expected 0) in converting UTF-16 low surrogates (U+DC00 to U+DFFF) to UTF-8.",     failStr(failed16_8Low),  failed16_8Low
        );

    }

    if (!failed[1])
        output("Test 1 passed.");

    ///////////////
    // TEST 2 and 3
    ///////////////

    for (d[0] = dchar.min; d[0] <= dchar.max; ++d[0]) {
        // Surrogates were covered by Test 1; jump over the whole range.
        if (d[0] == HI_SURROGATE_BEG)
            d[0] = LO_SURROGATE_END + 1;

        auto d2 = d;

        version (Tango) {
            if (!check(
                works({w = toUtf16(d);}), true,
                Formatter("2-{0:X}a", cast(uint)d[0]),
                Formatter("UTF-16 conversion of UTF-32 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({c = toUtf8(w);}), true,
                Formatter("2-{0:X}b", cast(uint)d[0]),
                Formatter("UTF-8 conversion of UTF-16 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({w = toUtf16(c);}), true,
                Formatter("2-{0:X}c", cast(uint)d[0]),
                Formatter("UTF-16 conversion of UTF-8 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({d2 = toUtf32(w);}), true,
                Formatter("2-{0:X}d", cast(uint)d[0]),
                Formatter("UTF-32 conversion of UTF-16 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[2] = true;}
            ))
                continue;
        } else {
            // The Phobos messages use plain %X: the braces belong to Tango's
            // {0:X} syntax and would otherwise be printed literally.
            if (!check(
                works({w = toUtf16(d);}), true,
                format("2-%Xa", d[0]),
                format("UTF-16 conversion of UTF-32 encoded code point U+%X", d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({c = toUtf8(w);}), true,
                format("2-%Xb", d[0]),
                format("UTF-8 conversion of UTF-16 encoded code point U+%X", d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({w = toUtf16(c);}), true,
                format("2-%Xc", d[0]),
                format("UTF-16 conversion of UTF-8 encoded code point U+%X", d[0]),
                {failed[2] = true;}
            ))
                continue;

            if (!check(
                works({d2 = toUtf32(w);}), true,
                format("2-%Xd", d[0]),
                format("UTF-32 conversion of UTF-16 encoded code point U+%X", d[0]),
                {failed[2] = true;}
            ))
                continue;
        }

        if (d2 != d) {
            failed[2] = true;
            version (Tango) Stdout.formatln("Round trip UTF-32 -> UTF-16 -> UTF-8 -> UTF-16 -> UTF-32 failed for U+{0:X}: ended up with U+{1:X}", cast(uint)d[0], cast(uint)d2[0]);
            else            writefln("Round trip UTF-32 -> UTF-16 -> UTF-8 -> UTF-16 -> UTF-32 failed for U+%X: ended up with U+%X", d[0], d2[0]);
        }

        version (Tango) {
            if (!check(
                works({c = toUtf8(d);}), true,
                Formatter("3-{0:X}a", cast(uint)d[0]),
                Formatter("UTF-8 conversion of UTF-32 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[3] = true;}
            ))
                continue;

            if (!check(
                works({d2 = toUtf32(c);}), true,
                Formatter("3-{0:X}b", cast(uint)d[0]),
                Formatter("UTF-32 conversion of UTF-8 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[3] = true;}
            ))
                continue;
        } else {
            if (!check(
                works({c = toUtf8(d);}), true,
                format("3-%Xa", d[0]),
                format("UTF-8 conversion of UTF-32 encoded code point U+%X", d[0]),
                {failed[3] = true;}
            ))
                continue;

            if (!check(
                works({d2 = toUtf32(c);}), true,
                format("3-%Xb", d[0]),
                format("UTF-32 conversion of UTF-8 encoded code point U+%X", d[0]),
                {failed[3] = true;}
            ))
                continue;
        }

        if (d2 != d) {
            failed[3] = true;
            version (Tango) Stdout.formatln("Round trip UTF-32 -> UTF-8 -> UTF-32 failed for U+{0:X}: ended up with U+{1:X}", cast(uint)d[0], cast(uint)d2[0]);
            else            writefln("Round trip UTF-32 -> UTF-8 -> UTF-32 failed for U+%X: ended up with U+%X", d[0], d2[0]);
        }
    }

    if (!failed[2])
        output("Test 2 passed.");
    if (!failed[3])
        output("Test 3 passed.");

    ///////////////
    // TEST 4
    ///////////////

    // arbitrary illegal value
    d[0] = cast(dchar)(dchar.max + 5);

    version (Tango) {
        if (check(
            works({toUtf8(d);}), false,
            "4-a",
            Formatter("UTF-8 conversion of illegal UTF-32 encoded code point U+{0:X}", cast(uint)d[0]),
            {failed[4] = true;}
        ))
            check(
                works({toUtf16(d);}), false,
                "4-b",
                Formatter("UTF-16 conversion of illegal UTF-32 encoded code point U+{0:X}", cast(uint)d[0]),
                {failed[4] = true;}
            );
    } else {
        if (check(
            works({toUtf8(d);}), false,
            "4-a",
            format("UTF-8 conversion of illegal UTF-32 encoded code point U+%X", d[0]),
            {failed[4] = true;}
        ))
            check(
                works({toUtf16(d);}), false,
                "4-b",
                format("UTF-16 conversion of illegal UTF-32 encoded code point U+%X", d[0]),
                {failed[4] = true;}
            );
    }

    if (!failed[4])
        output("Test 4 passed.");

    ///////////////
    // DONE TESTING
    ///////////////

    // Collect the numbers of the failed test groups as "n,n,...,".
    char[] s;
    foreach (test, failure; failed)
        if (failure)
            s ~= toString(test) ~ ",";

    // An empty list means nothing failed. (Testing any other length would
    // index s[$-1] on an empty array after a clean run.)
    if (s.length == 0) {
        version (Tango) Stdout.newline.formatln("All {0:d} tests passed.", failed.length);
        else            writefln("\nAll %d tests passed.", failed.length);
    } else {
        // Turn the trailing comma into the closing bracket of the list.
        s[$-1] = ']';
        version (Tango) Stdout.newline.format("Of the {0:d} tests, [{1}", failed.length, s).println(" failed.");
        else            writefln("\nOf the %d tests, [%s", failed.length, s, " failed.");
    }
}
707
// Compares an observed outcome with the expected one and reports a mismatch.
//
// result   - what actually happened (true = the operation succeeded)
// expected - what should have happened
// test     - identifier of the test, only evaluated when reporting a failure
// action   - description of the operation, only evaluated when reporting
// onError  - optional callback run after the failure message is printed
//
// Returns true when result and expected agree, false otherwise.
bool check(bool result, bool expected, lazy char[] test, lazy char[] action, void delegate() onError = null) {
    // Matching outcome: nothing to print, and the lazy arguments stay unevaluated.
    if (result == expected)
        return true;

    char[] wanted = expected ? "succeeded" : "failed";

    version (Tango) Stdout.formatln("Test {0} failed: {1} should have {2}.", test, action, wanted);
    else            writefln("Test %s failed: %s should have %s.", test, action, wanted);

    if (onError !is null)
        onError();

    return false;
}
718
// Evaluates `expression` and reports whether it completed without throwing.
//
// If the expression's value is itself callable with no arguments (for
// example a delegate literal), that value is invoked as well, so both
// works(f()) and works({ f(); }) run f.
//
// Returns true when nothing was thrown, false when anything was.
bool works(T)(lazy T expression) {
    bool ok = true;

    try {
        static if (is(typeof(expression()()))) {
            // Evaluate the lazy argument to get the callable, then run it.
            auto callable = expression();
            callable();
        } else {
            expression();
        }
    } catch {
        ok = false;
    }

    return ok;
}
729 }