root/trunk/src/semitwist/util/text.d

Revision 241, 28.2 kB (checked in by Abscissa, 6 months ago)

Another 64bit fix.

  • Property svn:eol-style set to native
Line 
1 // SemiTwist Library
2 // Written in the D programming language.
3
4 module semitwist.util.text;
5
6 import std.algorithm;
7 import std.array;
8 import std.conv;
9 import std.md5;
10 import std.stdio;
11 import std.traits;
12 import std.stream;
13 import std.string;
14 import std.system;
15 import std.utf;
16
17 public import std.stream: BOM;
18
19 import semitwist.util.all;
20
21 private alias semitwist.util.ctfe.ctfe_strip ctfe_strip;
22
/**
Generates the source code of a function template called `name` that returns
`data` as a string literal of the requested character type
(`name!char()`, `name!wchar()` or `name!dchar()`). The result is meant to be
mixed in.

Params:
    name   = Identifier of the generated function template.
    data   = Text pasted verbatim between double quotes in the generated code.
    access = Protection attribute placed in front of the generated function.

Notes:
Anything in "data" must be doubly escaped.

For instance, if you want the generated function to return newline (ie, "\n"),
then "data" must be ['\\', 'n'], and thus the mixin call would look like this:

----
mixin(multiTypeString!("unixNewline", "\\n"));
// Or
mixin(multiTypeString!("unixNewline", r"\n"));
----

Or, if you want the generated function to return the escape sequence
for newline (ie, r"\n", or "\\n", or ['\\', 'n']), then "data" must
be ['\\', '\\', 'n'], and thus the mixin call would look like this:

----
mixin(multiTypeString!("unixNewlineEscSequence", "\\\\n"));
// Or
mixin(multiTypeString!("unixNewlineEscSequence", r"\\n"));
----

(This requirement could be changed if there is a way to automatically
escape a string at compile-time.)

NOTE(review): the generated function is declared as returning `T[]` but
returns string literals; confirm this still compiles with the character
types callers actually instantiate it with.
*/
template multiTypeString(string name, string data, string access="public")
{
    enum multiTypeString =
    access~" T[] "~name~"(T)()"~
    "{"~
    "        static if(is(T ==  char)) { return \""~data~"\"c; }"~
    "   else static if(is(T == wchar)) { return \""~data~"\"w; }"~
    "   else static if(is(T == dchar)) { return \""~data~"\"d; }"~
    // The condition must be an explicit `false`: a bare string literal as the
    // condition is always true, so the assert would never fire.
    "   else static assert(false, \"T must be char, wchar, or dchar\");"~
    "}";
}
60
61 /// Warning: This is missing some unicode whitespace chars
62 mixin(multiTypeString!("whitespaceChars", r" \n\r\t\v\f"));
63
/// Unix EOL: "\n"
///
/// Rewrites str in place (it is taken by ref) so that Windows and Mac9 line
/// endings become Unix line endings.
void toUnixEOL(T)(ref T[] str)
{
    auto unixEOL = to!(T[])(nlStr_Linux);

    // Windows endings must be handled first; the Mac9 pass would otherwise
    // consume part of each of them.
    str = replace(str, to!(T[])(nlStr_Windows), unixEOL); // Win  -> Unix
    str = replace(str, to!(T[])(nlStr_Mac9),    unixEOL); // Mac9 -> Unix
}
70
/// Mac9 EOL: "\r"
///
/// Rewrites str in place (it is taken by ref) so that Windows and Unix line
/// endings become Mac9 line endings.
void toMac9EOL(T)(ref T[] str)
{
    auto mac9EOL = to!(T[])(nlStr_Mac9);

    // Windows endings are converted before the Unix pass.
    str = replace(str, to!(T[])(nlStr_Windows), mac9EOL); // Win  -> Mac9
    str = replace(str, to!(T[])(nlStr_Linux),   mac9EOL); // Unix -> Mac9
}
77
/// Win EOL: "\r\n"
///
/// Rewrites str in place (it is taken by ref). Everything is first brought to
/// Unix line endings, then those are expanded to Windows line endings.
void toWinEOL(T)(ref T[] str)
{
    // All -> Unix
    toUnixEOL(str);

    // Unix -> Win
    auto unixEOL = to!(T[])(nlStr_Linux);
    auto winEOL  = to!(T[])(nlStr_Windows);
    str = replace(str, unixEOL, winEOL);
}
84
/// Returns str with its line endings converted to the convention of the
/// platform being compiled for: Windows line endings on Windows, Unix line
/// endings everywhere else.
///
/// Previously only OSX and linux were handled besides Windows, so on any
/// other target the text was returned unconverted.
T[] toNativeEOL(T)(T[] str)
{
    version(Windows) toWinEOL(str);
    else             toUnixEOL(str);
    return str;
}
92
/// Converts text to native line endings via toNativeEOL, but only on
/// Windows; on every other target str is returned as-is.
T[] toNativeEOLFromUnix(T)(T[] str)
{
    version(Windows)
    {
        return str.toNativeEOL();
    }
    else
    {
        return str;
    }
}
98
/// Converts text to native line endings via toNativeEOL on every target
/// except Windows, where str is returned as-is.
///
/// Previously only OSX and linux triggered the conversion, so other
/// non-Windows targets received the text back unconverted.
T[] toNativeEOLFromWin(T)(T[] str)
{
    version(Windows) return str;
    else             return str.toNativeEOL();
}
105
/// Converts text to native line endings. The conversion via toNativeEOL is
/// requested unconditionally, whatever the target platform.
T[] toNativeEOLFromMac9(T)(T[] str)
{
    auto converted = toNativeEOL(str);
    return converted;
}
110
/// Selects the escaping scheme used by escape() and unescape().
enum EscapeSequence
{
    DDQS, // D Double Quote String, ex: `"foo\t"` <--> `foo `
    HTML, // ex: `&amp;` <--> `&`

    //TODO: Implement these
    //URI,  // ex: `%20` <--> ` `
    //SQL,  //TODO: Include different types of SQL escaping (SQL's about as standardized as BASIC)
}
120
/++
Escapes str using the scheme selected by type, by forwarding to
escapeDDQS or escapeHTML.

Throws: Exception if type is not a supported EscapeSequence.

Note:
For the escape and unescape functions, chaining one with the other
(ex: "unescape(escape(str))") will result in a string that is
semantically equivalent to the original, but it is *not* necessarily
guaranteed to be exactly identical to the original string.

For example:
  string str;
  str = `"\x41\t"`;        // 0x41 is ASCII and UTF-8 for A
  str = unescapeDDQS(str); // == `A ` (That's an actual tab character)
  str = escapeDDQS(str);   // == `"A\t"`

  Note that "\x41\t" and "A\t" are equivalent, but not identical.
+/
T escape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
    switch(type)
    {
    case EscapeSequence.DDQS:
        return escapeDDQS(str);

    case EscapeSequence.HTML:
        return escapeHTML(str);

    default:
        throw new Exception("Unsupported EscapeSequence");
    }
}
158
/// Reverses escape(): unescapes str using the scheme selected by type, by
/// forwarding to unescapeDDQS or unescapeHTML.
///
/// Throws: Exception if type is not a supported EscapeSequence.
T unescape(T)(T str, EscapeSequence type) if(isSomeString!T)
{
    switch(type)
    {
    case EscapeSequence.DDQS:
        return unescapeDDQS(str);

    case EscapeSequence.HTML:
        return unescapeHTML(str);

    default:
        throw new Exception("Unsupported EscapeSequence");
    }
}
181
/// Replaces every occurrence of escapeSequence in a copy of str with the
/// final code unit of escapeSequence (ex: `\"` becomes `"`).
///
/// An empty escapeSequence has no final code unit to substitute, so the copy
/// is returned unchanged instead of slicing out of range.
T unescapeChar(T)(T str, T escapeSequence) if(isSomeString!T)
{
    //mixin(ensureCharType!("T"));

    T ret = str.dup;
    if(escapeSequence.length == 0)
        return ret;

    ret = substitute(ret, escapeSequence, escapeSequence[$-1..$]);
    return ret;
}
190
/// Converts the source form of a D double-quoted string literal (including
/// the surrounding quotes and an optional `c`, `w` or `d` postfix) into the
/// text it denotes.
///
/// Warning: This doesn't unescape all escape sequences yet. Only
/// `\\ \" \' \r \n \t \? \a \b \f \v` are decoded; any other backslash
/// sequence is copied through untouched.
///
/// The input is decoded in a single left-to-right pass, so an escaped
/// backslash followed by a letter (ex: `\\n`) yields a backslash and that
/// letter rather than being decoded a second time.
///
/// Throws: Exception if str is not wrapped in double quotes (optionally
/// followed by a `c`, `w` or `d` postfix).
T unescapeDDQS(T)(T str) if(isSomeString!T)
{
    enum errStr = "str doesn't contain a valid D Double Quote String";
    alias Unqual!(typeof(str[0])) Char;

    if(str.length < 2 || str[0] != '"')
        throw new Exception(errStr);

    // Find the closing quote, stepping over an optional c/w/d postfix.
    size_t end = str.length - 1;
    if(str[end] != '"')
    {
        immutable postfix = str[end];
        if(postfix != 'c' && postfix != 'w' && postfix != 'd')
            throw new Exception(errStr);

        // Needs at least: opening quote, closing quote, postfix.
        if(str.length < 3 || str[end-1] != '"')
            throw new Exception(errStr);

        --end;
    }

    T ret;
    for(size_t i = 1; i < end; ++i)
    {
        immutable ch = str[i];

        // Ordinary code unit, or a backslash with nothing after it.
        if(ch != '\\' || i + 1 >= end)
        {
            ret ~= ch;
            continue;
        }

        Char unescaped;
        switch(str[i+1])
        {
        case '\\': unescaped = '\\'; break;
        case '"':  unescaped = '"';  break;
        case '\'': unescaped = '\''; break;

        case 'r':  unescaped = '\r'; break;
        case 'n':  unescaped = '\n'; break;
        case 't':  unescaped = '\t'; break;

        case '?':  unescaped = '\?'; break;
        case 'a':  unescaped = '\a'; break;
        case 'b':  unescaped = '\b'; break;
        case 'f':  unescaped = '\f'; break;
        case 'v':  unescaped = '\v'; break;
        //TODO: All the others

        default:
            // Unsupported sequence: keep the backslash; the following
            // code unit is copied by the next iteration.
            ret ~= ch;
            continue;
        }

        ret ~= unescaped;
        ++i; // Skip the code unit that completed the escape sequence
    }

    return ret;
}
236
/// Turns str into the source form of a D double-quoted string literal:
/// backslashes, double quotes, CR, LF and tab are replaced by their escape
/// sequences and the result is wrapped in double quotes.
T escapeDDQS(T)(T str) if(isSomeString!T)
{
    alias ctfe_substitute!(T) sub;

    // Backslashes are handled first so the backslashes introduced by the
    // later replacements are not escaped again.
    T escaped = sub(str, `\`, `\\`);
    escaped = sub(escaped, `"`, `\"`);

    // CR/LF: to prevent accidential conversions to platform-specific EOL
    escaped = sub(escaped, "\r", `\r`);
    escaped = sub(escaped, "\n", `\n`);

    // Tab: to prevent possible problems with automatic tab->space conversion
    escaped = sub(escaped, "\t", `\t`);

    // The rest don't need to be escaped
    return `"`~escaped~`"`;
}
250
251 /+
252 enum doubleQuoteTestStr = `"They said \"10 \\ 5 = 2\""`;
253
254 pragma(msg, "orig:        "~doubleQuoteTestStr);
255 pragma(msg, "unesc:       "~unescapeDDQS(doubleQuoteTestStr));
256 pragma(msg, "esc:         "~escapeDDQS(doubleQuoteTestStr));
257 pragma(msg, "esc(unesc):  "~escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
258 pragma(msg, "unesc(esc):  "~unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
259
260 pragma(msg, "unesc:       "~unescape(doubleQuoteTestStr, EscapeSequence.DDQS));
261 pragma(msg, "unesc:       "~doubleQuoteTestStr.unescape(EscapeSequence.DDQS));
262
263 mixin(unittestSemiTwistDLib("Outputting some things", q{
264     enum wstring ctEscW = escapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
265     enum dstring ctEscD = escapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
266     enum wstring ctUnescW = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w);
267     enum dstring ctUnescD = unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d);
268     writefln("%s%s", "ctEscW:      ", ctEscW);
269     writefln("%s%s", "ctEscD:      ", ctEscD);
270     writefln("%s%s", "ctUnescW:    ", ctUnescW);
271     writefln("%s%s", "ctUnescD:    ", ctUnescD);
272
273     writefln("%s%s", "unesc wchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
274     writefln("%s%s", "unesc dchar: ", unescapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
275     writefln("%s%s", "esc wchar:   ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`w));
276     writefln("%s%s", "esc dchar:   ", escapeDDQS(`"They said \"10 \\ 5 = 2\""`d));
277 //  writefln("%s%s", "int:         ", unescapeDDQS([cast(int)1,2,3]));
278
279     writefln("%s%s", "orig:        ", doubleQuoteTestStr);
280     writefln("%s%s", "unesc:       ", unescapeDDQS(doubleQuoteTestStr));
281     writefln("%s%s", "esc:         ", escapeDDQS(doubleQuoteTestStr));
282     writefln("%s%s", "esc(unesc):  ", escapeDDQS(unescapeDDQS(doubleQuoteTestStr)));
283     writefln("%s%s", "unesc(esc):  ", unescapeDDQS(escapeDDQS(doubleQuoteTestStr)));
284 }));
285 +/
286
/// Replaces the HTML entities `&lt;`, `&gt;` and `&amp;` in str with the
/// characters they stand for.
///
/// Warning: This doesn't unescape all escape sequences yet.
T unescapeHTML(T)(T str) if(isSomeString!T)
{
    alias ctfe_substitute!(T) sub;

    T text = sub(str, "&lt;", "<");
    text = sub(text, "&gt;", ">");

    // "&amp;" is decoded after the other entities.
    text = sub(text, "&amp;", "&");
    return text;
}
298
/// Replaces `&`, `<` and `>` in str with the HTML entities `&amp;`, `&lt;`
/// and `&gt;`. Quote characters are left as they are.
T escapeHTML(T)(T str) if(isSomeString!T)
{
    alias ctfe_substitute!(T) sub;

    // "&" is encoded first so the ampersands of the entities added
    // afterwards are not encoded again.
    T text = sub(str, "&", "&amp;");
    text = sub(text, "<", "&lt;");
    text = sub(text, ">", "&gt;");
    return text;
}
309
/// Like std.string.indexOf, but with an optional 'start' parameter,
/// and returns s.length when not found (instead of -1).
///
/// Params:
///     s     = Text to search.
///     c     = Character to look for.
///     start = Index in s at which the search begins. A value at or past
///             the end of s means there is nothing to search, so the
///             result is s.length ("not found").
///     cs    = Whether the comparison is case sensitive.
///
/// Returns: Index of the first match at or after start, or s.length.
//TODO*: Unittest these
size_t locate(Char)(in Char[] s, dchar c, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
    // Slicing with start > s.length would be out of range.
    if(start >= s.length)
        return s.length;

    auto index = std.string.indexOf(s[start..$], c, cs);
    return (index == -1)? s.length : index + start;
}
318
/// Like std.string.lastIndexOf, but with an optional 'start' parameter,
/// and returns s.length when not found (instead of -1).
///
/// Params:
///     s     = Text to search.
///     c     = Character to look for.
///     start = Only s[0..start] is searched. Values past the end of s are
///             clamped to s.length; the default searches all of s.
///     cs    = Whether the comparison is case sensitive.
///
/// Returns: Index of the last match before start, or s.length.
size_t locatePrior(Char)(in Char[] s, dchar c, size_t start=size_t.max, CaseSensitive cs = CaseSensitive.yes)
{
    if(start > s.length)
        start = s.length;

    auto index = std.string.lastIndexOf(s[0..start], c, cs);
    return (index == -1)? s.length : index;
}
328
/// Like std.string.indexOf for substrings, but with an optional 'start'
/// parameter, and returns s.length when not found (instead of -1).
///
/// Params:
///     s     = Text to search.
///     sub   = Substring to look for.
///     start = Index in s at which the search begins. A value at or past
///             the end of s means there is nothing to search, so the
///             result is s.length ("not found").
///     cs    = Whether the comparison is case sensitive.
///
/// Returns: Index of the first match at or after start, or s.length.
size_t locate(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=0, CaseSensitive cs = CaseSensitive.yes)
{
    // Slicing with start > s.length would be out of range.
    if(start >= s.length)
        return s.length;

    auto index = std.string.indexOf(s[start..$], sub, cs);
    return (index == -1)? s.length : index + start;
}
335
/// Like std.string.lastIndexOf for substrings, but with an optional 'start'
/// parameter, and returns s.length when not found (instead of -1).
///
/// Params:
///     s     = Text to search.
///     sub   = Substring to look for.
///     start = Only s[0..start] is searched. Values past the end of s are
///             clamped to s.length; the default searches all of s.
///     cs    = Whether the comparison is case sensitive.
///
/// Returns: Index of the last match inside s[0..start], or s.length.
size_t locatePrior(Char1, Char2)(in Char1[] s, in Char2[] sub, size_t start=size_t.max, CaseSensitive cs = CaseSensitive.yes)
{
    if(start > s.length)
        start = s.length;

    auto index = std.string.lastIndexOf(s[0..start], sub, cs);
    return (index == -1)? s.length : index;
}
345
/// Formats args with format() and appends a "\n" to the result.
///
/// Suggested usage:
///   "Hello %s!".formatln("World");
string formatln(T...)(T args)
{
    string line = format(args);
    line ~= "\n";
    return line;
}
352
//TODO*: Fix stripNonPrintable
/// Placeholder: the intended filtering is commented out below, so str is
/// currently returned unchanged.
T stripNonPrintable(T)(T str) if(isSomeString!T)
{
    //T ret = str.dup;
    //auto numRemaining = ret.removeIf( (T c){return !isPrintable(c);} );
    //return ret[0..numRemaining];
    return str;
}
361
/// Decodes the code point at the start of str and reports how many code
/// units it occupies.
///
/// Return value is number of code units
size_t nextCodePointSize(T)(T str) if(is(T==string) || is(T==wstring))
{
    // decode() advances the index past the code point it reads; starting
    // from 0, the resulting index is that code point's size.
    size_t index = 0;
    decode(str, index);
    return index;
}
369
/// Indents every line with indentStr
///
/// indentStr is placed at the very start and after every "\n" except one
/// that is the final code unit of str. An empty str yields indentStr alone.
T indent(T)(T str, T indentStr="\t") if(isSomeString!T)
{
    if(str == "")
        return indentStr;

    // The final code unit is kept out of the replacement so that a trailing
    // newline is not followed by an indent.
    auto allButLast = str[0..$-1];
    auto lastUnit   = str[$-1];

    return indentStr ~ allButLast.replace("\n", "\n"~indentStr) ~ lastUnit;
}
381
/// Prefixes every element of lines with indentStr.
///
/// The elements of lines are overwritten in place and the same array is
/// returned.
T[] indent(T)(T[] lines, T indentStr="\t") if(isSomeString!T)
{
    // An index loop is used because foreach(ref) doesn't work right at
    // compile time: DMD Issue #3835
    for(size_t i = 0; i < lines.length; ++i)
        lines[i] = indentStr ~ lines[i];

    return lines;
}
391
/// Unindents the lines of text as much as possible while preserving
/// all relative indentation.
///
/// The text is split on "\n", processed line by line and joined back with
/// "\n". An empty str yields "".
///
/// Inconsistent indentation (on lines that contain non-whitespace) is an error
/// and throws an exception at runtime, or asserts when executed at compile-time.
T unindent(T)(T str) if(isSomeString!T)
{
    if(str == "")
        return "";

    // Separate split/join helpers are used for compile-time evaluation.
    T[] lines = __ctfe ? str.ctfe_split("\n") : str.split("\n");

    lines = unindentImpl(lines, str);

    return __ctfe ? lines.ctfe_join("\n") : lines.join("\n");
}
415
/// Line-array form of unindent: removes the common indentation from the
/// given lines while preserving all relative indentation.
T[] unindent(T)(T[] lines) if(isSomeString!T)
{
    // No original string is supplied here; unindentImpl's second
    // parameter keeps its default.
    return unindentImpl!(T)(lines);
}
421
/// Shared implementation of the unindent overloads.
///
/// Finds the shortest leading-whitespace prefix among the lines that contain
/// non-whitespace, then removes that prefix from each of them. Whitespace-only
/// lines become "". The elements of lines are overwritten in place.
///
/// Params:
///     lines   = The text, already split into lines.
///     origStr = The unsplit text when called from the string overload,
///               null otherwise.
///
/// Throws: Exception at runtime (assert failure at compile time) when a line
/// containing non-whitespace does not begin with the shortest indent.
private T[] unindentImpl(T)(T[] lines, T origStr=null) if(isSomeString!T)
{
    if(lines == [])
        return [];

    bool isNonWhite(dchar ch)
    {
        if(__ctfe)
            return !ctfe_iswhite(ch);
        else
            return !iswhite(ch);
    }
    T leadingWhiteOf(T str)
        { return str[ 0 .. $-find!(isNonWhite)(str).length ]; }

    // Apply leadingWhiteOf, but emit null instead for whitespace-only lines
    T[] indents;
    if(__ctfe)
        indents = semitwist.util.functional.map( lines,
            (T str){ return str.ctfe_strip()==""? null : leadingWhiteOf(str);}
        );
    else
    {
        // Returns T (not string) so wstring/dstring instantiations compile.
        T mapPredicate(T str){ return str.strip()==""? null : leadingWhiteOf(str);}
        indents = array( std.algorithm.map!(mapPredicate)(lines) );
    }

    T shorterAndNonNull(T a, T b) {
        if(a is null) return b;
        if(b is null) return a;

        return (a.length < b.length)? a : b;
    }
    auto shortestIndent = std.algorithm.reduce!(shorterAndNonNull)(indents);

    // Nothing to remove: either every line is whitespace-only (null) or at
    // least one line has no indentation at all ("").
    // NOTE(review): for the "" case this strips leading whitespace rather than
    // leaving relative indentation intact - confirm that is intended.
    if(shortestIndent is null || shortestIndent == "")
    {
        if(origStr == null)
            return stripLinesLeft(lines);
        else
            return [origStr.stripl()];
    }

    foreach(i; 0..lines.length)
    {
        if(indents[i] is null)
            lines[i] = "";
        // Each line's own indent must begin with the shortest indent.
        // (A plain slice comparison is used so this also runs at compile time.)
        else if(
            indents[i].length >= shortestIndent.length &&
            indents[i][0..shortestIndent.length] == shortestIndent
        )
            lines[i] = lines[i][shortestIndent.length..$];
        else
        {
            if(__ctfe)
                assert(false, "Inconsistent indentation");
            else
                throw new Exception("Inconsistent indentation");
        }
    }

    return lines;
}
484
/// String form of stripLinesTop: applies only the "top" stripping step of
/// stripLinesBox_StrImpl to str.
T stripLinesTop(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = false, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// String form of stripLinesBottom: applies only the "bottom" stripping step
/// of stripLinesBox_StrImpl to str.
T stripLinesBottom(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = true, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// String form of stripLinesTopBottom: applies the "top" and "bottom"
/// stripping steps of stripLinesBox_StrImpl to str.
T stripLinesTopBottom(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = false, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
497
/// String form of stripLinesLeft: applies only the "left" stripping step of
/// stripLinesBox_StrImpl to str.
T stripLinesLeft(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = true, right = false;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// String form of stripLinesRight: applies only the "right" stripping step
/// of stripLinesBox_StrImpl to str.
T stripLinesRight(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = false, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
/// String form of stripLinesLeftRight: applies the "left" and "right"
/// stripping steps of stripLinesBox_StrImpl to str.
T stripLinesLeftRight(T)(T str) if(isSomeString!T)
{
    enum top = false, bottom = false, left = true, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
510
/// String form of stripLinesBox: applies all four stripping steps (top,
/// bottom, left and right) of stripLinesBox_StrImpl to str.
T stripLinesBox(T)(T str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = true, right = true;
    return stripLinesBox_StrImpl!(T, top, bottom, left, right)(str);
}
515
/// Common implementation of the string-level stripLines* functions.
///
/// Splits str on "\n", hands the lines to stripLinesBox_LineImpl with the
/// same four flags, and joins the result back with "\n". An empty str
/// yields "".
private T stripLinesBox_StrImpl
    (T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
    (T str)
    if(isSomeString!T)
{
    if(str == "")
        return "";

    // Separate split/join helpers are used for compile-time evaluation.
    T[] lines = __ctfe ? str.ctfe_split("\n") : str.split("\n");

    lines = stripLinesBox_LineImpl!(T, stripTop, stripBottom, stripLeft, stripRight)(lines);

    return __ctfe ? lines.ctfe_join("\n") : lines.join("\n");
}
537
/// Common implementation of stripLinesBox for line arrays: runs whichever of
/// the top/bottom/left/right stripping steps are enabled by the flags.
private T[] stripLinesBox_LineImpl
    (T, bool stripTop, bool stripBottom, bool stripLeft, bool stripRight)
    (T[] lines)
    if(isSomeString!T)
{
    static if(stripTop)
        lines = stripLinesTop(lines);

    static if(stripBottom)
        lines = stripLinesBottom(lines);

    // Left and right together are done in one combined step.
    static if(stripLeft && stripRight)
        lines = stripLinesLeftRight(lines);
    else static if(stripLeft)
        lines = stripLinesLeft(lines);
    else static if(stripRight)
        lines = stripLinesRight(lines);

    return lines;
}
558
/// Line-array form of stripLinesBox: applies all four stripping steps (top,
/// bottom, left and right) of stripLinesBox_LineImpl.
T[] stripLinesBox(T)(T[] str) if(isSomeString!T)
{
    enum top = true, bottom = true, left = true, right = true;
    return stripLinesBox_LineImpl!(T, top, bottom, left, right)(str);
}
563
/// Returns the slice of lines that starts at the first line which is not
/// empty after ctfe_strip.
///
/// If no such line exists, only the last line is kept. An empty array is
/// returned as-is (previously the index computation underflowed on it).
T[] stripLinesTop(T)(T[] lines) if(isSomeString!T)
{
    if(lines.length == 0)
        return lines;

    // Fallback when every line is blank: keep just the last one.
    auto firstLine = lines.length-1;

    foreach(i, line; lines)
    if(line.ctfe_strip() != "")
    {
        firstLine = i;
        break;
    }

    return lines[firstLine..$];
}
577
/// Returns the slice of lines that ends at the last line which is not empty
/// after ctfe_strip.
///
/// If no such line exists, only the first line is kept. An empty array is
/// returned as-is (previously it was sliced out of range).
T[] stripLinesBottom(T)(T[] lines) if(isSomeString!T)
{
    if(lines.length == 0)
        return lines;

    // Fallback when every line is blank: keep just the first one.
    size_t lastLine = 0;

    foreach_reverse(i, line; lines)
    if(line.ctfe_strip() != "")
    {
        lastLine = i;
        break;
    }

    return lines[0..lastLine+1];
}
591
/// Applies stripLinesTop and then stripLinesBottom to lines.
T[] stripLinesTopBottom(T)(T[] lines) if(isSomeString!T)
{
    return stripLinesBottom(stripLinesTop(lines));
}
598
/// Replaces every element of lines with the result of ctfe_stripl on it.
/// The array is modified in place and returned.
T[] stripLinesLeft(T)(T[] lines) if(isSomeString!T)
{
    // An index loop is used because foreach(ref) doesn't work right at
    // compile time: DMD Issue #3835
    for(size_t i = 0; i < lines.length; ++i)
        lines[i] = lines[i].ctfe_stripl();

    return lines;
}
607
/// Replaces every element of lines with the result of ctfe_stripr on it.
/// The array is modified in place and returned.
T[] stripLinesRight(T)(T[] lines) if(isSomeString!T)
{
    // An index loop is used because foreach(ref) doesn't work right at
    // compile time: DMD Issue #3835
    for(size_t i = 0; i < lines.length; ++i)
        lines[i] = lines[i].ctfe_stripr();

    return lines;
}
616
/// Replaces every element of lines with the result of ctfe_strip on it.
/// The array is modified in place and returned.
T[] stripLinesLeftRight(T)(T[] lines) if(isSomeString!T)
{
    // An index loop is used because foreach(ref) doesn't work right at
    // compile time: DMD Issue #3835
    for(size_t i = 0; i < lines.length; ++i)
        lines[i] = lines[i].ctfe_strip();

    return lines;
}
625
//TODO*: Unittest this
/// Returns true if std.algorithm.find locates needle in haystack.
bool contains(T1,T2)(T1 haystack, T2 needle)
{
    // find() returns what remains of haystack from the match onwards, so an
    // empty remainder means there was no match.
    auto remainder = std.algorithm.find(haystack, needle);
    return !remainder.empty;
}
631
/++
Unindents, strips whitespace-only lines from top and bottom,
and strips trailing whitespace from each line.
(Also converts Windows "\r\n" line endings to Unix "\n" line endings.)

See also the documentation for unindent().

Good for making easily-readable multi-line string literals without
leaving extra indents and whitespace in the resulting string:

Do this:
--------------------
void foo()
{
    enum codeStr = q{
        // Written in the D Programming Language
        // by John Doe

        int main()
        {
            return 0;
        }
    }.normalize();
}
--------------------

Instead of this:
--------------------
void foo()
{
    enum codeStr =
q{// Written in the D Programming Language
// by John Doe

int main()
{
    return 0;
}};
}
--------------------

The resulting string is exactly the same.
+/
T normalize(T)(T str) if(isSomeString!T)
{
    if(str == "")
        return "";

    // Separate split/join helpers are used for compile-time evaluation.
    T[] lines = __ctfe ? str.ctfe_split("\n") : str.split("\n");

    lines = normalize(lines);

    return __ctfe ? lines.ctfe_join("\n") : lines.join("\n");
}
693
/// Line-array form of normalize. In order: strips whitespace-only lines from
/// top and bottom, unindents, then strips the right side of every line.
T[] normalize(T)(T[] lines) if(isSomeString!T)
{
    auto trimmed    = stripLinesTopBottom(lines);
    auto unindented = unindent(trimmed);
    return stripLinesRight(unindented);
}
702
/// Computes the MD5 digest of data with std.md5 and returns it in the
/// textual form produced by digestToString.
string md5(string data)
{
    ubyte[16] digest;

    MD5_CTX ctx;
    ctx.start();
    ctx.update(data);
    ctx.finish(digest);

    return digestToString(digest);
}
713
/// Returns the byte sequence of the byte-order mark identified by bom.
immutable(ubyte)[] bomCodeOf(BOM bom)
{
    immutable(ubyte)[] code;

    final switch(bom)
    {
    case BOM.UTF8:
        code = cast(immutable(ubyte)[])x"EF BB BF";
        break;
    case BOM.UTF16LE:
        code = cast(immutable(ubyte)[])x"FF FE";
        break;
    case BOM.UTF16BE:
        code = cast(immutable(ubyte)[])x"FE FF";
        break;
    case BOM.UTF32LE:
        code = cast(immutable(ubyte)[])x"FF FE 00 00";
        break;
    case BOM.UTF32BE:
        code = cast(immutable(ubyte)[])x"00 00 FE FF";
        break;
    }

    return code;
}
725
/// Identifies the byte-order mark at the start of str.
///
/// Returns: The matching BOM, or BOM.UTF8 when str starts with none of the
/// known byte-order marks.
BOM bomOf(const(ubyte)[] str)
{
    if(str.startsWith(bomCodeOf(BOM.UTF8   ))) return BOM.UTF8;

    // The UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE),
    // so the 32-bit marks must be tested before the 16-bit ones; otherwise
    // UTF-32LE input is always reported as UTF-16LE.
    if(str.startsWith(bomCodeOf(BOM.UTF32LE))) return BOM.UTF32LE;
    if(str.startsWith(bomCodeOf(BOM.UTF32BE))) return BOM.UTF32BE;
    if(str.startsWith(bomCodeOf(BOM.UTF16LE))) return BOM.UTF16LE;
    if(str.startsWith(bomCodeOf(BOM.UTF16BE))) return BOM.UTF16BE;

    return BOM.UTF8;
}
736
// BOM constants for the byte order of the target being compiled for
// ("native") and for the opposite byte order ("nonNative"), in 16-bit and
// 32-bit flavours.
version(LittleEndian)
{
    enum BOM native16BitBOM    = BOM.UTF16LE;
    enum BOM native32BitBOM    = BOM.UTF32LE;
    enum BOM nonNative16BitBOM = BOM.UTF16BE;
    enum BOM nonNative32BitBOM = BOM.UTF32BE;
}
else
{
    enum BOM native16BitBOM    = BOM.UTF16BE;
    enum BOM native32BitBOM    = BOM.UTF32BE;
    enum BOM nonNative16BitBOM = BOM.UTF16LE;
    enum BOM nonNative32BitBOM = BOM.UTF32LE;
}
751
/// True when bom is BOM.UTF8 or one of the native-endian 16/32-bit BOMs.
bool isNativeEndian(BOM bom)
{
    if(bom == BOM.UTF8)
        return true;

    return bom == native16BitBOM || bom == native32BitBOM;
}
756
/// Exact opposite of isNativeEndian.
bool isNonNativeEndian(BOM bom)
{
    immutable native = isNativeEndian(bom);
    return !native;
}
761
/// True only for BOM.UTF8.
bool is8Bit(BOM bom)
{
    return BOM.UTF8 == bom;
}
766
/// True for either UTF-16 BOM, regardless of byte order.
bool is16Bit(BOM bom)
{
    // The native and non-native 16-bit BOMs are always exactly these two.
    return bom == BOM.UTF16LE || bom == BOM.UTF16BE;
}
771
/// True for either UTF-32 BOM, regardless of byte order.
bool is32Bit(BOM bom)
{
    // The native and non-native 32-bit BOMs are always exactly these two.
    return bom == BOM.UTF32LE || bom == BOM.UTF32BE;
}
776
/// Returns the byte order implied by bom. BOM.UTF8 maps to std.system's
/// `endian` value; the LE/BE BOMs map to little/big endian respectively.
Endian endianOf(BOM bom)
{
    if(bom == BOM.UTF8)
        return endian;

    immutable isLittle = (bom == BOM.UTF16LE || bom == BOM.UTF32LE);

    // DMD 2.055 changed "LittleEndian" to "littleEndian", etc...
    static if(__traits(compiles, Endian.littleEndian))
        return isLittle ? Endian.littleEndian : Endian.bigEndian;
    else
        return isLittle ? Endian.LittleEndian : Endian.BigEndian;
}
799
/// Evaluates to true when T is InsensitiveT instantiated with string,
/// wstring or dstring, and to false for any other type.
template isInsensitive(T)
{
    enum isInsensitive =
        is(T==InsensitiveT!string ) ||
        is(T==InsensitiveT!wstring) ||
        is(T==InsensitiveT!dstring);
}
// Compile-time sanity checks against the aliases declared further down.
static assert(isInsensitive!Insensitive);
static assert(isInsensitive!WInsensitive);
static assert(isInsensitive!DInsensitive);
static assert(!isInsensitive!string);
811
/++
A string wrapper whose equality, ordering and hashing ignore letter case.

The text is kept exactly as supplied (see toString); a lower-cased copy is
maintained alongside it and used for comparing and hashing.
+/
struct InsensitiveT(T) if(isSomeString!T)
{
    private T str;         // The text as supplied by the user
    private T foldingCase; // Lower-cased copy of str, used for cmp/hash

    /// Wraps newStr.
    this(T newStr)
    {
        str = newStr;
        updateFoldingCase();
    }

    /// Returns the text with its original letter case.
    T toString()
    {
        return str;
    }

    // Recomputes foldingCase from str.
    private void updateFoldingCase()
    {
        // Phobos doesn't actually have a tofolding() yet
        foldingCase = tolower(str);
    }

    /// Hash of the case-folded text, so values that compare equal hash equal.
    const hash_t toHash()
    {
        // The TypeInfo must match foldingCase's real type; typeid(string)
        // would hash only part of a wstring/dstring.
        return typeid(T).getHash(&foldingCase);
    }

    /// Assigns from another InsensitiveT (of any string type) or from a
    /// plain string.
    void opAssign(T2)(T2 b) if(isInsensitive!T2 || isSomeString!T2)
    {
        static if(is(T2 == InsensitiveT!T))
        {
            // Same instantiation: both members can be copied directly.
            str = b.str;
            foldingCase = b.foldingCase;
        }
        else static if(isInsensitive!T2)
        {
            str = to!T(b.str);
            updateFoldingCase();
        }
        else
        {
            str = b;
            updateFoldingCase();
        }
    }

    /// Concatenation: `a ~ b`.
    InsensitiveT!T opBinary(string op)(InsensitiveT!T b) if(op=="~")
    {
        return InsensitiveT!T(str ~ b.str);
    }

    /// Appending: `a ~= b`.
    InsensitiveT!T opOpAssign(string op)(ref InsensitiveT!T b) if(op=="~")
    {
        str ~= b.str;
        foldingCase ~= b.foldingCase;
        return this;
    }

    /// Case-insensitive equality, defined in terms of opCmp.
    const bool opEquals(ref const InsensitiveT!T b)
    {
        /+if (str is b.str) return true;
        if (str is null || b.str is null) return false;
        return foldingCase == b.foldingCase;+/
        return this.opCmp(b) == 0;
    }

    /// Case-insensitive ordering. A null string sorts before any non-null one.
    const int opCmp(ref const InsensitiveT!T b)
    {
        if (str   is b.str) return 0;
        if (str   is null ) return -1;
        if (b.str is null ) return 1;
        return std.string.cmp(foldingCase, b.foldingCase);
    }

    /// `a[]`
    InsensitiveT!T opSlice()
    {
        return this;
    }

    /// `a[x]`: the code unit at index x, with its original letter case.
    auto opIndex(size_t x)
    {
        return str[x];
    }

    /// Kept for existing callers that invoke it by name; index syntax
    /// (`a[x]`) is served by opIndex.
    auto opSlice(size_t x)
    {
        return str[x];
    }

    /// `a[x..y]`
    InsensitiveT!T opSlice(size_t x, size_t y)
    {
        return InsensitiveT!T(str[x..y]);
    }
}
901
902 alias InsensitiveT!string  Insensitive;
903 alias InsensitiveT!wstring WInsensitive;
904 alias InsensitiveT!dstring DInsensitive;
905
906 mixin(unittestSemiTwistDLib(q{
907
908     // Insensitive
909     mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("Test") }));
910     mixin(deferAssert!(q{ Insensitive("TEST") == Insensitive("TEST") }));
911     mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive("ABCD") }));
912     mixin(deferAssert!(q{ Insensitive("TEST") != Insensitive(null)   }));
913     mixin(deferAssert!(q{ Insensitive(null)   == Insensitive(null)   }));
914     mixin(deferAssert!(q{ Insensitive("Test") == Insensitive("TEST") }));
915     mixin(deferAssert!(q{ Insensitive("ABCD") != Insensitive("TEST") }));
916     mixin(deferAssert!(q{ Insensitive(null)   != Insensitive("TEST") }));
917
918     mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("ES") }));
919     mixin(deferAssert!(q{ Insensitive("TEST")[1..3] == Insensitive("es") }));
920     mixin(deferAssert!(q{ Insensitive("TEST")[1..3] != Insensitive("AB") }));
921
922     mixin(deferAssert!(q{ Insensitive("TE")~Insensitive("ST") == Insensitive("TesT") }));
923    
924     Insensitive ins;
925     ins = Insensitive("TEST");
926     ins = "ab";
927     ins ~= Insensitive("cd");
928
929     mixin(deferAssert!(q{ ins == Insensitive("AbcD") }));
930    
931     int[Insensitive] ins_aa = [Insensitive("ABC"):1, Insensitive("DEF"):2, Insensitive("Xyz"):3];
932     mixin(deferAssert!(q{ Insensitive("ABC") in ins_aa }));
933     mixin(deferAssert!(q{ Insensitive("DEF") in ins_aa }));
934     mixin(deferAssert!(q{ Insensitive("Xyz") in ins_aa }));
935     mixin(deferAssert!(q{ Insensitive("aBc") in ins_aa }));
936     mixin(deferAssert!(q{ Insensitive("dEf") in ins_aa }));
937     mixin(deferAssert!(q{ Insensitive("xYZ") in ins_aa }));
938     mixin(deferAssert!(q{ Insensitive("HI") !in ins_aa }));
939    
940     mixin(deferAssert!(q{ ins_aa[Insensitive("aBc")] == 1 }));
941     mixin(deferAssert!(q{ ins_aa[Insensitive("dEf")] == 2 }));
942     mixin(deferAssert!(q{ ins_aa[Insensitive("xYZ")] == 3 }));
943
944     // escapeDDQS, unescapeDDQS
945     mixin(deferEnsure!(q{ `hello`.escapeDDQS()     }, q{ _ == `"hello"` }));
946     mixin(deferEnsure!(q{ `"hello"`.unescapeDDQS() }, q{ _ == "hello"   }));
947     mixin(deferEnsure!(q{ `"I"`.unescapeDDQS()     }, q{ _ == "I"       }));
948    
949     mixin(deferEnsure!(q{ (`And...`~"\n"~`sam\nick said "Hi!".`).escapeDDQS()  }, q{ _ == `"And...\nsam\\nick said \"Hi!\"."`  }));
950     //TODO: Make this one pass
951     //mixin(deferEnsure!(q{ `"And...\nsam\\nick said \"Hi!\"."`.unescapeDDQS() }, q{ _ == `And...`~"\n"~`sam\nick said "Hi!".` }));
952     mixin(deferEnsureThrows!(q{ "hello".unescapeDDQS(); }, Exception));
953
954     // indent
955     mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("  ") }, q{ _ == "  A\n  \tB\n  \n  C" }));
956     mixin(deferEnsure!(q{ "A\nB\n".indent("\t")      }, q{ _ == "\tA\n\tB\n"          }));
957     mixin(deferEnsure!(q{ "".indent("\t")            }, q{ _ == "\t"                  }));
958     mixin(deferEnsure!(q{ "A".indent("\t")           }, q{ _ == "\tA"                 }));
959     mixin(deferEnsure!(q{ "A\n\tB\n\nC".indent("")   }, q{ _ == "A\n\tB\n\nC"         }));
960
961     // unindent
962     mixin(deferEnsure!(q{ " \t A\n \t \tB\n \t C\n  \t\n \t D".unindent() }, q{ _ == "A\n\tB\nC\n\nD" }));
963     mixin(deferEnsure!(q{ " D\n".unindent()    }, q{ _ == "D\n" }));
964     mixin(deferEnsure!(q{ " D\n ".unindent()   }, q{ _ == "D\n" }));
965     mixin(deferEnsure!(q{ "D".unindent()       }, q{ _ == "D"   }));
966     mixin(deferEnsure!(q{ "".unindent()        }, q{ _ == ""    }));
967     mixin(deferEnsure!(q{ " ".unindent()       }, q{ _ == ""    }));
968     mixin(deferEnsureThrows!(q{ " \tA\n\t B".unindent(); }, Exception));
969     mixin(deferEnsureThrows!(q{ "  a\n \tb".unindent();    }, Exception));
970
971     // unindent at compile-time
972     enum ctfe_unindent_dummy1 = " \t A\n \t \tB\n \t C\n  \t\n \t D".unindent();
973     enum ctfe_unindent_dummy2 = " D".unindent();
974     enum ctfe_unindent_dummy3 = " D\n".unindent();
975     enum ctfe_unindent_dummy4 = "".unindent();
976
977     mixin(deferEnsure!(q{ ctfe_unindent_dummy1 }, q{ _ == "A\n\tB\nC\n\nD" }));
978     mixin(deferEnsure!(q{ ctfe_unindent_dummy2 }, q{ _ == "D"   }));
979     mixin(deferEnsure!(q{ ctfe_unindent_dummy3 }, q{ _ == "D\n" }));
980     mixin(deferEnsure!(q{ ctfe_unindent_dummy4 }, q{ _ == ""    }));
981    
982     //enum ctfe_unindent_dummy5 = "  a\n \tb".unindent(); // Should fail to compile
983    
984     // contains
985     mixin(deferEnsure!(q{ contains("abcde", 'a') }, q{ _==true  }));
986     mixin(deferEnsure!(q{ contains("abcde", 'c') }, q{ _==true  }));
987     mixin(deferEnsure!(q{ contains("abcde", 'e') }, q{ _==true  }));
988     mixin(deferEnsure!(q{ contains("abcde", 'x') }, q{ _==false }));
989
990     // stripLines: Top and Bottom
991     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTop()       }, q{ _ == " ABC \n \n DEF \n \t \n\t \n" }));
992     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBottom()    }, q{ _ == " \t \n\t \n ABC \n \n DEF "   }));
993     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesTopBottom() }, q{ _ == " ABC \n \n DEF "              }));
994
995     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTop()       }, q{ _ == "ABC\n " }));
996     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBottom()    }, q{ _ == "\nABC"  }));
997     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesTopBottom() }, q{ _ == "ABC"    }));
998
999     mixin(deferEnsure!(q{ "\n".stripLinesTop()       }, q{ _ == "" }));
1000     mixin(deferEnsure!(q{ "\n".stripLinesBottom()    }, q{ _ == "" }));
1001     mixin(deferEnsure!(q{ "\n".stripLinesTopBottom() }, q{ _ == "" }));
1002
1003     mixin(deferEnsure!(q{ "ABC".stripLinesTopBottom()      }, q{ _ == "ABC" }));
1004     mixin(deferEnsure!(q{ "".stripLinesTopBottom()         }, q{ _ == ""    }));
1005
1006     // stripLines: Left and Right
1007     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeft()      }, q{ _ == "\n\nABC \n\nDEF \n\n\n" }));
1008     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesRight()     }, q{ _ == "\n\n ABC\n\n DEF\n\n\n" }));
1009     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight() }, q{ _ == "\n\nABC\n\nDEF\n\n\n"   }));
1010
1011     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeft()      }, q{ _ == "\nABC\n" }));
1012     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesRight()     }, q{ _ == "\nABC\n" }));
1013     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesLeftRight() }, q{ _ == "\nABC\n" }));
1014
1015     mixin(deferEnsure!(q{ "\n".stripLinesLeft()      }, q{ _ == "\n" }));
1016     mixin(deferEnsure!(q{ "\n".stripLinesRight()     }, q{ _ == "\n" }));
1017     mixin(deferEnsure!(q{ "\n".stripLinesLeftRight() }, q{ _ == "\n" }));
1018
1019     mixin(deferEnsure!(q{ "ABC".stripLinesLeftRight() }, q{ _ == "ABC" }));
1020     mixin(deferEnsure!(q{ "".stripLinesLeftRight()    }, q{ _ == ""    }));
1021
1022     // stripLinesBox
1023     mixin(deferEnsure!(q{ " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox() }, q{ _ == "ABC\n\nDEF" }));
1024     mixin(deferEnsure!(q{ "\nABC\n ".stripLinesBox() }, q{ _ == "ABC" }));
1025     mixin(deferEnsure!(q{ "\n".stripLinesBox()       }, q{ _ == ""    }));
1026     mixin(deferEnsure!(q{ "ABC".stripLinesBox()      }, q{ _ == "ABC" }));
1027     mixin(deferEnsure!(q{ "".stripLinesBox()         }, q{ _ == ""    }));
1028    
1029     // stripLines at compile-time
1030     enum ctfe_stripLinesBox_dummy1 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesBox();
1031     enum ctfe_stripLinesBox_dummy2 = " \t \n\t \n ABC \n \n DEF \n \t \n\t \n".stripLinesLeftRight();
1032     enum ctfe_stripLinesBox_dummy3 = "".stripLinesBox();
1033
1034     mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy1 }, q{ _ == "ABC\n\nDEF" }));
1035     mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy2 }, q{ _ == "\n\nABC\n\nDEF\n\n\n" }));
1036     mixin(deferEnsure!(q{ ctfe_stripLinesBox_dummy3 }, q{ _ == "" }));
1037
1038     // normalize
1039     mixin(deferEnsure!(q{
1040                 q{
1041             // test
1042             void foo() { 
1043                 int x = 2;
1044             }
1045     }.normalize()
1046     }, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));
1047
1048     enum ctfe_normalize_dummy1 = q{
1049             // test
1050             void foo() { 
1051                 int x = 2;
1052             }
1053     }.normalize();
1054     mixin(deferEnsure!(q{ ctfe_normalize_dummy1 }, q{ _ == "// test\nvoid foo() {\n\tint x = 2;\n}" }));
1055 }));
Note: See TracBrowser for help on using the browser.