Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

root/trunk/tango/text/Util.d

Revision 3992, 55.4 kB (checked in by kris, 1 month ago)

changed return type

  • Property svn:mime-type set to text/x-dsrc
  • Property svn:eol-style set to native
<
Line 
1 /*******************************************************************************
2
3         copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
4
5         license:        BSD style: $(LICENSE)
6
7         version:        Apr 2004: Initial release
8                         Dec 2006: South Seas version
9
10         author:         Kris
11
12
13         Placeholder for a variety of wee functions. These functions are all
14         templated with the intent of being used for arrays of char, wchar,
15         and dchar. However, they operate correctly with other array types
16         also.
17
18         Several of these functions return an index value, representing where
19         some criteria was identified. When said criteria is not matched, the
20         functions return a value representing the array length provided to
21         them. That is, for those scenarios where C functions might typically
22         return -1 these functions return length instead. This operates nicely
23         with D slices:
24         ---
25         auto text = "happy:faces";
26         
27         assert (text[0 .. locate (text, ':')] == "happy");
28         
29         assert (text[0 .. locate (text, '!')] == "happy:faces");
30         ---
31
32         The contains() function is more convenient for trivial lookup
33         cases:
34         ---
35         if (contains ("fubar", '!'))
36             ...
37         ---
38
39         Note that where some functions expect a uint as an argument, the
40         D template-matching algorithm will fail where an int is provided
41         instead. This is typically the cause of "template not found"
42         errors. Also note that name overloading is not supported cleanly
43         by IFTI at this time, so is not applied here.
44
45
46         Applying the D "import alias" mechanism to this module is highly
47         recommended, in order to limit namespace pollution:
48         ---
49         import Util = tango.text.Util;
50
51         auto s = Util.trim ("  foo ");
52         ---
53                 
54
55         Function templates:
56         ---
57         trim (source)                               // trim whitespace
58         triml (source)                              // trim whitespace
59         trimr (source)                              // trim whitespace
60         strip (source, match)                       // trim elements
61         stripl (source, match)                      // trim elements
62         stripr (source, match)                      // trim elements
63         chopl (source, match)                       // trim pattern match
64         chopr (source, match)                       // trim pattern match
65         delimit (src, set)                          // split on delims
66         split (source, pattern)                     // split on pattern
67         splitLines (source);                        // split on lines
68         head (source, pattern, tail)                // split to head & tail
69         join (source, postfix, output)              // join text segments
70         prefix (dst, prefix, content...)            // prefix text segments
71         postfix (dst, postfix, content...)          // postfix text segments
72         combine (dst, prefix, postfix, content...)  // combine lotsa stuff
73         repeat (source, count, output)              // repeat source
74         replace (source, match, replacement)        // replace chars
75         substitute (source, match, replacement)     // replace/remove matches
76         count (source, match)                       // count instances
77         contains (source, match)                    // has char?
78         containsPattern (source, match)             // has pattern?
79         locate (source, match, start)               // find char
80         locatePrior (source, match, start)          // find prior char
81         locatePattern (source, match, start);       // find pattern
82         locatePatternPrior (source, match, start);  // find prior pattern
83         indexOf (s*, match, length)                 // low-level lookup
84         mismatch (s1*, s2*, length)                 // low-level compare
85         matching (s1*, s2*, length)                 // low-level compare
86         isSpace (match)                             // is whitespace?
87         unescape(source, output)                    // convert '\' prefixes
88         layout (destination, format ...)            // featherweight printf
89         lines (str)                                 // foreach lines
90         quotes (str, set)                           // foreach quotes
91         delimiters (str, set)                       // foreach delimiters
92         patterns (str, pattern)                     // foreach patterns
93         ---
94
95         Please note that any 'pattern' referred to within this module
96         refers to a pattern of characters, and not some kind of regex
97         descriptor. Use the Regex module for regex operation.
98
99 *******************************************************************************/
100
101 module tango.text.Util;
102
/******************************************************************************

        Strip whitespace (as classified by isSpace) from both ends of
        the provided array. No copy is made: the result is a slice of
        the original content

******************************************************************************/

T[] trim(T) (T[] source)
{
        auto first = source.ptr;
        auto last  = first + source.length;

        // advance past leading whitespace
        for (; first < last; ++first)
               if (! isSpace (*first))
                     break;

        // retreat over trailing whitespace
        for (; last > first; --last)
               if (! isSpace (*(last - 1)))
                     break;

        return first [0 .. last - first];
}
123
/******************************************************************************

        Strip whitespace (as classified by isSpace) from the left hand
        side of the provided array. The result is a slice of the
        original content

******************************************************************************/

T[] triml(T) (T[] source)
{
        auto first = source.ptr;
        auto last  = first + source.length;

        // advance past leading whitespace
        for (; first < last; ++first)
               if (! isSpace (*first))
                     break;

        return first [0 .. last - first];
}
141
/******************************************************************************

        Strip whitespace (as classified by isSpace) from the right hand
        side of the provided array. The result is a slice of the
        original content

******************************************************************************/

T[] trimr(T) (T[] source)
{
        auto first = source.ptr;
        auto last  = first + source.length;

        // retreat over trailing whitespace
        for (; last > first; --last)
               if (! isSpace (*(last - 1)))
                     break;

        return first [0 .. last - first];
}
159
/******************************************************************************

        Remove every leading and trailing element identical to 'match'
        from the given array. The result is a slice of the original
        content

******************************************************************************/

T[] strip(T) (T[] source, T match)
{
        size_t lo = 0,
               hi = source.length;

        // skip matching elements at the front
        while (lo < hi && source[lo] is match)
               ++lo;

        // drop matching elements at the back
        while (hi > lo && source[hi-1] is match)
               --hi;

        return source [lo .. hi];
}
180
/******************************************************************************

        Remove every leading element identical to 'match' from the
        given array. The result is a slice of the original content

******************************************************************************/

T[] stripl(T) (T[] source, T match)
{
        size_t lo = 0;

        // skip matching elements at the front
        while (lo < source.length && source[lo] is match)
               ++lo;

        return source [lo .. $];
}
198
/******************************************************************************

        Remove every trailing element identical to 'match' from the
        given array. The result is a slice of the original content

******************************************************************************/

T[] stripr(T) (T[] source, T match)
{
        size_t hi = source.length;

        // drop matching elements at the back
        while (hi > 0 && source[hi-1] is match)
               --hi;

        return source [0 .. hi];
}
216
/******************************************************************************

        Remove the pattern 'match' from the left hand side of source,
        where source begins with it. Otherwise source is returned as
        is. The result is a slice of the original content

******************************************************************************/

T[] chopl(T) (T[] source, T[] match)
{
        auto n = match.length;

        // a pattern longer than the source cannot be a prefix of it
        if (n > source.length)
            return source;

        return (source[0 .. n] == match) ? source[n .. $] : source;
}
232
/******************************************************************************

        Remove the pattern 'match' from the right hand side of source,
        where source ends with it. Otherwise source is returned as is.
        The result is a slice of the original content

******************************************************************************/

T[] chopr(T) (T[] source, T[] match)
{
        auto n = match.length;

        // a pattern longer than the source cannot be a suffix of it
        if (n > source.length)
            return source;

        auto keep = source.length - n;
        return (source[keep .. $] == match) ? source[0 .. keep] : source;
}
248
/******************************************************************************

        Overwrite each element identical to 'match' with 'replacement'.
        The source array is modified in place, and is also returned

******************************************************************************/

T[] replace(T) (T[] source, T match, T replacement)
{
        for (size_t i = 0; i < source.length; ++i)
             if (source[i] is match)
                 source[i] = replacement;

        return source;
}
262
/******************************************************************************

        Build a new array from source, wherein each instance of 'match'
        is substituted with 'replacement'. Provide a null replacement
        to remove the matches rather than replace them

******************************************************************************/

T[] substitute(T) (T[] source, T[] match, T[] replacement)
{
        T[] result;

        // concatenate everything the pattern iterator hands back
        foreach (piece; patterns (source, match, replacement))
                 result ~= piece;

        return result;
}
278
/******************************************************************************

        Count all instances of match within source

        NOTE(review): the tally is the number of iterations produced by
        patterns (source, match). split() treats those iterations as
        the segments between matches, so confirm that this tally really
        equals the number of matches

******************************************************************************/

uint count(T) (T[] source, T[] match)
{
        uint total = 0;

        foreach (piece; patterns (source, match))
                 total += 1;

        return total;
}
293
/******************************************************************************

        Answer whether the provided array holds at least one element
        equal to the given match

******************************************************************************/

bool contains(T) (T[] source, T match)
{
        auto len = source.length;

        // an index equal to the array length signifies "not found"
        return indexOf (source.ptr, match, len) != len;
}
305
/******************************************************************************

        Answer whether the provided array holds at least one instance
        of the given pattern of elements

******************************************************************************/

bool containsPattern(T) (T[] source, T[] match)
{
        // locatePattern() yields source.length where nothing is found
        auto where = locatePattern (source, match);
        return where != source.length;
}
317
/******************************************************************************

        Find the next element equal to 'match', searching forward from
        position 'start'. Returns the index of that element, or
        source.length where there is none.

        A 'start' beyond the end of source is treated as source.length.
        Parameter 'start' defaults to 0

******************************************************************************/

uint locate(T, U=uint) (T[] source, T match, U start=0)
{
        // accept any index type, and forward to the uint variant
        return locate!(T) (source, match, start);
}

uint locate(T) (T[] source, T match, uint start=0)
{
        // clamp the starting position to the end of the array
        uint from = start;
        if (from > source.length)
            from = source.length;

        // search the remainder, then rebase the result onto source
        auto remaining = source.length - from;
        return indexOf (source.ptr + from, match, remaining) + from;
}
337
/******************************************************************************

        Find the prior element equal to 'match', searching backward
        from the position just before 'start'. Returns the index of
        that element, or source.length where there is none.

        A 'start' beyond the end of source is treated as source.length.
        Parameter 'start' defaults to source.length

******************************************************************************/

uint locatePrior(T, U=uint) (T[] source, T match, U start=uint.max)
{
        // accept any index type, and forward to the uint variant
        return locatePrior!(T) (source, match, start);
}

uint locatePrior(T) (T[] source, T match, uint start=uint.max)
{
        // clamp the starting position to the end of the array
        uint pos = start;
        if (pos > source.length)
            pos = source.length;

        // walk backward, testing the element ahead of each position
        while (pos)
              {
              --pos;
              if (source[pos] is match)
                  return pos;
              }

        return source.length;
}
360
/******************************************************************************

        Return the index of the next instance of the pattern 'match',
        searching forward from position 'start', or source.length where
        there is no match.

        An empty pattern never matches. A 'start' beyond the end of
        source, or a pattern longer than the content remaining after
        'start', likewise yields source.length.

        Parameter 'start' defaults to 0

******************************************************************************/

uint locatePattern(T, U=uint) (T[] source, T[] match, U start=0)
{return locatePattern!(T) (source, match, start);}

uint locatePattern(T) (T[] source, T[] match, uint start=0)
{
        // Validate before doing any unsigned arithmetic. Evaluating
        // source.length - start - match.length + 1 directly can wrap
        // around to a small, plausible looking value when 'start' is
        // very large (uint.max for instance), which would then drive
        // the search outside of the array
        if (match.length is 0 || start > source.length
                              || match.length > source.length - start)
            return source.length;

        T*   p = source.ptr + start;

        // number of positions where the pattern could still begin
        uint extent = source.length - start - match.length + 1;

        while (extent)
              {
              // find the next candidate for the first pattern element
              uint idx = indexOf (p, match[0], extent);
              if (idx is extent)
                  break;

              // compare the whole pattern at the candidate position
              p += idx;
              if (matching (p, match.ptr, match.length))
                  return p - source.ptr;

              // step over the failed candidate and keep looking
              extent -= (idx+1);
              ++p;
              }

        return source.length;
}
394    
/******************************************************************************

        Return the index of the prior instance of the pattern 'match',
        searching backward from the position just before 'start', or
        source.length where there is no match.

        An empty pattern, or one longer than source, never matches.
        Parameter 'start' defaults to source.length

******************************************************************************/

uint locatePatternPrior(T, U=uint) (T[] source, T[] match, U start=uint.max)
{
        // accept any index type, and forward to the uint variant
        return locatePatternPrior!(T) (source, match, start);
}

uint locatePatternPrior(T) (T[] source, T[] match, uint start=uint.max)
{
        auto len = source.length;

        if (start > len)
            start = len;

        if (match.length is 0 || match.length > len)
            return len;

        while (start)
              {
              // step back to the prior instance of the first element
              start = locatePrior (source, match[0], start);
              if (start is len)
                  break;

              // compare only where the whole pattern fits in source
              if ((start + match.length) <= len)
                   if (matching (source.ptr+start, match.ptr, match.length))
                       return start;
              }

        return len;
}
428
/******************************************************************************

        Split the provided array on the first instance of pattern. The
        content ahead of the pattern is returned, and the content after
        it is assigned to 'tail'. The pattern itself is excluded from
        both.

        Where the pattern is not found, tail will be null and the
        return value will be the original array.

******************************************************************************/

T[] head(T) (T[] src, T[] pattern, out T[] tail)
{
        auto at = locatePattern (src, pattern);

        // no match: 'tail' retains its 'out' default
        if (at == src.length)
            return src;

        tail = src [at + pattern.length .. $];
        return src [0 .. at];
}
450
/******************************************************************************

        Split the provided array on the last instance of pattern. The
        content after the pattern is returned, and the content ahead of
        it is assigned to 'head'. The pattern itself is excluded from
        both.

        Where the pattern is not found, head will be null and the
        return value will be the original array.

******************************************************************************/

T[] tail(T) (T[] src, T[] pattern, out T[] head)
{
        auto at = locatePatternPrior (src, pattern);

        // no match: 'head' retains its 'out' default
        if (at == src.length)
            return src;

        head = src [0 .. at];
        return src [at + pattern.length .. $];
}
472
/******************************************************************************

        Split the provided array wherever an element of the delimiter
        set is found, and return the resultant segments. Delimiters are
        excluded from each of the segments. Note that 'set' is matched
        as a group of alternates, rather than as a pattern.

        Splitting on a single delimiter is considerably faster than
        splitting upon a set of alternatives.

        The src content is not duplicated by this function: each of the
        segments is a slice of it.

******************************************************************************/

T[][] delimit(T) (T[] src, T[] set)
{
        T[][] segments;

        foreach (piece; delimiters (src, set))
                 segments ~= piece;

        return segments;
}
496
/******************************************************************************

        Split the provided array wherever an instance of pattern is
        found, and return the resultant segments. The pattern is
        excluded from each of the segments.

        The src content is not duplicated by this function: each of the
        segments is a slice of it.

******************************************************************************/

T[][] split(T) (T[] src, T[] pattern)
{
        T[][] segments;

        foreach (piece; patterns (src, pattern))
                 segments ~= piece;

        return segments;
}
516
/******************************************************************************

        Convert text into a set of lines, where each line is identified
        by a \n or \r\n combination. The line terminator is stripped
        from each resultant array.

        The src content is not duplicated by this function: each of the
        lines is a slice of it.

******************************************************************************/

T[][] splitLines(T) (T[] src)
{
        // first pass: tally the lines so the result is sized exactly
        int total = 0;
        foreach (line; lines (src))
                 total += 1;

        auto result = new T[][total];

        // second pass: record each of the slices
        int slot = 0;
        foreach (line; lines (src))
                {
                result [slot] = line;
                ++slot;
                }

        return result;
}
543
/******************************************************************************

        Concatenate a series of text segments, with the postfix pattern
        applied between them. This forwards to combine() without any
        prefix.

        An optional output buffer can be provided to avoid heap
        activity - it should be large enough to contain the entire
        output, otherwise the heap will be used instead.

        Returns a valid slice of the output, containing the concatenated
        text.

******************************************************************************/

T[] join(T) (T[][] src, T[] postfix=null, T[] dst=null)
{
        T[] noPrefix = null;

        return combine!(T) (dst, noPrefix, postfix, src);
}
560
/******************************************************************************

        Concatenate a series of text segments, each one prepended with
        the prefix pattern. This forwards to combine() without any
        postfix.

        An optional output buffer can be provided to avoid heap
        activity - it should be large enough to contain the entire
        output, otherwise the heap will be used instead.

        Note that, unlike join(), the output buffer is specified first
        such that a set of trailing strings can be provided.

        Returns a valid slice of the output, containing the concatenated
        text.

******************************************************************************/

T[] prefix(T) (T[] dst, T[] prefix, T[][] src...)
{
        T[] noPostfix = null;

        return combine!(T) (dst, prefix, noPostfix, src);
}
580
/******************************************************************************

        Concatenate a series of text segments, with an optional postfix
        pattern applied to them. This forwards to combine() without any
        prefix.

        An optional output buffer can be provided to avoid heap
        activity - it should be large enough to contain the entire
        output, otherwise the heap will be used instead.

        Note that, unlike join(), the output buffer is specified first
        such that a set of trailing strings can be provided.

        Returns a valid slice of the output, containing the concatenated
        text.

******************************************************************************/

T[] postfix(T) (T[] dst, T[] postfix, T[][] src...)
{
        T[] noPrefix = null;

        return combine!(T) (dst, noPrefix, postfix, src);
}
600
/******************************************************************************

        Concatenate a series of text segments, each one surrounded by
        the optional prefix and postfix strings. The postfix following
        the final segment is excluded from the returned slice, such
        that it behaves as a separator.

        An optional output buffer can be provided to avoid heap
        activity - it should be large enough to contain the entire
        output (including that final postfix), otherwise the heap will
        be used instead.

        Note that, unlike join(), the output buffer is specified first
        such that a set of trailing strings can be provided.

        Returns a valid slice of the output, containing the concatenated
        text.

******************************************************************************/

T[] combine(T) (T[] dst, T[] prefix, T[] postfix, T[][] src ...)
{
        // space required: one prefix and one postfix for each segment,
        // plus the content of the segments themselves
        uint len = src.length * (prefix.length + postfix.length);

        foreach (segment; src)
                 len += segment.length;

        // grow the output where the provided buffer is too small
        if (dst.length < len)
            dst.length = len;

        uint pos = 0;
        foreach (segment; src)
                {
                dst [pos .. pos + prefix.length] = prefix;
                pos += prefix.length;

                dst [pos .. pos + segment.length] = segment;
                pos += segment.length;

                dst [pos .. pos + postfix.length] = postfix;
                pos += postfix.length;
                }

        // exclude the trailing separator from the result
        if (len)
            len -= postfix.length;

        return dst [0 .. len];
}
643
/******************************************************************************

        Concatenate 'count' copies of the src array. Returns null where
        the result would be empty.

        An optional output buffer can be provided to avoid heap
        activity - it should be large enough to contain the entire
        output, otherwise the heap will be used instead.

        Returns a valid slice of the output, containing the concatenated
        text.

******************************************************************************/

T[] repeat(T, U=uint) (T[] src, U count, T[] dst=null)
{
        // accept any count type, and forward to the uint variant
        return repeat!(T) (src, count, dst);
}

T[] repeat(T) (T[] src, uint count, T[] dst=null)
{
        uint total = src.length * count;
        if (total is 0)
            return null;

        // grow the output where the provided buffer is too small
        if (dst.length < total)
            dst.length = total;

        // lay the copies down one after another
        uint at = 0;
        for (uint i = 0; i < count; ++i)
            {
            dst [at .. at + src.length] = src;
            at += src.length;
            }

        return dst [0 .. total];
}
673
674 /******************************************************************************
675
676         Is the argument a whitespace character?
677
678 ******************************************************************************/
679
680