Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

Ticket #970: StreamIterator.d

File StreamIterator.d, 11.5 kB (added by Daniel919, 7 months ago)
Line 
1 /*******************************************************************************
2
3         copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
4
5         license:        BSD style: $(LICENSE)
6
7         version:        Initial release: December 2005
8
9         author:         Kris
10
11 *******************************************************************************/
12
13 module tango.text.stream.StreamIterator;
14
15 public  import tango.io.Buffer;
16
17 private import Text = tango.text.Util;
18
19 private import tango.io.model.IConduit;
20
21 /*******************************************************************************
22
23         The base class for a set of stream iterators. These operate
24         upon a buffered input stream, and are designed to deal with
25         partial content. That is, stream iterators go to work the
26         moment any data becomes available in the buffer. Contrast
27         this behaviour with the tango.text.Util iterators, which
28         operate upon the extent of an array.
29
30         There are two types of iterators supported; exclusive and
31         inclusive. The former are the more common kind, where a token
32         is delimited by elements that are considered foreign. Examples
33         include space, comma, and end-of-line delineation. Inclusive
34         tokens are just the opposite: they look for patterns in the
35         text that should be part of the token itself - everything else
36         is considered foreign. Currently tango.text.stream includes the
37         exclusive variety only.
38
39         Each pattern is exposed to the client as a slice of the original
40         content, where the slice is transient. If you need to retain the
41         exposed content, then you should .dup it appropriately.
42
43         The content provided to these iterators is intended to be fully
44         read-only. All current tokenizers abide by this rule, but it is
45         possible a user could mutate the content through a token slice.
46         To enforce the desired read-only aspect, the code would have to
47         introduce redundant copying or the compiler would have to support
48         read-only arrays.
49
50         See LineIterator, CharIterator, RegexIterator, QuotedIterator,
51         and SimpleIterator
52
53 *******************************************************************************/
54
class StreamIterator(T) : InputStream, Buffered
{
        protected T[]           slice,          // current token, as exposed by get()
                                pushed;         // token handed back via push(); null when none
        private IBuffer         input;          // buffer being scanned; assigned by set()

        /***********************************************************************

                The pattern scanner, implemented via subclasses.

                This is handed to the buffer by consume(). The helpers
                found() and notFound() produce the values a scanner
                returns for a match and a miss respectively, and
                set(content, start, end) records the matched token.

                Params:
                data = the raw content currently available for scanning

        ***********************************************************************/

        abstract protected uint scan (void[] data);

        /***********************************************************************

                Instantiate, optionally attaching a stream to scan.

                When no stream is provided, set() must be called before
                the iterator is used; 'input' remains null until then.

        ***********************************************************************/

        this (InputStream stream = null)
        {
                if (stream)
                    set (stream);
        }

        /***********************************************************************

                Set the provided stream as the scanning source. The
                buffer to scan is obtained through Buffer.share().

                Returns this iterator, to permit call chaining

        ***********************************************************************/

        final StreamIterator set (InputStream stream)
        {
                assert (stream);
                input = Buffer.share (stream);
                return this;
        }

        /***********************************************************************

                Return the current token as a slice of the content.

                The slice is transient; .dup it if it must be retained

        ***********************************************************************/

        final T[] get ()
        {
                return slice;
        }

        /***********************************************************************

                Push one token back into the stream, to be returned by a
                subsequent call to next(). Only a single token is held:
                pushing again replaces any token still pending.

                Push null to cancel a prior assignment

                Returns this iterator, to permit call chaining

        ***********************************************************************/

        final StreamIterator push (T[] token)
        {
                pushed = token;
                return this;
        }

        /**********************************************************************

                Iterate over the set of tokens. This should really
                provide read-only access to the tokens, but D does
                not support that at this time.

                The delegate is always invoked at least once, and is
                invoked a final time with whatever consume() left in
                'slice' when it reported no further tokens. Iteration
                stops early when the delegate returns non-zero.

                Tokens supplied via push() are not visited here; only
                next() consults them.

        **********************************************************************/

        int opApply (int delegate(inout T[]) dg)
        {
                bool more;
                int  result;

                do {
                   more = consume;
                   result = dg (slice);
                   } while (more && !result);
                return result;
        }

        /**********************************************************************

                Iterate over a set of tokens, exposing a token count
                starting at zero. Behaves as the single-argument
                opApply() otherwise.

        **********************************************************************/

        int opApply (int delegate(inout int, inout T[]) dg)
        {
                bool more;
                int  result,
                     tokens;

                do {
                   more = consume;
                   result = dg (tokens, slice);
                   ++tokens;
                   } while (more && !result);
                return result;
        }

        /***********************************************************************

                Locate the next token. Returns the token if found, null
                otherwise. Null indicates an end of stream condition. To
                sweep a conduit for lines using method next():
                ---
                auto lines = new LineIterator!(char) (new FileConduit("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                Alternatively, we can extract one line from a conduit:
                ---
                auto line = (new LineIterator!(char) (new FileConduit("myfile"))).next;
                ---

                The difference between next() and foreach() is that the
                latter processes all tokens in one go, whereas the former
                processes in a piecemeal fashion. To wit:
                ---
                foreach (line; new LineIterator!(char) (new FileConduit("myfile")))
                         Cout(line).newline;
                ---

                Note that tokens exposed via push() are returned immediately
                when available, taking priority over the input stream itself.
                A pushed token is returned exactly once: it becomes the
                current token (so get() agrees with the value returned
                here) and the pending push is then cleared, allowing the
                following call to resume scanning the stream.

        ***********************************************************************/

        final T[] next ()
        {
                if (pushed.ptr)
                   {
                   // Hand the pushed token out once only. Without the
                   // reset, every later call would return the same
                   // token and the stream would never be read again
                   slice = pushed;
                   pushed = null;
                   return slice;
                   }

                // a trailing token without a terminating delimiter is
                // still returned: consume() fails but leaves it in slice
                if (consume() || slice.length)
                    return slice;
                return null;
        }

        /***********************************************************************

                Set the content of the current slice to the elements
                content [start .. end].

                Returns the provided end index

        ***********************************************************************/

        protected final uint set (T* content, uint start, uint end)
        {
                slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Called when a scanner fails to find a matching pattern.
                This may cause more content to be loaded, and a rescan
                initiated.

                Returns IConduit.Eof, for the scanner to hand back

        ***********************************************************************/

        protected final uint notFound ()
        {
                return IConduit.Eof;
        }

        /***********************************************************************

                Invoked when a scanner matches a pattern. The provided
                value should be the index of the last element of the
                matching pattern, which is converted back to a void[]
                index.

                Returns (i + 1) scaled by T.sizeof

        ***********************************************************************/

        protected final uint found (uint i)
        {
                return (i + 1) * T.sizeof;
        }

        /***********************************************************************

                See if set of characters holds a particular instance.

                Returns true when any element of 'set' is identical to
                'match', false otherwise (including for an empty set)

        ***********************************************************************/

        protected final bool has (T[] set, T match)
        {
                foreach (T c; set)
                         if (match is c)
                             return true;
                return false;
        }

        /***********************************************************************

                Consume the next token and place it in 'slice'. Returns
                true when there are potentially more tokens.

                When the buffer reports no further match, all remaining
                readable content is taken as the final token and false
                is returned.

        ***********************************************************************/

        protected bool consume ()
        {
                if (input.next (&scan))
                    return true;

                // no match: expose whatever is left, converting the
                // void[] extent into a count of T elements
                auto tmp = input.slice (input.readable);
                slice = (cast(T*) tmp.ptr) [0 .. tmp.length/T.sizeof];
                return false;
        }


        /**********************************************************************/
        /************************ Buffered Interface **************************/
        /**********************************************************************/


        /***********************************************************************

                Return the associated buffer. This is null until a
                stream has been attached via the constructor or set()

        ***********************************************************************/

        final IBuffer buffer ()
        {
                return input;
        }

        /**********************************************************************/
        /********************** InputStream Interface *************************/
        /**********************************************************************/


        /***********************************************************************

                Return the host conduit, as reported by the buffer

        ***********************************************************************/

        final IConduit conduit ()
        {
                return input.conduit;
        }

        /***********************************************************************

                Read from conduit into a target array. The provided dst
                will be populated with content from the conduit.

                Returns the number of bytes read, which may be less than
                requested in dst

        ***********************************************************************/

        uint read (void[] dst)
        {
                return input.read (dst);
        }

        /***********************************************************************

                Clear any buffered content. Delegates to the buffer and
                returns its result

        ***********************************************************************/

        final InputStream clear ()
        {
                return input.clear;
        }

        /***********************************************************************

                Close the input

        ***********************************************************************/

        final void close ()
        {
                input.close;
        }
}