| 1 |
/******************************************************************************* |
|---|
| 2 |
|
|---|
| 3 |
copyright: Copyright (c) 2004 Kris Bell. All rights reserved |
|---|
| 4 |
|
|---|
| 5 |
license: BSD style: $(LICENSE) |
|---|
| 6 |
|
|---|
| 7 |
version: Initial release: December 2005 |
|---|
| 8 |
|
|---|
| 9 |
author: Kris |
|---|
| 10 |
|
|---|
| 11 |
*******************************************************************************/ |
|---|
| 12 |
|
|---|
| 13 |
module tango.text.stream.StreamIterator; |
|---|
| 14 |
|
|---|
| 15 |
public import tango.io.Buffer; |
|---|
| 16 |
|
|---|
| 17 |
private import Text = tango.text.Util; |
|---|
| 18 |
|
|---|
| 19 |
private import tango.io.model.IConduit; |
|---|
| 20 |
|
|---|
| 21 |
/******************************************************************************* |
|---|
| 22 |
|
|---|
| 23 |
The base class for a set of stream iterators. These operate |
|---|
| 24 |
upon a buffered input stream, and are designed to deal with |
|---|
| 25 |
partial content. That is, stream iterators go to work the |
|---|
| 26 |
moment any data becomes available in the buffer. Contrast |
|---|
| 27 |
this behaviour with the tango.text.Util iterators, which |
|---|
| 28 |
operate upon the extent of an array. |
|---|
| 29 |
|
|---|
| 30 |
There are two types of iterators supported; exclusive and |
|---|
| 31 |
inclusive. The former are the more common kind, where a token |
|---|
| 32 |
is delimited by elements that are considered foreign. Examples |
|---|
| 33 |
include space, comma, and end-of-line delineation. Inclusive |
|---|
| 34 |
tokens are just the opposite: they look for patterns in the |
|---|
| 35 |
text that should be part of the token itself - everything else |
|---|
| 36 |
is considered foreign. Currently tango.text.stream includes the |
|---|
| 37 |
exclusive variety only. |
|---|
| 38 |
|
|---|
| 39 |
Each pattern is exposed to the client as a slice of the original |
|---|
| 40 |
content, where the slice is transient. If you need to retain the |
|---|
| 41 |
exposed content, then you should .dup it appropriately. |
|---|
| 42 |
|
|---|
| 43 |
The content provided to these iterators is intended to be fully |
|---|
| 44 |
read-only. All current tokenizers abide by this rule, but it is |
|---|
| 45 |
possible a user could mutate the content through a token slice. |
|---|
| 46 |
To enforce the desired read-only aspect, the code would have to |
|---|
| 47 |
introduce redundant copying or the compiler would have to support |
|---|
| 48 |
read-only arrays. |
|---|
| 49 |
|
|---|
| 50 |
See LineIterator, CharIterator, RegexIterator, QuotedIterator, |
|---|
| 51 |
and SimpleIterator |
|---|
| 52 |
|
|---|
| 53 |
*******************************************************************************/ |
|---|
| 54 |
|
|---|
| 55 |
class StreamIterator(T) : InputStream, Buffered
{
        protected T[]   slice,          // the current token, as exposed by get()
                        pushed;         // token handed back via push(), pending for next()
        private IBuffer input;          // the buffered source; null until set() is called

        /***********************************************************************

                The pattern scanner, implemented via subclasses. It is
                handed to the buffer as a delegate by consume(), and
                receives the content to scan as a void[].

                Implementations report their outcome through the
                helpers of this class: set() to record the token,
                found() when a pattern matched, and notFound() when
                it did not.

        ***********************************************************************/

        abstract protected uint scan (void[] data);

        /***********************************************************************

                Instantiate, optionally attaching an input stream.

                When no stream is provided the iterator is left
                without a source, and set() must be called before
                any token is requested.

        ***********************************************************************/

        this (InputStream stream = null)
        {
                if (stream)
                    set (stream);
        }

        /***********************************************************************

                Set the provided stream as the scanning source. The
                stream must not be null. The buffer used for scanning
                is obtained via Buffer.share().

                Returns this instance, to support call chaining.

        ***********************************************************************/

        final StreamIterator set (InputStream stream)
        {
                assert (stream);
                input = Buffer.share (stream);
                return this;
        }

        /***********************************************************************

                Return the current token as a slice of the content.

                The slice is transient: .dup it where the content is
                to be retained.

        ***********************************************************************/

        final T[] get ()
        {
                return slice;
        }

        /***********************************************************************

                Push one token back into the stream, to be returned by
                the next call to next(). The token is handed out once
                only; subsequent calls to next() resume scanning the
                input.

                Push null to cancel a prior assignment.

                Returns this instance, to support call chaining.

        ***********************************************************************/

        final StreamIterator push (T[] token)
        {
                pushed = token;
                return this;
        }

        /**********************************************************************

                Iterate over the set of tokens. This should really
                provide read-only access to the tokens, but D does
                not support that at this time.

                The delegate is always invoked at least once, and is
                also invoked for the slice left in place when consume()
                reports there are no more tokens. Iteration halts when
                the delegate returns non-zero, and that value is
                returned to the caller.

                Tokens assigned via push() are not consulted here.

        **********************************************************************/

        int opApply (int delegate(inout T[]) dg)
        {
                bool more;
                int  result;

                do {
                   more = consume;
                   result = dg (slice);
                   } while (more && !result);
                return result;
        }

        /**********************************************************************

                Iterate over a set of tokens, exposing a token count
                starting at zero.

                Behaves as the single-argument opApply, with the count
                incremented after each delegate invocation.

        **********************************************************************/

        int opApply (int delegate(inout int, inout T[]) dg)
        {
                bool more;
                int  result,
                     tokens;

                do {
                   more = consume;
                   result = dg (tokens, slice);
                   ++tokens;
                   } while (more && !result);
                return result;
        }

        /***********************************************************************

                Locate the next token. Returns the token if found, null
                otherwise. Null indicates an end of stream condition. To
                sweep a conduit for lines using method next():
                ---
                auto lines = new LineIterator!(char) (new FileConduit("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                Alternatively, we can extract one line from a conduit:
                ---
                auto line = (new LineIterator!(char) (new FileConduit("myfile"))).next;
                ---

                The difference between next() and foreach() is that the
                latter processes all tokens in one go, whereas the former
                processes in a piecemeal fashion. To wit:
                ---
                foreach (line; new LineIterator!(char) (new FileConduit("myfile")))
                         Cout(line).newline;
                ---

                Note that a token exposed via push() is returned immediately
                when available, taking priority over the input stream itself.
                Such a token is returned exactly once, and becomes the current
                token so that get() agrees with what next() returned.

        ***********************************************************************/

        final T[] next ()
        {
                if (pushed.ptr)
                   {
                   // Hand the pushed-back token out once only. Leaving it
                   // in place would return it on every subsequent call,
                   // such that a loop upon next() never terminates and
                   // the input is never consumed again
                   slice = pushed;
                   pushed = null;
                   return slice;
                   }

                // a trailing token without a terminating pattern is still
                // a token: consume() leaves it in slice and returns false
                if (consume() || slice.length)
                    return slice;
                return null;
        }

        /***********************************************************************

                Set the content of the current slice to the elements
                content [start .. end].

                Returns end, unchanged, as a convenience for scanners.

        ***********************************************************************/

        protected final uint set (T* content, uint start, uint end)
        {
                slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Called when a scanner fails to find a matching pattern.
                This may cause more content to be loaded, and a rescan
                initiated.

                Returns IConduit.Eof, for the scanner to hand back to
                the buffer.

        ***********************************************************************/

        protected final uint notFound ()
        {
                return IConduit.Eof;
        }

        /***********************************************************************

                Invoked when a scanner matches a pattern. The provided
                value should be the index of the last element of the
                matching pattern, which is converted back to a void[]
                index.

                Returns (i + 1) scaled by the size of T.

        ***********************************************************************/

        protected final uint found (uint i)
        {
                return (i + 1) * T.sizeof;
        }

        /***********************************************************************

                See if set of characters holds a particular instance.

                Returns true where any element of set is identical to
                match, and false otherwise (including an empty set).

        ***********************************************************************/

        protected final bool has (T[] set, T match)
        {
                foreach (T c; set)
                         if (match is c)
                             return true;
                return false;
        }

        /***********************************************************************

                Consume the next token and place it in 'slice'. Returns
                true when there are potentially more tokens.

                When the buffer reports that scanning did not succeed,
                whatever remains readable in the buffer is exposed as
                the slice instead, and false is returned.

        ***********************************************************************/

        protected bool consume ()
        {
                if (input.next (&scan))
                    return true;

                // no match: expose the remaining readable content, with the
                // void[] length converted to a count of T elements
                auto tmp = input.slice (buffer.readable);
                slice = (cast(T*) tmp.ptr) [0 .. tmp.length/T.sizeof];
                return false;
        }


        /**********************************************************************/
        /************************ Buffered Interface **************************/
        /**********************************************************************/


        /***********************************************************************

                Return the associated buffer. This is null where no
                stream has been set.

        ***********************************************************************/

        final IBuffer buffer ()
        {
                return input;
        }

        /**********************************************************************/
        /********************** InputStream Interface *************************/
        /**********************************************************************/


        /***********************************************************************

                Return the host conduit, as reported by the associated
                buffer.

        ***********************************************************************/

        final IConduit conduit ()
        {
                return input.conduit;
        }

        /***********************************************************************

                Read from conduit into a target array. The provided dst
                will be populated with content from the conduit.

                The request is forwarded to the associated buffer.

                Returns the number of bytes read, which may be less than
                requested in dst.

        ***********************************************************************/

        uint read (void[] dst)
        {
                return input.read (dst);
        }

        /***********************************************************************

                Clear any buffered content, by way of the associated
                buffer.

                Returns the result of clearing that buffer.

        ***********************************************************************/

        final InputStream clear ()
        {
                return input.clear;
        }

        /***********************************************************************

                Close the input, by way of the associated buffer.

        ***********************************************************************/

        final void close ()
        {
                input.close;
        }
}
|---|