| 1 |
/******************************************************************************* |
|---|
| 2 |
|
|---|
| 3 |
copyright: Copyright (c) 2004 Kris Bell. All rights reserved |
|---|
| 4 |
|
|---|
| 5 |
license: BSD style: $(LICENSE) |
|---|
| 6 |
|
|---|
| 7 |
version: Initial release: January 2006 |
|---|
| 8 |
|
|---|
| 9 |
author: Kris |
|---|
| 10 |
|
|---|
| 11 |
*******************************************************************************/ |
|---|
| 12 |
|
|---|
| 13 |
module tango.text.stream.RegexIterator; |
|---|
| 14 |
|
|---|
| 15 |
public import tango.text.Regex; |
|---|
| 16 |
|
|---|
| 17 |
private import tango.text.stream.StreamIterator; |
|---|
| 18 |
|
|---|
| 19 |
/******************************************************************************* |
|---|
| 20 |
|
|---|
| 21 |
Iterate across a set of text patterns. |
|---|
| 22 |
|
|---|
| 23 |
Each pattern is exposed to the client as a slice of the original |
|---|
| 24 |
content, where the slice is transient. If you need to retain the |
|---|
| 25 |
exposed content, then you should .dup it appropriately. |
|---|
| 26 |
|
|---|
| 27 |
These iterators are based upon the IBuffer construct, and can |
|---|
| 28 |
thus be used in conjunction with other Iterators and/or Reader |
|---|
| 29 |
instances upon a common buffer ~ each will stay in lockstep via |
|---|
| 30 |
state maintained within the IBuffer. |
|---|
| 31 |
|
|---|
| 32 |
The content exposed via an iterator is supposed to be entirely |
|---|
| 33 |
read-only. All current iterators abide by this rule, but it is |
|---|
| 34 |
possible a user could mutate the content through a get() slice. |
|---|
| 35 |
To enforce the desired read-only aspect, the code would have to |
|---|
| 36 |
introduce redundant copying or the compiler would have to support |
|---|
| 37 |
read-only arrays. |
|---|
| 38 |
|
|---|
| 39 |
See LineIterator, SimpleIterator, RegexIterator, QuotedIterator. |
|---|
| 40 |
|
|---|
| 41 |
|
|---|
| 42 |
*******************************************************************************/ |
|---|
| 43 |
|
|---|
| 44 |
class RegexIterator : StreamIterator!(char)
{
        private alias char T;
        private Regex regex, delim;

        /***********************************************************************

                Construct a regex-delimited iterator. The pattern is
                compiled into a Regex with an empty attribute string,
                and the (optional) stream is handed to the superclass
                constructor.

                Params:
                pattern = regular expression describing the delimiter
                stream  = input to iterate over; defaults to null

                Iterate over tokens along with the delimiter match:
                ---
                void somefunc (InputStream stream)
                {
                        foreach (token, delim; new RegexIterator (",", stream))
                                 Cout (token).newline;
                }
                ---

        ***********************************************************************/

        this (T[] pattern, InputStream stream = null)
        {
                regex = new Regex (pattern, "");
                super (stream);
        }

        /**********************************************************************

                Iterate over a set of tokens, exposing the delimiter
                which terminated each one.

                The delegate receives the current slice and the Regex
                recorded for that step. The delimiter is reset to null
                before every step, so it remains null for any step in
                which scan() did not record a match (presumably the
                trailing token; scan() is assumed to be driven by
                consume ~ confirm against StreamIterator).

                The delegate is always invoked at least once, and is
                invoked once more after consume reports that nothing
                further is available. Iteration stops early when the
                delegate returns non-zero, and that value is returned.

        **********************************************************************/

        int opApply (int delegate(inout T[], inout Regex) dg)
        {
                int status;

                for (;;)
                    {
                    // cleared up front; repopulated by scan() on a match
                    delim = null;

                    bool pending = consume;
                    status = dg (slice, delim);

                    // stop when input is exhausted or the caller bailed out
                    if (!pending || status)
                        break;
                    }

                return status;
        }

        /***********************************************************************

                Scanner callback: look for the delimiter pattern within
                the provided data.

                The data is reinterpreted as an array of T and tested
                against the regex. On a match, the first two entries
                of regex.registers_ are read (presumably the start and
                end offsets of the match ~ confirm against Regex), the
                regex is recorded as the current delimiter, the token
                is set to the content preceding the match, and the
                result of found() for the last matched index is
                returned. Without a match, notFound is returned.

                NOTE(review): a zero-length match at offset zero would
                lead to found(-1) ~ confirm how the superclass treats
                that case.

        ***********************************************************************/

        protected uint scan (void[] data)
        {
                auto text = (cast(T*) data.ptr) [0 .. data.length / T.sizeof];

                // nothing matched within the available content
                if (! regex.test (text))
                      return notFound;

                int head = regex.registers_[0];
                int tail = regex.registers_[1];

                // expose the matching regex to opApply callers
                delim = regex;

                // the token is everything ahead of the delimiter
                set (text.ptr, 0, head);
                return found (tail - 1);
        }
}
|---|