Changeset 84

Show
Ignore:
Timestamp:
12/17/05 09:09:42 (3 years ago)
Author:
pragma
Message:

Updated the bejezus out of this thing.


Currently Supported

  • character classes (including inverse char classes via [^...])
  • match one or more (+)
  • match zero or more (*)
  • match zero or one (?)
  • escape sequences
  • whitespace matching (ws chars are treated literally right now)
  • {n} and {n,m} predicates
  • ^ at the start of an expression
  • $ at the end of an expression
  • grouping via ()
  • most standard escape sequences
  • union operator (outside of parens)
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/meta/regex.d

    r83 r84  
    3131     
    3232    -------------------- 
    33     auto exp = &regexMatch!("[a-z]* *world"); 
     33    auto exp = &regexMatch!("[a-z]*\s*\w*"); 
    3434    writefln("matches: %s",exp("hello    world"); 
    3535    -------------------- 
     
    3838    portions of your expression, which make up the runtime expression engine.  The 
    3939    resulting generated code set and call tree are absolutely minimalistic and match 
    40     the original expression's logic one-for-one. 
    41      
     40    the original expression's logic with as few productions as possible. 
     41        
    4242    Currently Supported 
    43     - character classes 
     43    - character classes (including inverse char classes via [^...]) 
    4444    - match one or more (+) 
    4545    - match zero or more (*) 
    4646    - match zero or one (?) 
    47      
    48     Planned 
    4947    - escape sequences 
    5048    - whitespace matching (ws chars are treated literally right now) 
    51     - {n,m} syntax for predicates 
    52     - union operator (|) 
    53 */ 
    54 /* 
    55     Problems with D templates  
    56     (Eric: these are not reported as bugs as of this time as I still have to write proper reproduction cases) 
    57  
    58     - Cannot use an alias as a term within a static if 
    59     // this is why there are so many templates in this file 
    60     [[DAC: This is part of the rule that 'static if' can't refer to variables declared in the same scope. 
    61     It makes it difficult to do complicated functions. 
    62     But, there's a bug in the spec for static if, the "AssignExpression" doesn't make sense.]] 
     49    - {n} and {n,m} predicates 
     50    - ^ at the start of an expression 
     51    - $ at the end of an expression 
     52    - grouping via () 
     53    - most standard escape sequences 
     54    - union operator (outside of parens) 
     55 
     56    Planned 
     57    - {,m} and {n,} predicates 
     58    - different match operations other than first match 
     59    - move pipe to low parsing precedence rather than highest (support inside parens) 
    6360     
    64     - No way to stop the compiler dead if there's an error ("pragma(halt)" would be nice) 
    65     static assert(false); // flags, but keeps going 
    66      
    67     - Templates do not always specialize on alias params (bad), but they do on value params (good) 
    68     template foobar(alias gorf){} 
    69     foobar!(a); 
    70     foobar!(b); // both are the same exact template 
    71     // the above is why the param 'key' is used all over regexPredicate.d    
    72     // (!) it can also lend to a form of namespace pollution where  
    73     // multiple compiled regexps overlap (very bad!) 
    74      
    75     - I've also had an unspecified problem with some boolean expressions in static if() statments. 
     61    Possible?! 
     62    - \d (decimal) 
     63    - \o (octal) 
     64    - multi-line matching semantics (like ^ and $ matching \n and such) 
     65    - \b word boundary 
     66    - \B non word boundary 
     67    - lazy matching (current implementation is greedy) 
     68    - replacement expressions 
     69    - named groups 
    7670*/ 
    7771module meta.regex; 
     
    8276 
    8377import meta.regexPredicate; 
     78 
     79template getAt(char[] str,uint index){ 
     80    const char getAt = str[index]; 
     81} 
     82 
     83template first(char[] str){ 
     84    const char first = str[0]; 
     85} 
     86 
     87template last(char[] str){ 
     88    const char last = getAt!(str,strlen!(str)-1); 
     89} 
    8490 
    8591template isSpecial(char[] pattern){ 
     
    9197        pattern[0] == '[' || 
    9298        pattern[0] == '{' || 
    93         pattern[0] == '}' || 
    94         pattern[0] == ']' || 
     99        pattern[0] == '(' || 
     100        //pattern[0] == ')' || 
     101        //pattern[0] == '}' || 
     102        //pattern[0] == ']' || 
     103        pattern[0] == '$' || 
     104        pattern[0] == '^' || 
    95105        pattern[0] == '\\'   
    96106    ){ 
     
    116126} 
    117127 
     128/// parses up to and including terminator.  Returns everything up to terminator. 
     129template parseUntil(char[] pattern,char terminator,bit fuzzy=false){ 
     130    static if(strlen!(pattern) > 0){ 
     131        static if(pattern[0] == '\\'){ 
     132            static if(strlen!(pattern) > 1){ 
     133                const char[] nextSlice = sliceheadoff!(pattern,2); 
     134                alias .parseUntil!(nextSlice,terminator,fuzzy) next; 
     135                const char[] token = slice!(pattern,0,2) ~ next.token;   
     136                const uint consumed = next.consumed+2; 
     137            } 
     138            else{ 
     139                pragma(msg,"Error: exptected character to follow \\"); 
     140                static assert(false); 
     141            } 
     142        } 
     143        else static if(pattern[0] == terminator){ 
     144            const char[] token=""; 
     145            const uint consumed = 1; 
     146        } 
     147        else{ 
     148            const char[] nextSlice = sliceheadoff!(pattern,1); 
     149            alias .parseUntil!(nextSlice,terminator,fuzzy) next; 
     150            const char[] token = slice!(pattern,0,1) ~ next.token; 
     151            const uint consumed = next.consumed+1; 
     152        } 
     153    } 
     154    else static if(fuzzy){ 
     155        const char[] token = ""; 
     156        const uint consumed = 0; 
     157    } 
     158    else{ 
     159        pragma(msg,"Error: exptected " ~ makechar!(terminator) ~ " to terminate group expression"); 
     160        static assert(false); 
     161    }            
     162} 
     163 
     164// shim for parseUint 
     165template charToUint(char[] value){ 
     166    const uint charToUint = value[0]; 
     167} 
     168 
     169template parseUint(char[] pattern,uint prev=0){ 
     170    static if(strlen!(pattern) > 0){ 
     171        static if(pattern[0] >= '0' && pattern[0] <= '9'){ 
     172            const uint thisValue = (charToUint!(pattern)-'0') + prev*10; 
     173            alias .parseUint!(sliceheadoff!(pattern,1),thisValue) next; 
     174            const uint consumed = next.consumed+1; 
     175            const uint value = next.value; 
     176        } 
     177        else{ 
     178            const uint consumed = 0; 
     179            const uint value = prev; 
     180        } 
     181    } 
     182    else{ 
     183        const uint consumed = 0; 
     184        const uint value = prev; 
     185    }            
     186} 
     187 
    118188template regexCompileCharClassRecurse(alias termFn,char[] pattern){ 
    119189    static if(strlen!(pattern) > 0){ 
     
    121191            debug pragma(msg,"REMAINING: " ~ pattern); 
    122192             
    123             alias regexCompileCharClass!(pattern) next; 
     193            alias regexCompileCharClass2!(pattern) next; 
    124194            alias testOr!(termFn,next.fn,pattern) fn; 
    125195            const uint consumed = next.consumed; 
     
    137207} 
    138208 
    139 template regexCompileCharClass(char[] pattern){ 
     209template regexCompileCharClass2(char[] pattern){ 
     210    static if(strlen!(pattern) > 0){ 
     211        static if(pattern[0] == '\\'){ 
     212            static if(strlen!(pattern) == 1){ 
     213                pragma(msg,"Error: expected character following \\ in character class"); 
     214                static assert(false); 
     215            } 
     216            else static if(pattern[1] == ']'){ 
     217                alias testChar!("]") fn; 
     218                const uint thisConsumed = 2; 
     219            } 
     220            else static if(pattern[1] == '^'){ 
     221                alias testChar!("^") fn; 
     222                const uint thisConsumed = 2; 
     223            } 
     224            else static if(pattern[1] == '-'){ 
     225                alias testChar!("-") fn; 
     226                const uint thisConsumed = 2; 
     227            } 
     228            else{ 
     229                alias regexCompileEscape!(sliceheadoff!(pattern,1)) term; 
     230                alias term.fn termFn; 
     231                const uint thisConsumed = term.consumed+1; 
     232            } 
     233             
     234            const char[] remaining = slice!(pattern,thisConsumed,strlen!(pattern)); 
     235        } 
     236        else{ 
     237            //NOTE: read ahead up to two chars for a range expression. 
     238            //NOTE: should probably be refactored off to something else 
     239            static if(strlen!(pattern) > 1){ 
     240                static if(pattern[1] == '-'){ 
     241                    static if(strlen!(pattern) > 2){ 
     242                        alias testRange!(slice!(pattern,0,1),slice!(pattern,2,3)) termFn; 
     243                        const uint thisConsumed = 3; 
     244                        const char[] remaining = slice!(pattern,3,strlen!(pattern)); 
     245                    } 
     246                    else{ // length is 2 
     247                        pragma(msg,"Error: expected character following '-' in character class"); 
     248                        static assert(false);    
     249                    } 
     250                } 
     251                else{ // not '-' 
     252                    alias testChar!(slice!(pattern,0,1)) termFn; 
     253                    const uint thisConsumed = 1; 
     254                    const char[] remaining = slice!(pattern,1,strlen!(pattern));                         
     255                } 
     256            } 
     257            else{ 
     258                alias testChar!(slice!(pattern,0,1)) termFn; 
     259                const uint thisConsumed = 1; 
     260                const char[] remaining = slice!(pattern,1,strlen!(pattern)); 
     261            } 
     262        } 
     263        alias regexCompileCharClassRecurse!(termFn,remaining) recurse; 
     264        alias recurse.fn fn; 
     265        const uint consumed = recurse.consumed + thisConsumed; 
     266    } 
     267    else{ 
     268        //TODO: trigger error 
     269        alias testEmpty!() fn; 
     270        const uint consumed = 0; 
     271    } 
     272    debug pragma(msg,"regexCompileCharClass2 consumed:" ~ itoa!(consumed)); 
     273
     274 
     275template regexCompileCharClass(char[] pattern){  
    140276    static if(strlen!(pattern) > 0){ 
    141277        static if(pattern[0] == ']'){ 
     
    143279            const uint consumed = 0; 
    144280        } 
    145         else{ 
    146             static if(pattern[0] == '\\'){ 
    147                 static if(strlen!(pattern) == 1){ 
    148                     pragma(msg,"Error: expected character following \\ in character class"); 
    149                     static assert(false); 
    150                 } 
    151                 else static if(pattern[1] == ']'){ 
    152                     alias testChar!("]") term; 
    153                 } 
    154                 else static if(pattern[1] == '^'){ 
    155                     alias testChar!("^") term; 
    156                 } 
    157                 else static if(pattern[1] == '-'){ 
    158                     alias testChar!("-") term; 
    159                 } 
    160                 else{ 
    161                     pragma(msg,"Error: expected either of ]^- following \\ in character class"); 
    162                     static assert(false); 
    163                 } 
    164                 const uint thisConsumed = 2; 
    165                 const char[] remaining = slice!(pattern,2,strlen!(pattern)); 
    166             } 
    167             else{ 
    168                 //NOTE: read ahead up to two chars for a range expression. 
    169                 //NOTE: should probably be refactored off to something else 
    170                 static if(strlen!(pattern) > 1){ 
    171                     static if(pattern[1] == '-'){ 
    172                         static if(strlen!(pattern) > 2){ 
    173                             alias testRange!(slice!(pattern,0,1),slice!(pattern,2,3)) term; 
    174                             const uint thisConsumed = 3; 
    175                             const char[] remaining = slice!(pattern,3,strlen!(pattern)); 
    176                         } 
    177                         else{ // length is 2 
    178                             pragma(msg,"Error: expected character following '-' in character class"); 
    179                             static assert(false);    
    180                         } 
    181                     } 
    182                     else{ // not '-' 
    183                         alias testChar!(slice!(pattern,0,1)) term; 
    184                         const uint thisConsumed = 1; 
    185                         const char[] remaining = slice!(pattern,1,strlen!(pattern));                         
    186                     } 
    187                 } 
    188                 else{ 
    189                     alias testChar!(slice!(pattern,0,1)) term; 
    190                     const uint thisConsumed = 1; 
    191                     const char[] remaining = slice!(pattern,1,strlen!(pattern)); 
    192                 } 
    193             } 
    194             alias regexCompileCharClassRecurse!(term,remaining) recurse; 
    195             alias recurse.fn fn; 
    196             const uint consumed = recurse.consumed + thisConsumed; 
    197         } 
    198     }    
    199     else{ 
    200         alias testEmpty!() fn; 
    201         const uint consumed = 0; 
    202     } 
    203     debug pragma(msg,"regexCompileCharClass2 consumed:" ~ itoa!(consumed)); 
     281        else static if(pattern[0] == '^'){ 
     282            pragma(msg,"foobar"); 
     283            alias regexCompileCharClass2!(sliceheadoff!(pattern,1)) charClass; 
     284            alias testCharInverse!(charClass.fn,pattern) inverseCharClass; 
     285            alias inverseCharClass fn; 
     286            const uint consumed = charClass.consumed + 1; 
     287        } 
     288        else{ 
     289            alias regexCompileCharClass2!(pattern) charClass; 
     290            alias charClass.fn fn; 
     291            const uint consumed = charClass.consumed; 
     292        } 
     293    } 
     294    else{ 
     295        pragma(msg,"Error: expected closing ']' for character class"); 
     296        static assert(false);    
     297    } 
     298
     299 
     300// shim to assist with {n,m} notation 
     301template validateMaxToken(uint tokenLength,uint consumed,uint value){ 
     302    static if(consumed == 0 || consumed < tokenLength){ 
     303        pragma(msg,"Error: expected expression in the format of {n,m}"); 
     304        static assert(false); 
     305    } 
     306    const uint max = value; 
     307
     308 
     309// shim to assist with {n,m} notation 
     310template parseMaxPredicate(uint min,char[] token){ 
     311    static if(strlen!(token) > 0){ 
     312        static if(getAt!(token,0) == ',' && strlen!(token) > 1){ 
     313            alias parseUint!(sliceheadoff!(token,1)) maxToken; 
     314            const uint max = validateMaxToken!(strlen!(token),maxToken.consumed+1,maxToken.value).max; 
     315        } 
     316        else{ 
     317            pragma(msg,"Error: expected expression in the format of {n,m}"); 
     318            static assert(false); 
     319        } 
     320    } 
     321    else{ 
     322        const uint max = min; 
     323    } 
    204324} 
    205325 
     
    218338            const uint consumed = 1; 
    219339        } 
    220         //TODO: add lazy matching and #of times operators 
     340        else static if(pattern[0] == '{'){ 
     341            const char[] token = parseUntil!(sliceheadoff!(pattern,1),'}').token; 
     342            static if(strlen!(token) == 0){ 
     343                pragma(msg,"Error: expected number inside {n} expression"); 
     344                static assert(false); 
     345            } 
     346             
     347            alias parseUint!(token) minToken; 
     348            const uint min = minToken.value;     
     349            uint max = parseMaxPredicate!(min,sliceheadoff!(token,minToken.consumed)).max;           
     350 
     351            alias testTimes!(min,max,test,token) fn; 
     352            const uint consumed = strlen!(token)+2; 
     353            debug pragma(msg,"consumed: " ~ itoa!(consumed)); 
     354        } 
    221355        else{ 
    222356            alias test fn; 
     
    230364} 
    231365 
     366template regexCompileEscape(char[] pattern){ 
     367    static if(strlen!(pattern) > 0){ 
     368        static if(pattern[0] == '\\'){ 
     369            alias testChar!("\\") fn; 
     370        }        
     371        //TODO: word boundary (/b) and non-word boundary (/B) 
     372        //TODO: /d (decimal) and /o (octal)? 
     373        else static if(pattern[0] == 'd'){ 
     374            // numeric chars 
     375            alias testRange!("0","9") fn; 
     376        } 
     377        else static if(pattern[0] == 'D'){ 
     378            // non numeric chars 
     379            alias testCharInverse!(testRange!("0","9"),pattern) fn; 
     380        } 
     381        else static if(pattern[0] == 'f'){ 
     382            // form feed 
     383            alias testChar!("\f") fn; 
     384        }        
     385        else static if(pattern[0] == 'n'){ 
     386            // newline 
     387            alias testChar!("\n") fn; 
     388        } 
     389        else static if(pattern[0] == 'r'){ 
     390            // carriage return 
     391            alias testChar!("\r") fn; 
     392        } 
     393        else static if(pattern[0] == 's'){ 
     394            // whitespace char 
     395            alias testRange!("\x00","\x20") fn; 
     396        } 
     397        else static if(pattern[0] == 'S'){ 
     398            //non-whitespace char 
     399            alias testCharInverse!(testRange!("\x00","\x20"),pattern) fn; 
     400        } 
     401        else static if(pattern[0] == 't'){ 
     402            //tab    
     403            alias testChar!("\t") fn; 
     404        } 
     405        else static if(pattern[0] == 'v'){ 
     406            //vertical tab 
     407            alias testChar!("\v") fn; 
     408        } 
     409        else static if(pattern[0] == 'w'){ 
     410            //word char 
     411            alias testWordChar!() fn; 
     412        } 
     413        else static if(pattern[0] == 'W'){ 
     414            alias testCharInverse!(testWordChar!()) fn; 
     415        } 
     416        else{ 
     417            alias testChar!(slice!(pattern,0,1)) fn; 
     418        } 
     419        const uint consumed = 1; 
     420    } 
     421    else{ 
     422        pragma(msg,"Error: expected char following '\\'"); 
     423        static assert(false); 
     424    } 
     425} 
     426 
    232427/// recursive portion of regexCompile - shim to work around alias scope issue 
    233428template regexCompileRecurse(alias term,char[] pattern){ 
     
    244439 
    245440/// recursive descent parser for regex strings 
     441//TODO: install pipe operator here and give regexCompile the 'consumed' protocol to make 
     442// partial passes of the pattern possible 
    246443template regexCompile(char[] pattern){ 
    247444    debug pragma(msg,"PATTERN: " ~ pattern); 
    248445    static if(strlen!(pattern) > 0){ 
    249446        static if(pattern[0] == '['){ 
    250             alias regexCompileCharClass!(slice!(pattern,1,strlen!(pattern))) charClass; 
     447            const char[] charClassToken = parseUntil!(slice!(pattern,1,strlen!(pattern)),']').token;             
     448            alias regexCompileCharClass!(charClassToken) charClass; 
    251449            const char[] token = slice!(pattern,0,charClass.consumed+2); 
    252450            const char[] next = slice!(pattern,charClass.consumed+2,strlen!(pattern)); 
    253451            alias charClass.fn test; 
    254452        } 
    255         //TODO: support for slashes, '.', '$' and any other simple productions 
    256     /*  else static if(pattern[0] == '\\'){ 
    257             //TODO: special matches 
    258             pragma(msg,"Error: Backslashes not supported"); 
     453        else static if(pattern[0] == '('){ 
     454            const char[] groupToken = parseUntil!(slice!(pattern,1,strlen!(pattern)),')').token; 
     455            alias regexCompile!(groupToken) groupExpression; 
     456            const char[] token = slice!(pattern,0,strlen!(groupToken)+2); 
     457            const char[] next = slice!(pattern,strlen!(groupToken)+2,strlen!(pattern));  
     458            alias groupExpression.fn test; 
     459        } 
     460        else static if(pattern[0] == '.'){ 
     461            const char[] token = "."; 
     462            const char[] next = sliceheadoff!(pattern,1); 
     463            alias testAny!() test; 
     464        } 
     465        else static if(pattern[0] == '\\'){ 
     466            alias regexCompileEscape!(sliceheadoff!(pattern,1)) escapeSequence; 
     467            const char[] token = slice!(pattern,0,escapeSequence.consumed+1); 
     468            const char[] next = sliceheadoff!(pattern,escapeSequence.consumed+1); 
     469            alias escapeSequence.fn test; 
     470        } 
     471        else static if(pattern[0] == '$'){ 
     472            pragma(msg,"Error: $ not allowed inside an expression (use \\$ instead)"); 
    259473            static assert(false); 
    260474        } 
    261         */ 
    262         else{ 
    263             //TODO: refactor to parse test text 
     475        else static if(pattern[0] == '^'){ 
     476            pragma(msg,"Error: ^ not allowed inside an expression (use \\^ instead)"); 
     477            static assert(false); 
     478        }        
     479        else{ 
    264480            const char[] token = parseTextToken!(pattern); 
     481            static assert(strlen!(token) > 0); 
    265482            const char[] next = slice!(pattern,strlen!(token),strlen!(pattern)); 
    266483            alias testText!(token) test; 
     
    280497} 
    281498 
    282 template regexMatch(char[] pattern){ 
     499 
     500//TODO: at this level, check each tokenized sub expression for starting with '^' or ending with '$', and 
     501// apply the appropriate matching algorithm. 
     502 
     503/// recursive portion of regexCompile - shim to work around alias scope issue 
     504template compileMatchRecurse(alias termFn,char[] pattern){ 
     505    static if(strlen!(pattern) > 0){ 
     506        debug pragma(msg,"REMAINING: " ~ pattern); 
     507         
     508        alias compileMatch!(pattern) next; 
     509        alias matchUnion!(termFn,next.fn,pattern) fn; 
     510    } 
     511    else{ 
     512        alias termFn fn; 
     513    } 
     514
     515 
     516// shim 
     517template compileMatch2(char[] token,char[] remaining){ 
     518    static if(last!(token) == '$'){ 
     519        static if(first!(token) == '^'){ 
     520            alias matchTestPerfect!(regexCompile!(slicetailoff!(token,1)).fn,remaining) termFn; 
     521        } 
     522        else{ 
     523            //TODO: refactor by reversing the string (should make for a faster match) 
     524            alias matchTestFromEnd!(regexCompile!(slicetailoff!(token,1)).fn,remaining) termFn; 
     525        } 
     526    } 
     527    else static if(first!(token) == '^'){ 
     528        alias matchTestFromStart!(regexCompile!(sliceheadoff!(token,1)).fn,remaining) termFn; 
     529    } 
     530    else{ 
     531        alias matchTest!(regexCompile!(token).fn,remaining) termFn; 
     532    }            
     533    alias compileMatchRecurse!(termFn,remaining).fn fn; 
     534
     535 
     536template compileMatch(char[] pattern){ 
     537    static if(strlen!(pattern) > 0){ 
     538        alias parseUntil!(pattern,'|',true) unionToken; 
     539         
     540        const char[] token = unionToken.token; 
     541        const char[] remaining = sliceheadoff!(pattern,unionToken.consumed); 
     542         
     543        debug pragma(msg,"TOKEN: " ~ token); 
     544        debug pragma(msg,"REMAINING: " ~ remaining);     
     545         
     546        alias compileMatch2!(token,remaining).fn fn; 
     547    } 
     548    else{ 
     549        alias matchTest!(testEmpty!(),pattern) fn; 
     550    } 
    283551    //TODO: parse support for unions (another parser layer most likely) 
    284     alias matchTest!(regexCompile!(pattern).fn,pattern) regexMatch; 
    285 
     552
     553 
     554template regexMatch(char[] pattern){     
     555    //alias matchTest!(regexCompile!(pattern).fn,pattern) regexMatch; 
     556    alias compileMatch!(pattern).fn regexMatch; 
     557
  • trunk/meta/regexPredicate.d

    r79 r84  
    3030import meta.string; 
    3131 
     32/* TODO: support all of these 
     33exec 
     34 
     35    A regular expression method that executes a search for a match in a string. It returns an array of information. 
     36 
     37test 
     38 
     39    A regular expression method that tests for a match in a string. It returns true or false. 
     40 
     41match 
     42 
     43    A String method that executes a search for a match in a string. It returns an array of information or null on a mismatch. 
     44 
     45search 
     46 
     47    A String method that tests for a match in a string. It returns the index of the match, or -1 if the search fails. 
     48 
     49replace 
     50 
     51    A String method that executes a search for a match in a string, and replaces the matched substring with a replacement substring. 
     52 
     53split 
     54 
     55    A String method that uses a regular expression or a fixed string to break a string into an array of substrings. 
     56*/ 
     57 
    3258//match routines 
    3359alias char[][] function(char[] str) MatchPredicate; 
    3460alias const noMatch = (char[][]).init ; 
    3561 
     62// unions the results of two matches together 
    3663template matchUnion(alias firstMatch,alias secondMatch,char[] key){ 
    3764    char[][] matchUnion(char[] str){ 
    38         char[][] results; 
    39         results ~= firstMatch(str); 
     65        char[][] results = firstMatch(str); 
    4066        results ~= secondMatch(str); 
    4167        return results; 
     
    4369} 
    4470 
    45  
     71// attempts a single basic match from the start of the string only 
    4672template matchTest(alias testPredicate,char[] key){ 
    4773    char[][] matchTest(char[] str){ 
    4874        char[][] results; 
    4975        int result = testPredicate(str); 
    50         if(result != testFail){ 
     76        if(result != testFail && result > 0){ 
     77            results ~= str[0..result]; 
     78        } 
     79        return results; 
     80    } 
     81
     82 
     83/* 
     84// aggressive test- tests every possible substring for matches 
     85//NOTE: you probably should never use this 
     86template matchAggressive(alias testPredicate,char[] key){ 
     87    char[][] matchAggressive(char[] str){ 
     88        char[][] results; 
     89        for(uint start=0; start<str.length; start++){ 
     90            for(uint end=str.length; end>start; end--){ 
     91                int result = testPredicate(str[start..end]); 
     92                if(result != testFail && result == end-start){ 
     93                    results ~= str[start..result]; 
     94                } 
     95            } 
     96        } 
     97        return results; 
     98    } 
     99}*/ 
     100 
     101// tests all substrings that start at the start of string 
     102template matchTestFromStart(alias testPredicate,char[] key,bit aggressive=false){ 
     103    char[][] matchTestFromStart(char[] str){ 
     104        for(uint end=str.length; end>0; end--){ 
     105            char[][] results; 
     106            int result = testPredicate(str[0..end]); 
     107            if(result != testFail && result > 0){ 
     108                results ~= str[0..result]; 
     109                return results;  
     110            } 
     111        } 
     112        return results;      
     113    } 
     114
     115 
     116// tests all substrings that terminate at the string's end 
     117//TODO: refactor by reversing the string (should make for a faster match) 
     118template matchTestFromEnd(alias testPredicate,char[] key,bit aggressive=false){ 
     119    char[][] matchTestFromEnd(char[] str){ 
     120        char[][] results; 
     121        for(uint start=0; start<str.length; start++){ 
     122            int result = testPredicate(str[start..$]); 
     123            if(result != testFail && result == str.length-start){ 
     124                results ~= str[start..$]; 
     125                static if(!aggressive) return results;   
     126            } 
     127        } 
     128        return results;  
     129    }    
     130
     131 
     132// test must completely cover the entire string 
     133template matchTestPerfect(alias testPredicate,char[] key){ 
     134    char[][] matchTestFromStart(char[] str){ 
     135        char[][] results; 
     136        int result = testPredicate(str); 
     137        if(result != testFail && result == str.length){ 
    51138            results ~= str[0..result]; 
    52139        } 
     
    170257 
    171258template testAny(){ 
    172     char[] testAny(char[] str){ 
     259    int testAny(char[] str){ 
    173260        if(str.length == 0) return testFail; 
    174261        //TODO: check for newline (some regexps dont' test this) 
     
    187274} 
    188275 
    189 template testTimes(uint min,uint max,alias testPredicate,char[] key){ 
    190     int testTimes(char[] str){ 
    191         if(str.length == 0) return testFail; 
    192         int result = 0; 
    193         uint i; 
    194         for(i=0; i<max; i++){ 
    195             int nextResult = testPredicate(str[result..$]); 
    196             if(nextResult == testFail){ 
    197                 if(i < min) return testFail; 
    198                 break; 
    199             } 
    200             result += nextResult; 
    201         } 
    202         return result; 
    203     } 
    204 
     276template testWordChar(){ 
     277    int testWordChar(char[] str){ 
     278        if(str.length == 0) return testFail; 
     279        if(  
     280            (str[0] >= 'a' && str[0] <= 'z') || 
     281            (str[0] >= 'A' && str[0] <= 'Z') || 
     282            (str[0] >= '0' && str[0] <= '9') || 
     283            str[0] == '_' 
     284        ){ 
     285            return 1; 
     286        } 
     287        return testFail; 
     288    } 
     289
     290 
     291template testCharInverse(alias testPredicate,char[] key){ 
     292    int testCharInverse(char[] str){ 
     293        if(testPredicate(str) == testFail) return 1; 
     294        return testFail; 
     295    } 
     296