Changeset 155
- Timestamp:
- 02/25/06 09:05:10 (3 years ago)
- Files:
-
- trunk/meta/regexp2.d (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/meta/regexp2.d
r154 r155 9 9 // A "naked term" is either a "sequence" or an "atom". 10 10 11 12 // Features currently supported: 13 // Non-greedy quantifiers: * + ? {m} {m,} {m,n} 14 // match character 15 // . match any character 16 // unions 17 // uncaptured grouping via ( ) 18 // ^ start of line 19 // @n match string variables passed into the functions as extra parameters. (this is a non-standard extension). 11 /* 12 Features currently supported: 13 * match previous expression 0 or more times 14 + match previous expression 1 or more times 15 ? match previous expression 0 or 1 times 16 {m,n} match previous expression between m and n times 17 {m,} match previous expression m or more times 18 {,n} match previous expression between 0 and m times 19 {n} match previous expression exactly n times. 20 . match any character 21 other characters match themselves 22 a|b match regular expression a or b 23 uncaptured grouping via ( ) 24 ^ start of line 25 $ end of line 26 [abc] match any character in character class abc 27 [^abc] match any character not in character class abc 28 @n match string variables passed into the functions as extra parameters. (this is a non-standard extension). 29 30 All matches are lazy, rather than greedy. 31 32 Planned, but not yet supported: 33 captured grouping 34 escape characters 35 word matching 36 greedy matching 37 full diagnostic error messages 38 39 Possible: 40 an optimisation pass, including features such as searching the pattern in reverse order. 41 \1..\9 to match previously captured subsequences 42 43 */ 20 44 21 45 // Points of interest: … … 268 292 269 293 // Evaluate one term (without quantifier). 270 // The ONLY purpose of this is to restore the 'p' pointer when we return. 294 // This helper class has two purposes: 295 // (1) to restore the 'p' pointer when we return. 296 // (2) ensure that at least one character was consumed 271 297 template regSequenceDontUpdateP(char [] regstr) 272 298 { 273 299 bool fn() { 274 300 mixin regSequence!(regstr, endSequence) x; 301 // It's only a successful match if _something_ was consumed 302 if (p==theinitialp) return false; 275 303 int oldp = p; 276 304 if (!x.fn()) return false; … … 286 314 bool fn() 287 315 { 316 // While evaluating this first sequence, if this is a sequence 317 // that potentially has zero length (ie, everything is a *, ? or {m,} term), 318 // each term should attempt to consume at least one character if possible. 319 int theinitialp = p; 288 320 mixin regSequenceDontUpdateP!(regstr[1..t-1]) suddendeath; 289 321 mixin regSequence!(regstr[1..t-1], suddendeath) a; … … 292 324 } else { 293 325 bool fn() { 326 // It's easy with atoms, because we know they always eat something. 327 // BUG: Maybe this will fail when null @n strings are passed in? 294 328 mixin regAtom!(regstr) a; 295 329 if (!a.fn()) return false; … … 316 350 const uint repmax = quantifierMax!(regstr[t..$]); 317 351 318 // HORRENDOUSLY inefficient! In some cases, we generate the quantified term T WICE.352 // HORRENDOUSLY inefficient! In some cases, we generate the quantified term THREE TIMES! 319 353 // The first one contains the rest of the search tree. 320 354 // This is used when we think we can do (atom).(next) for an early exit … … 377 411 static if (regstr[0]=='[') { 378 412 static if (regstr[1]=='^') 379 mixin regInvertedRange!(regstr[2..$-1]); 380 else 381 mixin regRange!(regstr[1..$-1]); 382 } else static if (regstr[0]=='.') { 413 { 414 bool fn() { // inverse character class 415 return (p<searchstr.length && !charMatches!(regstr[2..$-1])(searchstr[p++])); 416 } 417 } else { 418 bool fn() { // character class 419 return (p<searchstr.length && charMatches!(regstr[1..$-1])(searchstr[p++])); 420 } 421 } 422 } else static if (regstr[0]=='.') { // match any 383 423 bool fn() { 384 424 if (p==searchstr.length) return false; … … 413 453 } 414 454 415 template regRange(char [] regstr) 416 { 417 bool fn() { 418 pragma(msg, "Parsing range: " ~ regstr ~ " NOT YET IMPLEMENTED"); 419 return true; 420 } 421 } 422 423 template regInvertedRange(char [] regstr) 424 { 425 bool fn() { 426 pragma(msg, "Parsing inverted range: " ~ regstr ~ " NOT YET IMPLEMENTED"); 427 return true; 455 //"a-zA-Z0-9_" 456 457 // return true if char ch is matched by the character class regstr. 458 template charMatches(char [] regstr) 459 { 460 bool charMatches(char ch) { 461 static if (regstr.length==0) return false; 462 else static if (regstr.length>=3 && regstr[1]=='-') { 463 return (ch>=regstr[0] && ch<=regstr[2]) || charMatches!(regstr[3..$])(ch); 464 } else return (ch==regstr[0]) || charMatches!(regstr[1..$])(ch); 428 465 } 429 466 } … … 466 503 void main() 467 504 { 468 char [] qq; 469 writefln("UNIT TESTS\n"); 505 writefln("BEGINNING UNIT TESTS\n"); 470 506 assert(search!("ab")("aaab")=="ab"); 471 507 assert(search!("a*b")("aaab")=="aaab"); … … 474 510 assert(search!("(a*)b")("aaab")=="aaab"); 475 511 assert(search!("(b*a*)*b")("aaab")=="aaab"); 476 assert(search!("((a*b*)|da)*b")("fasdaaab")=="aaab");477 512 assert(search!("b+cd")("acdbbcabbcdaaab")=="bbcd"); 478 513 assert(search!("b?cd")("abcacbacdb")=="cd"); … … 482 517 assert(search!("(ab)*(abb)")("bababb")=="ababb"); 483 518 assert(search!("e?(ab)*b+")("eaaababbbbaac")=="ababb"); 484 519 assert(search!("(ab*)*c")("bbbababbaaabaaaabbbbc") == "ababbaaabaaaabbbbc"); 520 char [] quasistatic="m"; 521 assert(search!("(@1.*@1)")("they said D can't do metaprogramming?", quasistatic)=="metaprogram"); 522 assert(search!("[h-za]*g")("metaprogramming")=="taprog"); 523 assert(search!("(a*)*b")("cacaaab")=="aaab"); 524 assert(search!("(a*b*)*c")("dababdaabababbaaabbbcab")=="aabababbaaabbbc"); 525 526 assert(search!("((a*b*)|da)*b")("fasdaaab")=="daaab"); 527 528 char [] qq; 485 529 writefln("========="); 486 char [] quasistatic="m"; 487 qq = search!("(@1.{10}@1)")("they said D can't do metaprogramming?", quasistatic); 530 531 writefln("This example fails"); 532 qq = search!("((a*b*)|da)*b")("fasdaaab"); 488 533 writefln("Result: ----",qq, "---"); 489 534 writefln("========="); 490 535 491 /+492 writefln("Test 1: The match is: ", search!("ac?b+d*c?a*")("dsafjabbbbcaac"));493 writefln("Test 2: The match is: ", search!("e?(ab)*b+")("eaaababbbbaac"));494 writefln("Test 3: The match is: ", search!("(abc)*abb")("abbbbaac"));495 writefln("Test 3a: The match is: ", search!("(abc)*abb")("abcabcabbbaac"));496 writefln("Test 4: The match is: ", search!("a{3}b+")("aaabbbbcaac"));497 writefln("Test 5: The match is: ", search!("(z|a{3})b+|m.*")("aaabbbbcaac"));498 499 char [] quasistatic;500 quasistatic = "x";501 for (int i=0; i<3; ++i) {502 if (i==1) quasistatic = "a";503 if (i==2) quasistatic = "m";504 writefln("Test 6: The match is: ",505 search!("($1.{10}$1.*n)|g.*")("they said D can't do metaprogramming? ", quasistatic));506 }507 writefln("Test 7: The match is: ", search!("(z|a{3})b+|M.*")("zand this is Metaprogramming in D"));508 +/509 writefln("All Tests Pass");510 536 } 511 537 … … 530 556 // unit tests 531 557 //------------- 532 static assert(quantifierConsumed!("{456}345")==5); 533 static assert(parenConsumed!("(45(6)4)5")==8); 534 static assert(parenConsumed!(`(45\(6)45`)==7); 558 version (testmeta) { 559 static assert(quantifierConsumed!("{456}345")==5); 560 static assert(parenConsumed!("(45(6)4)5")==8); 561 static assert(parenConsumed!(`(45\(6)45`)==7); 562 }
