root/trunk/dparser/dparse.d

Revision 269, 38.4 kB (checked in by BCS, 5 months ago)

fixed a debug output issue
added a grammar fixer utility

Line 
1 /++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 This is a template meta-program that generates a recursive decent
3 parser. It is to be mixed into a class or struct. The parameters
4 for the template are: a string containing almost normal BNF
5 reduction definitions, and a string defining the start rule.
6
7 It extends BNF by allowing "|" for "or" but doesn't implement
8 optional or repeated terms.
9
10 $(TABLE
11 $(TR $(TD Grammar)  $(TD ::= Rule Grammar | Rule))
12 $(TR $(TD Rule)     $(TD ::= ID ":" Cases ";"))
13 $(TR $(TD Cases)    $(TD ::= Case "|" Alt | Case))
14 $(TR $(TD Case)     $(TD ::= ID "/" Sequence))
15 $(TR $(TD Sequence) $(TD ::= ID Sequence | ID))
16 $(TR $(TD Item)     $(TD ::= ID "[?+*]?";))
17 $(TR $(TD ID)       $(TD ::= "[A-Za-z][A-Za-z0-9]*";))
18 )
19
20 The term between the ':' and '/' is the name of the reduction
21 action. If a reduction is used but not defined a terminal must
22 be defined for it.
23
24 Example:
25
26 -----------------------------------------
27 struct set
28 {
29 /******* action code ********/
30 static PObject Action(char[] string : "MULTIPLY")(PObject[3] set)
31    {...}
32 static PObject Action(char[] string : "SUM"     )(PObject[3] set)
33    {...}
34 static PObject Action(char[] string : "PASS"    )(PObject[1] set)
35    {...}
36
37 /******** Terminal code *********/
38 static PObject Terminal(char[] name : "NUM")(IParser p)
39    {...}
40 static PObject Terminal(char[] name : "PLUS")(IParser p)
41    {...}
42 static PObject Terminal(char[] name : "TIMES")(IParser p)
43    {...}
44
45 // "ADD" indicates the root rule
46
47 mixin Parser!("ADD", ReduceWhite(
48     "PRIMARY : PASS/     NUM;
49      MUL     : MULTIPLY/ PRIMARY TIMES MUL
50              | PASS/     PRIMARY;
51      ADD     : SUM/      MUL PLUS ADD
52              | PASS/     MUL;
53     "));
54 }
55
56 void main()
57 {
58    auto a = new ExpGrammer;
59    a.data = "1 + 3 * 4 + 5 * 5 ";
60
61    Value v = cast(Value)set.Parser(a);
62
63    assert(v !is null,"ERR0R");
64    writef("%d\n", v.value);
65 }
66 ----------------
67
68 This meta program is distributed with no warranty of any kind.
69
70 Author: Benjamin Shropshire (shro8822drop_this at vandals DOT uidaho edu)
71
72 Please give credit where credit is due. Don't remove this notice.
73
74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++/
75 module syntax.dparse;
76
77 // Add not grammer, might use non-Action parser
78 private import std.stdio : writef;
79 private import std.string : ToString = toString ;
80
81 private import glue.templates;
82
83
84 /******************************************************************************
85 ********* PObject Hieracrcy
86 ******************************************************************************/
87
88
89 /** Base class for parsed objects
90 */
91 abstract class PObject
92 {
93     ///
94     abstract bool fail();
95
96     abstract char[] BaseName();
97 }
98
99
100 /************************************************
101     Common base type for ObjectVector types
102 */
103 class PObjectVectorBase : PObject
104 {
105     char[] BaseName(){return typeof(this).stringof;}
106     abstract char[] string();
107 }
108
109 /************************************************
110     A template sub class of PObject for basic tree building
111 */
112 class PObjectVector(uint i) : PObjectVectorBase
113 {
114     char[] BaseName(){return typeof(this).stringof;}
115     alias Tupleof!(i,PObject) type;
116     type data;
117     /// constructor taking i PObjects
118     this(type s)
119     {
120         foreach(int j, t; type)
121             data[j] = s[j];
122     }
123
124     this(PObject[i] d)
125     {
126         foreach(uint j, v; data)
127         {
128             data[j] = d[j];
129         }
130     }
131
132     PObject[] Get()
133     {
134         PObject[i] ret;
135         foreach(uint j, v; data)
136         {
137             ret[j] = data[j];
138         }
139         return ret.dup;
140     }
141
142     /// Get the j'th item
143     PObject Get(uint j)
144     {
145         switch(j)
146         {
147             foreach(int k, t; type)
148                 case k: return data[k];
149             default: throw new Error("Out of bound");
150         }
151     }
152
153     /// Get the j'th item
154     PObject GetT(uint j)()
155     {
156         static if(j<data.length)
157             return data[j];
158         else
159             static assert(false);
160     }
161
162     char[] string()
163     {
164         char[] ret =  "[";
165
166         foreach(i,_;type)
167         {
168             if(auto v = cast(PObjectVectorBase)data[i])
169             {
170                 ret ~= " " ~ v.string();
171             }
172             else if(auto v = cast(PObjectBoxBase)data[i])
173             {
174                 ret ~= " \"" ~ v.string()~\";
175             }
176             else ret ~= " <UNKNOWN>";
177         }
178         return ret ~ "]";
179     }
180
181     bool fail() { return false; }
182 }
183
184 /************************************************
185     Common base type for BoxObject types
186 */
187 class PObjectBoxBase : PObject
188 {
189     char[] BaseName(){return typeof(this).stringof;}
190     abstract char[] string();
191 }
192
193 /**
194 */
195 class PObjectBox(T,bool str = true) : PObjectBoxBase
196 {
197     char[] BaseName(){return typeof(this).stringof~"!("~T.stringof~")";}
198     T t;
199
200     ///
201     T Get() { return t; }
202
203     ///
204     this(T ti){t=ti;}
205
206     bool fail() { return false; }
207
208     ///
209     char[] string()
210     {
211         static if(str)
212         {
213             static if(is(T == char[]))
214                 return t;
215             else static if(is(ToString(t)))
216                 return ToString(t);
217             else
218                 return "<<"~T.stringof~">>";
219         }
220         else
221             return "<<"~T.stringof~">>";
222     }
223 }
224 //typedef PObjectVector!(5) Five;
225
226 /**
227 */
228 class PObjectList(T) : PObject
229 {
230     char[] BaseName(){return typeof(this).stringof;}
231     debug(TypeReport) pragma(msg, ">>T-"__FILE__~":"~itoa!(__LINE__)~": "~typeof(this).stringof~"!("~T.stringof~")");
232
233     T[] list;
234     int at=0;
235
236     this(){ list = null; }
237
238     this(T ti)
239     {
240         static if(is(T :Object)) assert(ti !is null);
241         list = new T[5];
242         list[at] = ti;
243         at++;
244
245     }
246
247     /// Add a T
248     uint Add(T ti)
249     {
250         static if(is(T :Object)) assert(ti !is null);
251         if(list.length <= at) list.length = at + 5;
252         list[at] = ti;
253         at++;
254         return at-1;
255     }
256
257     /// return the number of items stored
258     uint Count(){return at;}
259
260     ///
261     T[] get()
262     {
263         return list[0..at];
264     }
265
266     bool fail() { return false; }
267 }
268 unittest
269 {
270     writef("unittest@"__FILE__":"~itoa!(__LINE__)~\n);
271    
272     auto o = new PObjectList!(int);
273     o.Add(1);
274     o.Add(2);
275     o.Add(3);
276     o.Add(4);
277     o.Add(5);
278     o.Add(6);
279
280     assert(o.get == [1,2,3,4,5,6], "PObjectList failed");
281 }
282
283 /************************************************
284     List Object that build to the left
285 */
286 class PObjectListLeft(T) : PObjectList!(T)
287 {
288     char[] BaseName(){return typeof(this).stringof;}
289     debug(TypeReport) pragma(msg, ">>T-"__FILE__~":"~itoa!(__LINE__)~": "~typeof(this).stringof~"!("~T.stringof~")");
290
291     this(T t)
292     {
293         list.length = 5;
294         list[$-1] = t;
295         at = 1;
296     }
297     this(){}
298    
299     /// Add a T
300     uint Add(T ti)
301     {
302         static if(is(T :Object)) assert(ti !is null);
303
304         if(list.length <= at)
305         {
306             auto t = list[$-at..$].dup;
307             list.length = at + 5;
308             list[$-at..$] = t;
309         }
310         list[$-1-at] = ti;
311         at++;
312         return at-1;
313     }
314
315     T[] get()
316     {
317         return list[$-at..$];
318     }
319 }
320
321 unittest
322 {
323     writef("unittest@"__FILE__":"~itoa!(__LINE__)~\n);
324    
325     auto o = new PObjectListLeft!(int)( 0);
326                o.Add( 1); o.Add( 2); o.Add( 3); o.Add( 4); o.Add( 5); o.Add( 6); o.Add( 7); o.Add( 8); o.Add( 9);
327     o.Add(10); o.Add(11); o.Add(12); o.Add(13); o.Add(14); o.Add(15); o.Add(16); o.Add(17); o.Add(18); o.Add(19);
328     o.Add(20); o.Add(21); o.Add(22); o.Add(23); o.Add(24); o.Add(25); o.Add(26); o.Add(27); o.Add(28); o.Add(29);
329
330     assert(o.get == [29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0], "PObjectListLeft failed");
331 }
332
333
334 /************************************************
335     A sub class of PObject that is used for repeated reductions
336 */
337 class PObjectSet : PObjectList!(PObject)
338 {
339     char[] BaseName(){return typeof(this).stringof;}
340     /// Discard some PObjects
341     void Back(uint b)
342     {
343         at = b;
344     }
345 }
346
347
348
349 /************************************************
350     Uniform term to prevent needing to know the
351     exact type at compile time.
352 */
353 interface PInterfaceLeftFactor(T) { PObject InsertLeft(T,PObject); }
354
355 /************************************************
356     PObject that uses T.Action!(str) to process a
357     left side term with a Right side term
358 */
359 class PObjectLeftFactorT(Tp, int i, char[] str) : PObjectVector!(i), PInterfaceLeftFactor!(Tp)
360 {
361     this(PObject[i] d){super(d);}
362
363     PObject InsertLeft(Tp that, PObject L)
364     {
365         PObject[i+1] args;
366
367         args[1..$] = Get;
368         args[0] = L;
369
370         return SpecialAction!(Tp,str)(that,args);
371     }
372 }
373
374
375 /************************************************
376     default fail object
377 */
378 class PObjectFail : PObject
379 {
380     char[] BaseName(){return typeof(this).stringof;}
381     char[][] msg;
382
383     this()
384     {
385         msg.length = 1;
386         msg[0] = "Failed";
387     }
388
389     this(char[] m)
390     {
391         msg.length = 1;
392         msg[0] = m.dup;
393     }
394
395     void Add(char[] m)
396     {
397         msg ~= m.dup;
398     }
399
400     bool fail() { return true; }
401 }
402
403 /************************************************
404     default Pass object
405 */
406 class PObjectPass : PObject
407 {
408     char[] BaseName(){return typeof(this).stringof;}
409     this() { }
410     bool fail() { return false; }
411 }
412
413 /************************************************
414     default Filler object, returns fail state as instructed
415 */
416 class PObjectFill : PObject
417 {
418     char[] BaseName(){return typeof(this).stringof;}
419     bool b;
420     this(bool _b){b=_b;}
421     bool fail(){return b;}
422 }
423
424 /*******************************************************
425     interface to handle parser data
426 */
427 interface IParser
428 {
429     uint pos();
430     void pos(uint);
431     debug(dParse_runtime) void mark();
432 }
433
434
435 /******************************************************************************
436 ******** genaric Template code
437 ******************************************************************************/
438
439
440 /** generate a tuple of i U's
441 */
442 template Tupleof(uint i, U)
443 {
444     static if(i == 0)
445         alias T!() Tupleof;
446     else
447     static if(i == 1)
448         alias T!(U) Tupleof;
449     else
450         alias T!(Tupleof!(i-1, U), U) Tupleof;
451 }
452
453 /** Tuple literal template
454 */
455 template T(A...){alias A T;}
456
457
458 /******************************************************************************
459 ******** CTFE functions
460 ******************************************************************************/
461
462 /******************************
463 drop leading whitespace
464 */
465 char[] DropWhiteF(char[] str)
466 {
467
468     foreach(int i, char c; str)
469         switch(c)
470         {
471             case ' ', '\t', '\r', '\n':
472                 continue;
473             default:
474                 return str[i..$];
475         }
476         return "";
477 }
478 struct UnittetDropWhite{
479     static const char[] str = DropWhiteF(" \t\n\rhello");
480     static assert(DropWhiteF(" \t\n\rhello") == "hello");
481 }
482
483
484 /**********************************
485   find first instance of t in str,
486   return str up through that char
487 */
488 char[] FindChar(char t)(char[] str)
489 {
490     foreach(int i, char c; str)
491         if(c == t)
492             return str[0..i+1];
493     return str;
494 }
495
496
497 /**********************************
498   return an identifier
499 */
500 char[] GetID(char[] instr)
501 {
502     char[] str = DropWhiteF(instr);
503
504     if(str.length == 0) return "";
505
506     if(
507         !('a' <= str[0] && str[0] <= 'z') &&
508         !('A' <= str[0] && str[0] <= 'Z') &&
509         !('_' == str[0])
510     ) return "";
511
512     foreach(int i, char c; str)
513     if(
514         !('a' <= c && c <= 'z') &&
515         !('A' <= c && c <= 'Z') &&
516         !('0' <= c && c <= '9') &&
517         !('_' == c)
518     ) return str[0..i];
519
520     return str;
521 }
522 struct UnittetGetID{
523     static assert(GetID("hello world") == "hello", GetID("hello world"));
524 }
525
526 /*********************************
527   replace all sequences of white
528   space with single spaces
529 */
530 public char[] ReduceWhite(char[] str)
531 {
532     bool pass = true;
533     int at = 0;
534     foreach(char c; str)
535         switch(c)
536         {
537             case ' ', '\n', '\r', '\t':
538                 if(!pass)
539                 {
540                     pass = true;
541                     str[at++] = ' ';
542                 }
543                 break;
544
545             default:
546                 pass = false;
547                 str[at++] = c;
548                 break;
549         }
550     return str[0..at];;
551 }
552 static const char[] test = ReduceWhite("hello");
553
554 /*********************************
555     Extract # from "$(C,#,NAME)"
556 */
557 int ExtractCount(char[] str)
558 {
559     int ret = int.min;
560
561     if(str.length > 4 && '0' <= str[0] && str[0] <= '9')
562     {
563         ret = str[0] - '0';
564         for(int i = 1; i<str.length; i++)
565         {
566             if('0' <= str[i] && str[i] <= '9')
567             {
568                 ret *= 10;
569                 ret += str[i] - '0';
570             }
571             else
572                 break;
573         }
574     }
575
576     return ret;
577 }
578
579 /*********************************
580     Extract NAME from "$(C,#,NAME)"
581 */
582 char[] ExtractAct(char[] str)
583 {
584     int c, i;
585     for(i=0; i < str.length && c < 2; i++)
586         c += (str[i] == ',');
587
588     str = str[i..$];
589     i=0;
590     int d = 1;
591     while(i < str.length && d > 0)
592     {
593         d += (str[i] == '(');
594         d -= (str[i] == ')');
595         i++;
596     }
597
598     return str[0..i-1];
599 }
600
601 static assert(ExtractAct("$(T,1,$(N,1,abc,def))") == "$(N,1,abc,def)", ExtractAct("$(T,1,$(N,1,abc,def))"));
602
603 /******************************************************************************
604 ******* Special Action Code
605 ******************************************************************************/
606
607 /// Pack up stuff in a computed type of PObjectLeftFactorT Object
608 PObject L_Action(Tp, char[] str ) (Tp, PObject[ExtractCount(str[4..$])] i)
609 {
610     static const int c = ExtractCount(str[4..$]);
611     static const char[] n = ExtractAct(str);
612     static if(false) pragma(msg, str~" == $(L,"~c.stringof~","~n~")");
613
614     return new PObjectLeftFactorT!(Tp, c, n)(i);
615 }
616
617 /// Left Process a Leftmost term and right side terms list.
618 PObject T_Action(Tp,char[] str) (Tp that, PObject[1+ExtractCount(str[4..$])] i)
619 {
620     static const int num = ExtractCount(str[4..$]);
621     static const char[] act = ExtractAct(str);
622     static if(false) pragma(msg, str~" == $(T,"~num.stringof~","~act~")");
623
624     PObject[num] args = i[0..num];
625     auto ret = SpecialAction!(Tp,act)(that,args);
626
627     auto listO = cast(PObjectList!(PObject)) i[num];
628     auto list = listO.get;
629
630     foreach(obj; list)
631     {
632         auto Tobj = cast(PInterfaceLeftFactor!(Tp))obj;
633         ret = Tobj.InsertLeft(that,ret);
634     }
635     return ret;
636 }
637
638
639 template GetSize(Tp, char[] str)
640 {
641     alias Tp.Action!(str) Fn;
642     static if(is(typeof(Fn) Args == function))
643         const int size = Args[0].length;
644     else
645         static assert(false);
646 }
647
648 template GetArgs(Tp, char[] str)
649 {
650     alias Tp.Action!(str) Fn;
651     static if(is(typeof(Fn) Args == function))
652     {}
653     else
654         static assert(false);
655 }
656
657 /// Next actions
658
659 template N_parts(char[] str)
660 {
661     const int num = ExtractCount(str[4..$]);
662     const char[] firstAct = ExtractAct(str[4..$]);
663     const char[] s1 = str[FindChar!(',')(str[4..$]).length + 4..$];
664     const char[] secondAct = s1[FindChar!(',')(s1).length..$-1];
665 }
666
667 //alias N_parts!("$(N,3,dummy,dummy)") NP;
668
669 PObject N_Action(Tp,char[] str) (Tp that, GetArgs!(Tp,N_parts!(str).firstAct).Args