root/trunk/bevutils/TinyXML.d

Revision 20, 38.0 kB (checked in by teales, 1 year ago)

Bevutils files initial check-in.

Line 
1 /***************************************************************
2 Copyright (c) Steve Teale 2007
3 This program is free software; you can use it for any purpose
4 subject to the following conditions.
5
6 This program is distributed in the hope that it will be useful,
7 but WITHOUT ANY WARRANTY; without even the implied warranty of
8 MERCHANTABILITY or FITNESS FOR ANY PARTICULAR PURPOSE.
9 ****************************************************************/
10
11 module bevutils.tinyxml;
12
13 import std.regexp;
14 import std.string;
15 import std.stdio;
16 import std.stream;
17
18 alias std.string.find indexOf;
19 alias std.string.split split;
20 alias std.string.toString toString;
21
22 /**
23  * A compact XML parser and object model.
24  *
25  * TinyXML is aimed mainly at configuration files and the like.
26  *
27  * The TinyXML object model is navigated using a path syntax of two forms.  If tx is
28  * a TinyXML object, you can say:
29  *
30  * XMLElement e = tx("path_string");
31  * XMLElement e = tx << "path_string"
32  *
33  * These both perform the same navigation operation, but while the first simply hands
34  * you a reference to the target element, the second can do the same, but also sets the
35  * tx object context.
36  *
37  */
38 class TinyXML
39 {
// Block of blanks; the format() methods slice 3*indent characters from it to indent output.
protected static const char[510] _spaces = ' ';
// A run of whitespace, captured as group 1.
protected static const char[] _WS = "(\\s+)";
// A run of whitespace at the very end of the text.
protected static const char[] _TWS = "\\s+$";
// An XML comment: group 1 is the whole construct, group 2 the text between the delimiters.
// The body is matched minimally so that "<!-- a --><x/><!-- b -->" yields the comment " a "
// rather than everything up to the last "-->" on the line.
protected static const char[] _COMMENT = "(<!--(.*?)-->)";
// Patterns for the path syntax - presumably one element step and one attribute step;
// their use is outside this part of the file (TODO confirm).
protected static const char[] _XJOINT = r"(\.|(([A-Za-z_][0-9A-Za-z_]*)|\^|#|!|')(\[([0-9]+|\$)\])?)";
protected static const char[] _AJOINT = r"(@(([A-Za-z_][0-9A-Za-z_]*)|(\[[0-9]+\])))";
// Compiled forms of the patterns above; created in the static constructor.
protected static RegExp _wsrex;
protected static RegExp _twsrex;
protected static RegExp _crex;
protected static RegExp _jrex;
50
/**
 * Compile the class-wide regular expressions once, before first use.
 */
static this()
{
   _jrex   = RegExp(_XJOINT);
   _crex   = RegExp(_COMMENT);
   _twsrex = RegExp(_TWS);
   _wsrex  = RegExp(_WS);
}
59
/**
 * Strip leading whitespace.
 *
 * Params:
 *   s = The text to be trimmed.
 *
 * Returns: The text following the leading whitespace run, or s itself when s does
 *          not begin with whitespace.
 */
protected char[] prune(char[] s)
{
   // find() gives the offset of the first whitespace run; only a run at offset 0 is leading.
   if (_wsrex.find(s) != 0)
      return s;
   return _wsrex.post;
}
64
/**
 * Strip trailing whitespace.
 *
 * Params:
 *   s = The text to be trimmed.
 *
 * Returns: s up to the start of its trailing whitespace run, or s itself when
 *          there is none.
 */
protected char[] rprune(char[] s)
{
   int cut = _twsrex.find(s);
   if (cut < 0)
      return s;
   return s[0 .. cut];
}
70
/**
 * Element type codes, as returned by XMLElement.EType().
 *
 * TAG is a tag - <tag> ... </tag>, TEXT is body text as in <tag>This text</tag>,
 * and COMMENT is a comment - <!-- A comment -->.  ALL is the zero value and is not
 * returned by any of the element classes.
 */
enum : int
{
   ALL     = 0,
   TAG     = 1,
   TEXT    = 2,
   COMMENT = 3
}
84
/**
 * A single name/value pair, used by AttrList to hold one attribute.
 */
struct NVPair
{
   char[] name;    // attribute name
   char[] value;   // attribute value
}
90
/**
 * The representation of a set of attributes on an XML tag.
 *
 * Attributes are held in insertion order in an array of name/value pairs, together
 * with an associative array that maps each attribute name to its index in that array.
 * Names are unique within a list.
 */
class AttrList
{
private:
   NVPair[] list;     // the attributes, in the order they were added
   int[char[]] map;   // attribute name -> index into list
public:

   /**
    * Add a new attribute to the end of the list.
    *
    * Params:
    *   name = The attribute name.
    *   value = The attribute value.
    *
    * Throws: Exception if an attribute of that name is already present.
    */
   void addAttr(char[] name, char[] value)
   {
      if (hasKey(name))
         throw new Exception("Attribute already exists");
      int n = list.length;
      list.length = n+1;
      list[n].name = name;
      list[n].value = value;
      map[name] = n;
   }

   /**
    * Create a new attribute list identical to the one for which the method was called.
    *
    * Returns: A separate list holding the same names and values in the same order.
    */
   AttrList clone()
   {
      AttrList a = new AttrList();
      foreach (NVPair p; list)
         a.addAttr(p.name, p.value);
      return a;
   }

   /**
    * Delete an attribute from the list by name.
    *
    * Params:
    *   name = The name of the attribute to be deleted.
    *
    * Throws: Exception if there is no attribute of that name.
    */
   void delAttr(char[] name)
   {
      if (!hasKey(name))
         throw new Exception("No such attribute");
      int i = map[name];
      // Forget the name, otherwise hasKey() would still report the deleted attribute.
      map.remove(name);
      list = list[0 .. i] ~ list[i+1 .. $];
      // Every attribute after the deleted one has moved down a slot - refresh its index.
      for (int k = i; k < list.length; k++)
         map[list[k].name] = k;
   }

   /**
    * A property-style method to get the number of attributes in the list.
    */
   int length() { return list.length; }

   /**
    * Get the name of the i'th attribute in the list.
    *
    * Params:
    *   i = The desired index.
    */
   char[] name(int i) { return list[i].name; }

   /**
    * Get the value of the i'th attribute in the list.
    *
    * Params:
    *   i = The desired index.
    */
   char[] value(int i) { return list[i].value; }

   /**
    * Get the value of an attribute by name.
    *
    * Params:
    *   name = The desired name.
    *
    * Returns: The attribute value, or null if there is no attribute of that name.
    */
   char[] value(char[] name)
   {
      if (!hasKey(name))
         return null;
      return list[map[name]].value;
   }

   /**
    * Set the value of an existing attribute by name.
    *
    * Params:
    *   name = The name of the attribute.
    *   value = The new value for the attribute.
    *
    * Returns: The value set, or null if there is no attribute of that name (in which
    *          case nothing is changed).
    */
   char[] value(char[] name, char[] value)
   {
      if (!hasKey(name))
         return null;
      list[map[name]].value = value;
      return value;
   }

   /**
    * Clear the list.
    */
   void clear()
   {
      char[][] keys = map.keys;
      foreach (s; keys)
         map.remove(s);
      list.length = 0;
   }

   /**
    * Formats the list into a string suitable for XML.
    *
    * Returns: name="value" pairs separated by single spaces, with each value passed
    *          through Tag.encode_entities.
    */
   char[] format()
   {
      char[] s = "";
      for (int i = 0; i < list.length; i++)
      {
         if (i > 0) s ~= " ";
         s ~= list[i].name ~ "=\"" ~ Tag.encode_entities(list[i].value) ~ "\"";
      }
      return s;
   }

private:
   // True if an attribute called key is present.
   bool hasKey(char[] key) { return ((key in map) != null); }
}
211
/**
 * Common base of the three element kinds: Tag, Comment and EText (body text).
 *
 * Every element records the Tag that contains it; everything else is left to the
 * derived classes.
 */
class XMLElement
{
   Tag _parent;   // the containing Tag

   /**
    * Get the parent of the element, which is always a Tag.
    */
   abstract Tag Parent();

   /**
    * Get the element type code.
    */
   abstract int EType();

   /**
    * Get a textual representation of the element.
    */
   abstract char[] PText();

   /**
    * Append the XML form of the element to s.
    *
    * Params:
    *   s = The text produced so far.
    *   indent = The indentation level to format at.
    *
    * Returns: s with the formatted element appended.
    */
   abstract char[] format(char[] s, int indent);

   /**
    * Get an 'attribute' of the element by name.
    */
   abstract char[] getValue(char[] name);

   /**
    * Set an 'attribute' of the element by name.
    */
   abstract char[] setValue(char[] name, char[] value) ;
}
250
/**
 * An element holding the body text of a Tag.
 */
class EText : XMLElement
{
   char[] _etext;   // the text itself

   /**
    * Create an EText object and link it to a Tag.
    *
    * Params:
    *   s = The body text.
    *   parent = The Tag that contains the text.
    */
   this(char[] s, Tag parent)
   {
      _parent = parent;
      _etext = s;
   }

   /**
    * Get the element type - always TEXT.
    */
   int EType()
   {
      return TEXT;
   }

   /**
    * Append the text, on a line of its own, to existing XML text.
    *
    * Params:
    *   s = The text produced so far.
    *   indent = The indentation level; three spaces are emitted per level.
    *
    * Returns: s followed by the indentation, the text and a newline.
    */
   char[] format(char[] s, int indent)
   {
      return s ~ _spaces[0 .. 3*indent] ~ _etext ~ "\n";
   }

   /**
    * Get the plain text.
    */
   char[] PText()
   {
      return _etext;
   }

   /**
    * Get the text - an EText has no named attributes, so name is ignored.
    *
    * Params:
    *   name = Ignored.
    *
    * Returns: The plain text.
    */
   char[] getValue(char[] name)
   {
      return _etext;
   }

   /**
    * Replace the text - an EText has no named attributes, so name is ignored.
    *
    * Params:
    *   name = Ignored.
    *   value = The new text.
    *
    * Returns: The text as now set.
    */
   char[] setValue(char[] name, char[] value)
   {
      _etext = value;
      return _etext;
   }

   /**
    * Get the Tag that contains this text.
    */
   Tag Parent()
   {
      return _parent;
   }
}
314
/**
 * An element holding an XML comment.
 *
 * Only the text between the comment delimiters is stored; the delimiters are added
 * back when the comment is formatted.
 */
class Comment : XMLElement
{
   char[] _cmt;   // comment text without the <!-- --> delimiters

   /**
    * Create a Comment object and link it to a Tag.
    *
    * Params:
    *   s = The comment text, without delimiters.
    *   parent = The Tag that contains the comment.
    */
   this(char[] s, Tag parent)
   {
      _parent = parent;
      _cmt = s;
   }

   /**
    * Get the element type - always COMMENT.
    */
   int EType()
   {
      return COMMENT;
   }

   /**
    * Append the comment, on a line of its own, to existing XML text.
    *
    * Params:
    *   s = The text produced so far.
    *   indent = The indentation level; three spaces are emitted per level.
    *
    * Returns: s followed by the indentation, the delimited comment and a newline.
    */
   char[] format(char[] s, int indent)
   {
      return s ~ _spaces[0 .. 3*indent] ~ "<!--" ~ _cmt ~ "-->\n";
   }

   /**
    * Get the comment as text, complete with its delimiters.
    */
   char[] PText()
   {
      return "<!--" ~ _cmt ~ "-->";
   }

   /**
    * Get the comment text - a Comment has no named attributes, so name is ignored.
    *
    * Params:
    *   name = Ignored.
    *
    * Returns: The comment text without delimiters.
    */
   char[] getValue(char[] name)
   {
      return _cmt;
   }

   /**
    * Replace the comment text - a Comment has no named attributes, so name is ignored.
    *
    * Params:
    *   name = Ignored.
    *   value = The new comment text, without delimiters.
    *
    * Returns: The comment text as now set.
    */
   char[] setValue(char[] name, char[] value)
   {
      _cmt = value;
      return _cmt;
   }

   /**
    * Get the Tag that contains this comment.
    */
   Tag Parent()
   {
      return _parent;
   }
}
378
379
380    /**
381     * Class representing an XML Tag - &lt;tagname> ... &lt;/tagname>.
382     *
383     * Tags are the structural building blocks of the TinyXML object model.
384     */
385    class Tag : XMLElement
386    {
// An opening or empty tag: '<', a name, then anything up to the next '>'.
const char[] _TAG = "(<[A-Za-z_][A-Za-z0-9_]*[^>]*>)";
// A closing tag: "</", a name, optional whitespace, '>'.
const char[] _END = "(<\\/[A-Za-z_][A-Za-z0-9_]*\\s*>)";
// The '=' and opening quote (single or double) that introduce an attribute value.
const char[] _AS = "(=[\"'])";

// Compiled patterns shared by all Tag objects.
static RegExp _xmlrex, _tagrex, _endrex, _asrex;

/**
 * Compile the patterns used by the Tag parser.
 */
static this()
{
   // NOTE(review): _XML is not declared in the part of the file visible here -
   // confirm that it is defined elsewhere.
   _xmlrex = RegExp(_XML);
   _tagrex = RegExp(_TAG);
   _endrex = RegExp(_END);
   _asrex = RegExp(_AS);
}
403
bool _valid;             // set true by the constructor only when it completes without error
char[] _name;            // the tag name
XMLElement[] _elist;     // child elements in document order
AttrList _attributes;    // the attributes of the tag
408
409
/**
 * Create a Tag object from XML text and possibly link it to a Tag.
 *
 * This constructor is called recursively to nibble Tag elements from a string
 * originally representing the entire XML, and is in essence the XML parser.  The
 * text consumed is removed from the front of so._src.
 *
 * Failure is not signalled by an exception: the problem is reported through
 * so.setErr and the object is left with _valid == false.
 *
 * Params:
 *   so = The source TinyXML object - provides the context and the text to be parsed.
 *        If so is null this constructor just returns a bare Tag element.
 *   parentnode = The tag that will be the parent of the new Tag.
 */
this(TinyXML so, Tag parentnode)
{
   _valid = false;
   _parent = parentnode;
   _attributes = new AttrList();
   if (so is null)
   {
      // Nothing to parse - just making a bare Tag object
      _valid = true;
      return;
   }

   so._src = prune(so._src);
   // Everything below indexes so._src on the assumption that the tag starts at
   // offset 0, so a tag found any further in is no use to us.
   if (_tagrex.find(so._src) != 0)
   {
      so.setErr("No opening tag found");
      return;
   }
   char[] tag = _tagrex.match(1);
   int taglen = tag.length;
   int tagend = taglen-1;                        // index of the closing '>'

   bool mt = (so._src[tagend-1] == '/');         // empty tag - <name ... />
   int nameend = mt? tagend-1: tagend;           // end of the name/attribute text
   so._atTop = false;

   // The name runs to the first whitespace in the tag, anything after is attributes
   int ps = _wsrex.find(so._src);
   char[] as;
   if (ps == -1 || ps > tagend)
      _name = so._src[1 .. nameend];
   else
   {
      _name = prune(so._src[1 .. ps]);
      as = rprune(prune(so._src[ps+1 .. nameend]));
   }

   if (as.length)
   {
      if (!parse_attribs(so, as))
      {
         // error already reported
         return;
      }
   }
   // NOTE(review): _atTop was cleared a few lines above and nothing visible here sets
   // it again, so this branch looks unreachable - confirm the intent.
   if (so._atTop && mt)
   {
      _valid = true;
      return;
   }

   so._src = prune(so._src[tagend+1 .. so._src.length]);
   if (!mt)
   {
      // We have stripped of the opening tag construct and any whitespace, so now may have
      // Some element text
      // <!-- some comment -->
      // <A ...> ... </A>
      // Some more element text
      // <B ...> ... </B>
      // </CURRENTTAG  >

      for (;;)
      {
         if (so._src == "")
         {
            so.setErr("Missing end tag after tag: " ~ _name);
            return;
         }
         if (_endrex.find(so._src) == 0)
         {
            // Hopefully we found the closing tag of the element we are parsing
            char[] et = _endrex.match(1);
            char[] tn = rprune(et[2 .. et.length-1]);    // gets us "CURRENTTAG  " --> "CURRENTTAG"
            if (tn == _name)
            {
               so._src = prune(_endrex.post);
               break;
            }
            so.setErr("Unexpected closing tag: " ~ et ~ " after " ~ _name);
            return;
         }
         else if (_tagrex.find(so._src) == 0)
         {
            // A nested tag - the recursive call consumes it from so._src
            Tag nn = new Tag(so, this);
            if (!nn._valid) {
               // error already reported
               return;
            }
            _elist.length = _elist.length+1;
            _elist[$-1] = nn;
         }
         else if (_crex.find(so._src) == 0)
         {
            _elist.length = _elist.length+1;
            _elist[$-1] = new Comment(_crex.match(2), this);
            so._src = prune(_crex.post);
         }
         else
         {
            // It is element body text of some sort
            int limit = indexOf(so._src, '<');
            if (limit == -1)
            {
               so.setErr("Tag or closing tag expected after element text");
               return;
            }
            if (limit == 0)
            {
               // A '<' that starts neither a closing tag, a tag nor a comment.  No text
               // would be consumed, so carrying on would loop for ever.
               so.setErr("Unrecognized markup after tag: " ~ _name);
               return;
            }
            char[] t = rprune(so._src[0 .. limit]);
            _elist.length = _elist.length+1;
            _elist[$-1] = new EText(t, this);
            so._src = so._src[limit .. $];   // no need to prune
         }
      }
   }
   this._valid = true;
}
539
/**
 * Get the name of the Tag - gives the same result as PText().
 */
char[] Name()
{
   return _name;
}
544
/**
 * Get the element type - always TAG.
 */
int EType()
{
   return TAG;
}
549
/**
 * Get a plain text version - for a Tag that is just its name.
 */
char[] PText()
{
   return _name;
}
554
/**
 * Get the Tag that contains this Tag.
 */
Tag Parent()
{
   return _parent;
}
559
/**
 * Parse the attribute text of a tag and add each attribute to _attributes.
 *
 * The text is expected to be a sequence of name="value" or name='value' items
 * separated by whitespace.  Values have their entities decoded before being stored.
 *
 * Params:
 *   so = The TinyXML object to which errors are reported.
 *   as = The attribute text - what lies between the tag name and the end of the tag.
 *
 * Returns: true if the text was parsed; false, after calling so.setErr, if it
 *          was malformed.
 */
private bool parse_attribs(TinyXML so, char[] as)
{
   int pos = _asrex.find(as);
   if (pos == -1)
   {
      so.setErr("Bad attribute list - no name=\" construct found: " ~ as);
      return false;
   }
   char[] tail;
   while (pos != -1)
   {
      char[] at = _asrex.match(1);      // the '=' and the opening quote
      tail = _asrex.post;               // the value onward
      char[] n = _asrex.pre;            // what precedes the '=' is the name
      char quot = at[1];                // the value must end with the quote it began with
      int q2 = std.string.find(tail, quot);
      if (q2 == -1)
      {
         so.setErr("Bad attribute list - missing closing quote: " ~ as);
         return false;
      }
      if (_wsrex.find(n) != -1)
      {
         so.setErr("Bad attribute list - space in attribute name: " ~ as);
         return false;
      }
      _attributes.addAttr(n, decode_entities(tail[0 .. q2]));
      tail = tail[q2+1 .. tail.length];
      as = prune(tail);
      pos = _asrex.find(as);
   }
   if (tail.length)
   {
      // Whatever follows the last attribute may only be whitespace, that is a single
      // whitespace run starting at offset 0 and covering the whole of the tail.
      if (_wsrex.find(tail) != 0 || _wsrex.match(1).length != tail.length)
      {
         so.setErr("Bad attribute list - garbage after attributes: " ~ as);
         return false;
      }
   }

   return true;
}
608
/**
 * Append the XML form of this Tag, and of everything it contains, to existing text.
 *
 * A Tag with no children is written as an empty tag - <name ... />.  A Tag whose only
 * child is not itself a Tag is written on one line with the child's plain text between
 * the opening and closing tags.  Otherwise each child is formatted on its own line(s),
 * one level deeper.
 *
 * Params:
 *   s = The text produced so far.
 *   indent = The indentation level; three spaces are emitted per level.
 *
 * Returns: s with the formatted Tag appended.
 */
char[] format(char[] s, int indent)
{
   char[] pad = _spaces[0 .. 3*indent];

   s ~= pad ~ "<" ~ _name;
   if (_attributes.length() != 0)
      s ~= " " ~ _attributes.format();

   // No children - an empty tag
   if (_elist.length == 0)
   {
      s ~= "/>\n";
      return s;
   }

   s ~= ">";

   // A lone text or comment child stays on the same line as its tag
   if (_elist.length == 1 && _elist[0].EType() != TAG)
   {
      s ~= _elist[0].PText() ~ "</" ~ _name ~ ">\n";
      return s;
   }

   s ~= "\n";
   foreach (XMLElement child; _elist)
      s = child.format(s, indent+1);
   s ~= pad ~ "</" ~ _name ~ ">\n";
   return s;
}
648
649       /**
650        * Decode &amp;lt; and &amp;amp; to < and &.
651        *
652        * Params:
653        *   s = The string to be decoded.
654        *
655        * Returns: The decoded string.
656        */
657       public static char[] decode_entities(char[] s)
658       {
659          s = std.string.replace(s