| 1 |
/* |
|---|
| 2 |
www.sourceforge.net/projects/tinyxml |
|---|
| 3 |
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) |
|---|
| 4 |
|
|---|
| 5 |
This software is provided 'as-is', without any express or implied |
|---|
| 6 |
warranty. In no event will the authors be held liable for any |
|---|
| 7 |
damages arising from the use of this software. |
|---|
| 8 |
|
|---|
| 9 |
Permission is granted to anyone to use this software for any |
|---|
| 10 |
purpose, including commercial applications, and to alter it and |
|---|
| 11 |
redistribute it freely, subject to the following restrictions: |
|---|
| 12 |
|
|---|
| 13 |
1. The origin of this software must not be misrepresented; you must |
|---|
| 14 |
not claim that you wrote the original software. If you use this |
|---|
| 15 |
software in a product, an acknowledgment in the product documentation |
|---|
| 16 |
would be appreciated but is not required. |
|---|
| 17 |
|
|---|
| 18 |
2. Altered source versions must be plainly marked as such, and |
|---|
| 19 |
must not be misrepresented as being the original software. |
|---|
| 20 |
|
|---|
| 21 |
3. This notice may not be removed or altered from any source |
|---|
| 22 |
distribution. |
|---|
| 23 |
*/ |
|---|
| 24 |
public import std.stream; |
|---|
| 25 |
public import std.string; |
|---|
| 26 |
|
|---|
| 27 |
|
|---|
| 28 |
private import std.stdio; |
|---|
| 29 |
|
|---|
| 30 |
private import std.uni; |
|---|
| 31 |
private import xpath.utf8; |
|---|
| 32 |
private import std.ctype; |
|---|
| 33 |
public import std.cstream; |
|---|
| 34 |
public import xpath.xpath_stream; |
|---|
| 35 |
/// Encoding tag handed to the parsing routines in this module.
public enum TiXmlEncoding
{
    TIXML_ENCODING_UNKNOWN,
    TIXML_ENCODING_UTF8,
    TIXML_ENCODING_LEGACY
}
|---|
| 43 |
private |
|---|
| 44 |
{ |
|---|
| 45 |
// Debug builds only: TIXML_LOG is an alias for writefln.
debug alias writefln TIXML_LOG;
|---|
| 48 |
|
|---|
| 49 |
import std.utf; |
|---|
| 50 |
// Byte values EF, BB, BF (the three bytes of a UTF-8 byte order mark).
const char TIXML_UTF_LEAD_0 = cast(char) 0xEF;
const char TIXML_UTF_LEAD_1 = cast(char) 0xBB;
const char TIXML_UTF_LEAD_2 = cast(char) 0xBF;
|---|
| 53 |
|
|---|
| 54 |
/// Bookkeeping record carried through parsing: a cursor (row/column),
/// a "stamp" slice of the input and the tab size supplied at construction.
class TiXmlParsingData
{
    // State, declared ahead of the protection labels below so that it keeps
    // the same (unlabelled) protection it had originally.
    TiXmlCursor cursor;
    char[]      stamp;
    int         tabsize;

  public:
    /// Placeholder: the contract requires a non-null, non-empty `now`,
    /// but the body performs no work and leaves all state untouched.
    void Stamp( char[] now, TiXmlEncoding encoding )
    in
    {
        assert( !(now is null) );
        assert( now.length > 0 );
    }
    body
    {
        // Do nothing if the tabsize is 0.
        // (At present nothing is done for any tabsize.)
    }

    /// Returns a copy of the current cursor.
    TiXmlCursor Cursor()
    {
        return cursor;
    }

  protected:
    /// Only used by the document!
    /// Records the starting slice, the tab size and the initial row/column.
    this( char[] start, int _tabsize, int row, int col )
    {
        this.stamp      = start;
        this.tabsize    = _tabsize;
        this.cursor.row = row;
        this.cursor.col = col;
    }
}
|---|
| 83 |
|
|---|
| 84 |
|
|---|
| 85 |
} |
|---|
| 86 |
public { |
|---|
| 87 |
/+ |
|---|
| 88 |
#ifndef USE_MMGR |
|---|
| 89 |
#include <ctype.h> |
|---|
| 90 |
#include <stdio.h> |
|---|
| 91 |
#include <stdlib.h> |
|---|
| 92 |
#include <string.h> |
|---|
| 93 |
#include <assert.h> |
|---|
| 94 |
#endif |
|---|
| 95 |
|
|---|
| 96 |
// Help out windows: |
|---|
| 97 |
#if defined( _DEBUG ) && !defined( DEBUG ) |
|---|
| 98 |
#define DEBUG |
|---|
| 99 |
#endif |
|---|
| 100 |
|
|---|
| 101 |
#ifdef TIXML_USE_STL |
|---|
| 102 |
#include <string> |
|---|
| 103 |
#include <iostream> |
|---|
| 104 |
#define TIXML_STRING std::string |
|---|
| 105 |
#define TIXML_ISTREAM std::istream |
|---|
| 106 |
#define TIXML_OSTREAM std::ostream |
|---|
| 107 |
#else |
|---|
| 108 |
#include "tinystr.h" |
|---|
| 109 |
#define TIXML_STRING TiXmlString |
|---|
| 110 |
#define TIXML_OSTREAM TiXmlOutputStream |
|---|
| 111 |
#endif |
|---|
| 112 |
+/ |
|---|
| 113 |
|
|---|
| 114 |
// Library version numbers: major 2, minor 4, patch 3.
const int TIXML_MAJOR_VERSION = 2,
          TIXML_MINOR_VERSION = 4,
          TIXML_PATCH_VERSION = 3;

// Upper bound of the loop that scans the entity table in GetEntity().
const int NUM_ENTITY = 5;
|---|
| 118 |
|
|---|
| 119 |
/* Internal structure for tracking location of items |
|---|
| 120 |
in the XML file. |
|---|
| 121 |
*/ |
|---|
| 122 |
struct TiXmlCursor
{
    int row = -1;   // 0 based.
    int col = -1;   // 0 based.

    /// Resets both coordinates to the value of their `.init` property.
    void Clear()
    {
        row = row.init;
        col = col.init;
    }
}
|---|
| 128 |
|
|---|
| 129 |
|
|---|
| 130 |
// Only used by Attribute::Query functions |
|---|
| 131 |
/// Result codes; numbering is unchanged, only written out explicitly.
enum AttributeQueryEnum
{
    TIXML_SUCCESS      = 0,
    TIXML_NO_ATTRIBUTE = 1,
    TIXML_WRONG_TYPE   = 2
}
|---|
| 137 |
|
|---|
| 138 |
|
|---|
| 139 |
// Used by the parsing routines. |
|---|
| 140 |
|
|---|
| 141 |
// Module-level encoding constant, fixed to TIXML_ENCODING_UNKNOWN.
const TiXmlEncoding TIXML_DEFAULT_ENCODING = TiXmlEncoding.TIXML_ENCODING_UNKNOWN; |
|---|
| 142 |
|
|---|
| 143 |
/** TiXmlBase is a base class for every class in TinyXml. |
|---|
| 144 |
It does little except to establish that TinyXml classes |
|---|
| 145 |
can be printed and provide some utility functions. |
|---|
| 146 |
|
|---|
| 147 |
In XML, the document and elements can contain |
|---|
| 148 |
other elements and other types of nodes. |
|---|
| 149 |
|
|---|
| 150 |
@verbatim |
|---|
| 151 |
A Document can contain: Element (container or leaf) |
|---|
| 152 |
Comment (leaf) |
|---|
| 153 |
Unknown (leaf) |
|---|
| 154 |
Declaration( leaf ) |
|---|
| 155 |
|
|---|
| 156 |
An Element can contain: Element (container or leaf) |
|---|
| 157 |
Text (leaf) |
|---|
| 158 |
Attributes (not on tree) |
|---|
| 159 |
Comment (leaf) |
|---|
| 160 |
Unknown (leaf) |
|---|
| 161 |
|
|---|
| 162 |
A Declaration contains: Attributes (not on tree) |
|---|
| 163 |
@endverbatim |
|---|
| 164 |
*/ |
|---|
| 165 |
class TiXmlBase |
|---|
| 166 |
{ |
|---|
| 167 |
public { |
|---|
| 168 |
/// Default constructor: starts with no user pointer attached.
this()
{
    userData = null;
}
|---|
| 169 |
|
|---|
| 170 |
/** All TinyXml classes can print themselves to a filestream. |
|---|
| 171 |
This is a formatted print, and will insert tabs and newlines. |
|---|
| 172 |
|
|---|
| 173 |
(For an unformatted stream, use the << operator.) |
|---|
| 174 |
*/ |
|---|
| 175 |
// Abstract: declared without a body in this class.
// Note that the first parameter is named `Stream`, the same identifier as its type.
abstract void Print( Stream Stream, int depth ); |
|---|
| 176 |
|
|---|
| 177 |
/** The world does not agree on whether white space should be kept or |
|---|
| 178 |
not. In order to make everyone happy, these global, static functions |
|---|
| 179 |
are provided to set whether or not TinyXml will condense all white space |
|---|
| 180 |
into a single space or not. The default is to condense. Note that changing this |
|---|
| 181 |
value is not thread safe. |
|---|
| 182 |
*/ |
|---|
| 183 |
/// Stores `condense` in the class-wide condenseWhiteSpace flag.
static void SetCondenseWhiteSpace( bool condense )
{
    condenseWhiteSpace = condense;
}
|---|
| 184 |
|
|---|
| 185 |
/// Return the current white space setting. |
|---|
| 186 |
/// Reads back the class-wide condenseWhiteSpace flag.
static bool IsWhiteSpaceCondensed()
{
    return condenseWhiteSpace;
}
|---|
| 187 |
|
|---|
| 188 |
/** Return the position, in the original source file, of this node or attribute. |
|---|
| 189 |
The row and column are 1-based. (That is the first row and first column is |
|---|
| 190 |
1,1). If the returned values are 0 or less, then the parser does not have |
|---|
| 191 |
a row and column value. |
|---|
| 192 |
|
|---|
| 193 |
Generally, the row and column value will be set when the TiXmlDocument::Load(), |
|---|
| 194 |
TiXmlDocument::LoadFile(), or any TiXmlNode::Parse() is called. It will NOT be set |
|---|
| 195 |
when the DOM was created from operator>>. |
|---|
| 196 |
|
|---|
| 197 |
The values reflect the initial load. Once the DOM is modified programmatically |
|---|
| 198 |
(by adding or changing nodes and attributes) the new values will NOT update to |
|---|
| 199 |
reflect changes in the document. |
|---|
| 200 |
|
|---|
| 201 |
There is a minor performance cost to computing the row and column. Computation |
|---|
| 202 |
can be disabled if TiXmlDocument::SetTabSize() is called with 0 as the value. |
|---|
| 203 |
|
|---|
| 204 |
@sa TiXmlDocument::SetTabSize() |
|---|
| 205 |
*/ |
|---|
| 206 |
/// 1-based row: the stored 0-based `location.row` plus one.
int Row()
{
    return 1 + location.row;
}
|---|
| 207 |
/// 1-based column: the stored 0-based `location.col` plus one. See Row().
int Column()
{
    return 1 + location.col;
}
|---|
| 208 |
|
|---|
| 209 |
/// Stores an arbitrary caller-owned pointer on this object.
void SetUserData( void* user )
{
    this.userData = user;
}
|---|
| 210 |
/// Returns the pointer last stored with SetUserData() (null after construction).
void* GetUserData()
{
    return this.userData;
}
|---|
| 211 |
|
|---|
| 212 |
// Table that returns, for a given lead byte, the total number of bytes |
|---|
| 213 |
// in the UTF-8 sequence. |
|---|
| 214 |
// 256 entries, indexed by a byte value (GetChar() indexes it with sin[0]).
// Declared here without an initializer.
// NOTE(review): the contents are presumably supplied elsewhere in the module — confirm.
static const int utf8ByteTable[256]; |
|---|
| 215 |
|
|---|
| 216 |
// Abstract parsing hook: declared without a body in this class.
// Takes the input slice `p`, a TiXmlParsingData record and an encoding tag,
// and returns a char[] slice. The default shown in the trailing comment is
// commented out, so callers must always pass `encoding` explicitly.
abstract char[] Parse( char[] p, |
|---|
| 217 |
TiXmlParsingData data, |
|---|
| 218 |
TiXmlEncoding encoding /*= TIXML_ENCODING_UNKNOWN */ ); |
|---|
| 219 |
|
|---|
| 220 |
/// Error codes. The numbering is unchanged (it was implicit before) and is
/// written out so it can be checked against the 17-entry errorString table.
enum TiXmlError
{
    TIXML_NO_ERROR                          = 0,
    TIXML_ERROR                             = 1,
    TIXML_ERROR_OPENING_FILE                = 2,
    TIXML_ERROR_OUT_OF_MEMORY               = 3,
    TIXML_ERROR_PARSING_ELEMENT             = 4,
    TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME = 5,
    TIXML_ERROR_READING_ELEMENT_VALUE       = 6,
    TIXML_ERROR_READING_ATTRIBUTES          = 7,
    TIXML_ERROR_PARSING_EMPTY               = 8,
    TIXML_ERROR_READING_END_TAG             = 9,
    TIXML_ERROR_PARSING_UNKNOWN             = 10,
    TIXML_ERROR_PARSING_COMMENT             = 11,
    TIXML_ERROR_PARSING_DECLARATION         = 12,
    TIXML_ERROR_DOCUMENT_EMPTY              = 13,
    TIXML_ERROR_EMBEDDED_NULL               = 14,
    TIXML_ERROR_PARSING_CDATA               = 15,
    TIXML_ERROR_DOCUMENT_TOP_ONLY           = 16,

    // Number of real error codes above.
    TIXML_ERROR_STRING_COUNT                = 17
}
|---|
| 242 |
} |
|---|
| 243 |
protected{ |
|---|
| 244 |
|
|---|
| 245 |
/// Returns `op` with its leading white space removed by std.string.stripl().
/// The `encoding` argument is accepted but not consulted.
///
/// A hand-written scanner that also stepped over UTF-8 byte order marks used
/// to live here as commented-out code; only the stripl() call was ever active.
/// NOTE(review): stripl() presumably does not skip byte order marks — confirm
/// whether callers still need that.
static char[] SkipWhiteSpace( char[] op, TiXmlEncoding encoding )
{
    char[] trimmed = stripl( op );
    return trimmed;
}
|---|
| 295 |
|
|---|
| 296 |
/// True when `c` is a line feed, a carriage return, or anything
/// std.string.iswhite() reports as white space.
static bool IsWhiteSpace( char c )
{
    if ( c == '\n' || c == '\r' )
        return true;

    return std.string.iswhite( c ) != 0;
}
|---|
| 300 |
/// Integer overload of IsWhiteSpace().
///
/// Only values in 0..255 are forwarded to the char overload; everything else
/// is reported as "not white space". Negative values are rejected explicitly:
/// they used to pass the `c < 256` test and were then wrapped by the cast to
/// char, so that e.g. -246 was treated as '\n'.
static bool IsWhiteSpace( int c )
{
    if ( c < 0 || c >= 256 )
        return false;   // Again, only truly correct for English/Latin...but usually works.

    return IsWhiteSpace( cast(char) c );
}
|---|
| 306 |
|
|---|
| 307 |
// Declared without a body here; takes the OutputStream to write to.
// NOTE(review): presumably implemented by subclasses or elsewhere — confirm.
void StreamOut (OutputStream o); |
|---|
| 308 |
|
|---|
| 309 |
/*#ifdef TIXML_USE_STL |
|---|
| 310 |
static bool StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag ); |
|---|
| 311 |
static bool StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag ); |
|---|
| 312 |
#endif*/ |
|---|
| 313 |
|
|---|
| 314 |
/* Reads an XML name into the string provided. Returns |
|---|
| 315 |
a pointer just past the last character of the name, |
|---|
| 316 |
or 0 if the function has an error. |
|---|
| 317 |
*/ |
|---|
| 318 |
/// Reads an XML name from the front of `p` into `name`.
///
/// A name must start with a character accepted by IsAlpha() or with '_'.
/// After that, any character accepted by IsAlphaNum(), or one of
/// '_', '-', '.', ':' is taken. (Colons are valid only for namespaces,
/// but tinyxml can't tell namespaces from names.)
///
/// Returns: the slice of `p` that follows the name, or null when `p` is
/// empty or does not start with a valid first character. `name` is "" in
/// the failure case.
static char[] ReadName( char[] p, out char[] name, TiXmlEncoding encoding )
in
{
    assert( p );
}
body
{
    name = "";

    // Reject empty input and invalid first characters up front.
    if ( !p || p.length == 0 )
        return null;
    if ( !IsAlpha( cast(ubyte) p[0], encoding ) && p[0] != '_' )
        return null;

    // Copy name characters one at a time until the first non-name character.
    int i = 0;
    for ( ; i < p.length; ++i )
    {
        char ch = p[i];
        bool isNameChar = IsAlphaNum( ch, encoding )
                       || ch == '_'
                       || ch == '-'
                       || ch == '.'
                       || ch == ':';
        if ( !isNameChar )
            break;
        name ~= ch;
    }
    return p[i .. p.length];
}
|---|
| 350 |
|
|---|
| 351 |
/* Reads text. Returns a pointer past the given end tag. |
|---|
| 352 |
Wickedly complex options, but it keeps the (sensitive) code in one place. |
|---|
| 353 |
*/ |
|---|
| 354 |
/// Reads text up to (not including) `endTag`.
///
/// Params:
///   sin              = where to start
///   text             = receives the text found (a slice of the input)
///   ignoreWhiteSpace = when true and white-space condensing is enabled,
///                      leading white space is skipped before searching
///   endTag           = what ends this text
///   ignoreCase       = search for endTag case-insensitively (ifind) or not (find)
///   encoding         = the current encoding (only forwarded to SkipWhiteSpace)
///
/// Returns: the index just past `endTag`, or the length of the searched text
/// when `endTag` does not occur (in which case `text` is all of it).
///
/// NOTE(review): when leading white space is skipped, the returned index is
/// relative to the trimmed slice, not to the `sin` the caller passed in —
/// confirm that callers account for this.
static int ReadText( char[] sin,
                     inout char[] text,
                     bool ignoreWhiteSpace,
                     char[] endTag,
                     bool ignoreCase,
                     TiXmlEncoding encoding )
{
    text = "";

    // Equivalent to the original !( !ignoreWhiteSpace || !condenseWhiteSpace ).
    if ( ignoreWhiteSpace && condenseWhiteSpace )
        sin = SkipWhiteSpace( sin, encoding );

    int hit;
    if ( ignoreCase )
        hit = ifind( sin, endTag );
    else
        hit = find( sin, endTag );

    if ( hit != -1 )
    {
        text = sin[0 .. hit];
        return hit + endTag.length;
    }

    // No end tag: everything that is left is text.
    text = sin;
    return sin.length;
}
|---|
| 382 |
|
|---|
| 383 |
|
|---|
| 384 |
|
|---|
| 385 |
|
|---|
| 386 |
|
|---|
| 387 |
// If an entity has been found, transform it into a character. |
|---|
| 388 |
/// If an entity has been found, transform it into a character.
///
/// Params:
///   sin      = input, positioned on the '&' that introduces the entity
///   value    = caller-supplied buffer that receives the decoded bytes;
///              it must already have room for them (value[0] is written directly)
///   length   = set to the number of bytes stored in `value` (0 on failure)
///   encoding = for TIXML_ENCODING_UTF8 a numeric reference is expanded with
///              ConvertUTF32ToUTF8(); otherwise it is truncated to one char
///
/// Returns: the number of characters of `sin` that were consumed, or 0 when
/// `sin` is empty or holds a malformed numeric character reference.
///
/// Fixes relative to the previous version:
///  - the digit loops walked backwards looking for 'x' / '#', which are not
///    part of the scanned slice, and so ran off the front of the array;
///  - hexadecimal letters were read through `*q` instead of the current digit;
///  - the decimal branch skipped the last digit;
///  - the returned count did not include the "&#" / "&#x" prefix;
///  - named entities only matched when `sin` was *exactly* the entity text;
///  - empty input indexed sin[0].
static int GetEntity( char[] sin, inout char[] value, inout int length, TiXmlEncoding encoding )
{
    length = 0;

    // Nothing to decode; this also keeps the sin[0] fallback below in bounds.
    if ( sin.length == 0 )
        return 0;

    if ( sin.length > 2 && sin[1] == '#' )
    {
        // Numeric character reference: "&#NNN;" or "&#xHHH;".
        bool isHex      = ( sin[2] == 'x' );
        int  firstDigit = isHex ? 3 : 2;
        uint radix      = isHex ? 16 : 10;

        int semi = find( sin[firstDigit .. sin.length], ';' );
        if ( semi <= 0 )
            return 0;                       // no terminator, or no digits at all
        int stop = firstDigit + semi;       // index of ';' within sin

        ulong ucs = 0;
        for ( int k = firstDigit; k < stop; ++k )
        {
            char d = sin[k];
            uint digit;

            if ( d >= '0' && d <= '9' )
                digit = d - '0';
            else if ( isHex && d >= 'a' && d <= 'f' )
                digit = d - 'a' + 10;
            else if ( isHex && d >= 'A' && d <= 'F' )
                digit = d - 'A' + 10;
            else
                return 0;

            ucs = ucs * radix + digit;
            if ( ucs > 0xFFFFFFFFUL )
                return 0;                   // does not fit in 32 bits; also prevents wrap-around
        }

        if ( encoding == TiXmlEncoding.TIXML_ENCODING_UTF8 )
        {
            ConvertUTF32ToUTF8( ucs, value, length );
        }
        else
        {
            value[0] = cast(char) ucs;
            length = 1;
        }
        return stop + 1;                    // everything up to and including ';'
    }

    // Now try to match it against the table of named entities (prefix match).
    for ( int i = 0; i < NUM_ENTITY; ++i )
    {
        int n = entity[i].str.length;
        if ( sin.length >= n && sin[0 .. n] == entity[i].str )
        {
            value[0] = entity[i].chr;
            length = 1;
            return n;
        }
    }

    // So it wasn't an entity, its unrecognized, or something like that.
    value[0] = sin[0];  // Don't put back the last one, since we return it!
    length = 1;         // Leave unrecognized entities - this doesn't really work.
                        // Just writes strange XML.
    return 1;
}
|---|
| 479 |
|
|---|
| 480 |
|
|---|
| 481 |
// Get a character, while interpreting entities. |
|---|
| 482 |
// The length can be from 0 to 4 bytes. |
|---|
| 483 |
/// Get a character, while interpreting entities.
/// The length can be from 0 to 4 bytes.
///
/// Params:
///   sin      = input positioned on the character to read
///   _value   = receives the character. For a single byte it is re-pointed at
///              `sin` (only the first `length` chars are meaningful); for a
///              multi-byte sequence the bytes are copied into the existing
///              buffer, which must be large enough
///   length   = set to the number of bytes that make up the character
///   encoding = with TIXML_ENCODING_UTF8 the byte count comes from
///              utf8ByteTable; otherwise every character is one byte
///
/// Returns: the number of characters of `sin` consumed; 0 when `sin` is empty
/// or utf8ByteTable gives a length of 0 for the lead byte.
///
/// Fixes relative to the previous version: empty input no longer indexes
/// sin[0], and a multi-byte sequence that is cut short by the end of the input
/// is clamped to the bytes actually present, so the returned count can never
/// exceed sin.length.
static int GetChar( char[] sin, inout char[] _value, inout int length, TiXmlEncoding encoding )
in
{
    assert( sin );
}
body
{
    if ( sin.length == 0 )
    {
        length = 0;
        return 0;
    }

    if ( encoding == TiXmlEncoding.TIXML_ENCODING_UTF8 )
    {
        length = utf8ByteTable[ sin[0] ];
        assert( length >= 0 && length < 5 );
    }
    else
    {
        length = 1;
    }

    if ( length == 1 )
    {
        if ( sin[0] == '&' )
            return GetEntity( sin, _value, length, encoding );
        _value = sin;
        return 1;
    }

    if ( length <= 0 )
        return 0;

    // Truncated sequence at the end of the input: take only what is there.
    if ( length > sin.length )
        length = cast(int) sin.length;

    for ( int i = 0; i < length; ++i )
        _value[i] = sin[i];

    return length;
}
|---|
| 516 |
|
|---|
| 517 |
// Puts a string to a stream, expanding entities as it goes. |
|---|
| 518 |
// Note this should not contain the '<', '>', etc, or they will be transformed into entities! |
|---|
| 519 |
/// Escapes `str` with the string-to-string PutString() overload and writes
/// the result to `outs` with writeString().
static void PutString( char[] str, OutputStream outs )
{
    char[] escaped;
    PutString( str, escaped );
    outs.writeString( escaped );
}
|---|
| 525 |
|
|---|
| 526 |
/// Copies `str` into `outString`, replacing characters that need escaping.
///
///  - A hexadecimal character reference ("&#x...;") is passed through unchanged.
///  - '&', '<', '>', '"' and '\'' are replaced by entity[0] .. entity[4].str
///    respectively (the entity table is defined elsewhere in this class).
///  - Characters below 32 are written as "&#xNN;".
///  - Everything else is copied as is.
///
/// The loop is an explicit index loop because the hexadecimal branch has to
/// advance the position by more than one character. The previous foreach
/// version mixed manual `++i` with the foreach index (and shadowed an unused
/// outer `i`), which either duplicated or skipped characters depending on how
/// the compiler treats writes to a foreach index. It also computed
/// `str.length - 2` on an unsigned length, which underflows for strings
/// shorter than two characters.
static void PutString( char[] str, out char[] outString )
{
    int i = 0;

    while ( i < str.length )
    {
        char c = str[i];

        if (    c == '&'
             && i + 2 < str.length
             && str[i+1] == '#'
             && str[i+2] == 'x' )
        {
            // Hexadecimal character reference.
            // Pass through unchanged.
            // &#xA9;	-- copyright symbol, for example.
            //
            // The -1 is a bug fix from Rob Laveaux. It keeps
            // an overflow from happening if there is no ';'.
            // There are actually 2 ways to exit this loop -
            // while fails (error case) and break (semicolon found).
            // However, there is no mechanism (currently) for
            // this function to return an error.
            while ( i < str.length - 1 )
            {
                outString ~= str[i];
                ++i;
                if ( str[i] == ';' )
                    break;
            }
            // str[i] (the ';', or the last character when there is none)
            // is handled by the next pass of the outer loop.
        }
        else if ( c == '&' )
        {
            outString ~= entity[0].str;
            ++i;
        }
        else if ( c == '<' )
        {
            outString ~= entity[1].str;
            ++i;
        }
        else if ( c == '>' )
        {
            outString ~= entity[2].str;
            ++i;
        }
        else if ( c == '\"' )
        {
            outString ~= entity[3].str;
            ++i;
        }
        else if ( c == '\'' )
        {
            outString ~= entity[4].str;
            ++i;
        }
        else if ( c < 32 )
        {
            // Easy pass at non-alpha/numeric/symbol
            // Below 32 is symbolic.
            char[] buf = format( "&#x%02X;", cast(uint) ( c & 0xff ) );
            outString ~= buf;
            ++i;
        }
        else
        {
            outString ~= c;
            ++i;
        }
    }
}
|---|
| 598 |
|
|---|
| 599 |
|
|---|
| 600 |
// Return true if the next characters in the stream are any of the endTag sequences. |
|---|
| 601 |
// Ignore case only works for english, and should only be relied on when comparing |
|---|
| 602 |
// to English words: StringEqual( p, "version", true ) is fine. |
|---|
| 603 |
/// Return true if `p` starts with `endTag`.
///
/// Ignore case only works for english, and should only be relied on when
/// comparing to English words: StringEqual( p, "version", true ) is fine.
///
/// Only the first endTag.length characters of `p` are handed to find()/ifind().
/// The previous version searched all of `p` and then checked for a match at
/// index 0, which scanned the whole remaining input whenever the prefix did
/// not match. The answer is the same; the work is bounded by endTag.length.
///
/// `encoding` is accepted but not consulted.
static bool StringEqual( char[] p,
                         char[] endTag,
                         bool ignoreCase,
                         TiXmlEncoding encoding )
in
{
    assert( p );
    assert( endTag );
    assert( p.length > 0 );
}
body
{
    // A tag longer than the input cannot be a prefix of it.
    if ( endTag.length > p.length )
        return false;

    char[] head = p[0 .. endTag.length];

    if ( ignoreCase )
        return ( ifind( head, endTag ) == 0 );

    return ( find( head, endTag ) == 0 );
}
|---|
| 615 |
|
|---|
| 616 |
|
|---|
| 617 |
/// Human-readable messages; the index comments give the matching
/// TiXmlError value for each entry.
static char[][] errorString =
[
    /*  0 */ "No error",
    /*  1 */ "Error",
    /*  2 */ "Failed to open file",
    /*  3 */ "Memory allocation failed.",
    /*  4 */ "Error parsing Element.",
    /*  5 */ "Failed to read Element name",
    /*  6 */ "Error reading Element value.",
    /*  7 */ "Error reading Attributes.",
    /*  8 */ "Error: empty tag.",
    /*  9 */ "Error reading end tag.",
    /* 10 */ "Error parsing Unknown.",
    /* 11 */ "Error parsing Comment.",
    /* 12 */ "Error parsing Declaration.",
    /* 13 */ "Error document empty.",
    /* 14 */ "Error null (0) or unexpected EOF found in input stream.",
    /* 15 */ "Error parsing CDATA.",
    /* 16 */ "Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root."
];
|---|
| 636 |
|
|---|
| 637 |
// 0-based row/column of this object; Row() and Column() return these plus one.
TiXmlCursor location; |
|---|
| 638 |
|
|---|
| 639 |
/// Field containing a generic user pointer |
|---|
| 640 |
void* userData; |
|---|
| 641 |
|
|---|
| 642 |
// None of these methods are reliable for any language except English. |
|---|
| 643 |
// Good for approximation, not great for accuracy. |
|---|
| 644 |
// None of these methods are reliable for any language except English.
// Good for approximation, not great for accuracy.
//
// The decision is delegated entirely to u8isUniAlpha(), called with the byte
// reinterpreted as a char. `encoding` is accepted but not consulted.
// Figuring out alphabetical vs. not across encodings is tricky, so no attempt
// is made here to do anything smarter.
static int IsAlpha( ubyte anyByte, TiXmlEncoding encoding )
{
    char asChar = cast(char) anyByte;
    return u8isUniAlpha( asChar );
}
|---|
| 652 |
|
|---|
| 653 |
// Bytes of 127 and above are always accepted (returns 1). Below that, a byte
// is accepted when IsAlpha() accepts it or std.ctype.isdigit() does.
static int IsAlphaNum( ubyte anyByte, TiXmlEncoding encoding )
{
    if ( anyByte >= 127 )
        return 1;

    return ( IsAlpha( anyByte, encoding ) || std.ctype.isdigit( anyByte ) );
}
|---|
| 660 |
|
|---|
| 661 |
|
|---|
| 662 |
// Thin wrapper around std.ctype.tolower(); `encoding` is not consulted.
static int ToLower( int v, TiXmlEncoding encoding )
{
    int lowered = std.ctype.tolower( v );
    return lowered;
}
|---|
| 666 |
static void ConvertUTF32ToUTF8( ulong input, inout char[] output, inout int length ) |
|---|
| 667 |
{ |
|---|
| 668 |
const ulong BYTE_MASK = 0xBF; |
|---|
| 669 |
const ulong BYTE_MARK |
|---|