Changeset 43
- Timestamp:
- 04/17/05 11:02:13 (4 years ago)
- Files:
-
- trunk/xml/ILexerStream.d (modified) (1 diff)
- trunk/xml/IXMLConsumer.d (modified) (1 diff)
- trunk/xml/IXMLConsumerAdapter.d (modified) (1 diff)
- trunk/xml/IXMLProvider.d (modified) (1 diff)
- trunk/xml/OfflineProvider.d (modified) (1 diff)
- trunk/xml/Position.d (modified) (1 diff)
- trunk/xml/SimpleStream.d (modified) (1 diff)
- trunk/xml/XMLException.d (modified) (1 diff)
- trunk/xml/XMLLexer.d (modified) (13 diffs)
- trunk/xml/XMLParser.d (modified) (5 diffs)
- trunk/xml/XMLToken.d (modified) (2 diffs)
- trunk/xmltest.d (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/xml/ILexerStream.d
r42 r43 27 27 module xml.ILexerStream; 28 28 29 import xml.Position;29 private import xml.Position; 30 30 31 31 interface ILexerStream{ trunk/xml/IXMLConsumer.d
r42 r43 26 26 module xml.IXMLConsumer; 27 27 28 import xml.XMLAttributes;28 private import xml.XMLAttributes; 29 29 30 30 interface IXMLConsumer{ trunk/xml/IXMLConsumerAdapter.d
r42 r43 26 26 module xml.IXMLConsumerAdapter; 27 27 28 import xml.IXMLConsumer; 28 private import xml.IXMLConsumer; 29 private import xml.XMLAttributes; 29 30 30 31 class IXMLConsumerAdapter : IXMLConsumer{ trunk/xml/IXMLProvider.d
r42 r43 26 26 module xml.IXMLProvider; 27 27 28 import xml.ILexerStream;28 private import xml.ILexerStream; 29 29 30 30 interface IXMLProvider{ 31 ILexerStream resolveURI(char[] uri);31 ILexerStream resolveURI(char[] pubid,char[] system); 32 32 } trunk/xml/OfflineProvider.d
r42 r43 26 26 module xml.OfflineProvider; 27 27 28 import xml.IXMLProvider; 28 private import xml.ILexerStream; 29 private import xml.IXMLProvider; 30 31 private import std.file; 29 32 30 33 class OfflineProvider : IXMLProvider{ 31 ILexerStream resolveURI(char[] uri){ 32 return null; // do nothing 34 ILexerStream resolveURI(char[] pubid,char[] system){ 35 //throw out the public identifier 36 37 if(system[0..5] == "http:"){ 38 return null; 39 } 40 41 // attempt to load the systemURI as a file 42 33 43 } 34 44 } trunk/xml/Position.d
r42 r43 26 26 module xml.Position; 27 27 28 import std.string;28 private import std.string; 29 29 30 30 struct Position{ trunk/xml/SimpleStream.d
r42 r43 26 26 module xml.SimpleStream; 27 27 28 import xml.ILexerStream; 29 import xml.XMLException; 30 31 28 private import xml.Position; 29 private import xml.ILexerStream; 30 private import xml.XMLException; 32 31 33 32 class SimpleStream : ILexerStream{ trunk/xml/XMLException.d
r42 r43 26 26 module xml.XMLException; 27 27 28 import xml.Position;28 private import xml.Position; 29 29 30 import std.string;30 private import std.string; 31 31 32 32 class XMLException : Exception{ trunk/xml/XMLLexer.d
r42 r43 26 26 module xml.XMLLexer; 27 27 28 import xml.ILexerStream;29 import xml.XMLToken;30 import xml.XMLException;31 32 import std.stdio;33 import std.utf;34 import std.conv;35 import std.string;28 private import xml.ILexerStream; 29 private import xml.XMLToken; 30 private import xml.XMLException; 31 32 private import std.stdio; 33 private import std.utf; 34 private import std.conv; 35 private import std.string; 36 36 37 37 class XMLLexer{ … … 47 47 position = 0; 48 48 tokens.length = 0; 49 } 50 51 public XMLToken[] getTokens(){ 52 return tokens; 49 53 } 50 54 … … 94 98 } 95 99 100 /* 101 used to allow parsed entities to inject themselves into the token stream *at* the read position 102 note: this will have the side effect of changing the result of peek() before and after the call, 103 since the position will point to a different token 104 */ 105 protected void insertTokens(XMLToken[] newTokens){ 106 tokens = tokens[0..position] ~ newTokens ~ tokens[position..$]; 107 } 108 96 109 private void addToken(char value){ 97 110 addToken(value,toString(value)); … … 112 125 113 126 protected void tokenize(ILexerStream stream){ 127 assert(this.stream == null); 114 128 this.stream = stream; 115 129 reset(); … … 157 171 this.stream = null; 158 172 } 173 174 /* hook to allow parsed entities to function */ 175 public void tokenizeDTD(ILexerStream stream){ 176 assert(this.stream == null); 177 this.stream = stream; 178 reset(); 179 tokenizeDTDElement(); 180 stream = null; 181 } 159 182 160 183 private void tokenizePI(){ … … 238 261 break; 239 262 263 case '%': 264 // handle parsed entity 265 //TODO: break into separate function 266 addToken(stream.get()); 267 addToken(TokName,stream.getUntil(';')); 268 addToken(stream.get()); 269 break; 270 240 271 case ']': 241 272 addToken(stream.get()); … … 254 285 } 255 286 } 256 } 257 287 } 288 258 289 private void tokenizeDTDElement(){ 259 290 while(stream.hasMore()){ … … 274 305 case ')': 275 306 case '*': 307 case '%': 308 case ';': 276 309 addToken(stream.get()); 277 310 break; … … 292 325 tokenizeEntity(); 293 326 break; 294 295 case '%': 296 //tokenizeParsedEntity(); 297 addToken(stream.get()); 298 break; 299 327 300 328 case '#': 301 329 tokenizeNmtoken(); … … 345 373 private void tokenizeString(char terminator){ 346 374 char[] value; 347 char[ 2] term;375 char[4] term; 348 376 term[0] = terminator; 349 377 term[1] = '&'; 378 term[2] = '%'; // capture tokens useful for PE-sensitive areas 379 term[3] = ';'; 350 380 351 381 addToken(TokString,toString(terminator)); … … 353 383 while(stream.hasMore()){ 354 384 addToken(TokChars,stream.getUntilIn(term)); 355 if(stream.peek() == terminator){ 385 char ch = stream.peek(); 386 if(ch == terminator){ 356 387 addToken(TokString,toString(stream.get())); 357 388 return; 358 389 } 359 else tokenizeEntity(); 390 else if(ch == '&'){ 391 tokenizeEntity(); 392 } 393 else{ 394 addToken(stream.get); // plug '%' and ';' in as separate bits 395 } 360 396 } 361 397 throw new XMLException(stream.getPosition(),"unexpected end of file"); … … 400 436 newValue[0] = cast(dchar)value; 401 437 402 addToken(Tok Entity,toUTF8(newValue));438 addToken(TokChars,toUTF8(newValue)); 403 439 } 404 440 else{ … … 407 443 stream.get(); //eat ';' token 408 444 } 409 410 public void tokenizeParsedEntity(){411 //TODO: could invovle pushing data back into the stream (ick!)412 }413 445 414 446 private static char[] toString(char ch){ trunk/xml/XMLParser.d
r42 r43 26 26 module xml.XMLParser; 27 27 28 import xml.IXMLProvider; 29 import xml.IXMLConsumer; 30 import xml.ILexerStream; 31 import xml.XMLLexer; 32 import xml.XMLToken; 33 34 import xml.SimpleStream; 35 import xml.OfflineProvider; 36 import xml.XMLAttributes; 37 38 import std.string; 28 private import xml.IXMLProvider; 29 private import xml.IXMLConsumer; 30 private import xml.ILexerStream; 31 private import xml.XMLLexer; 32 private import xml.XMLToken; 33 private import xml.XMLException; 34 35 private import xml.SimpleStream; 36 private import xml.OfflineProvider; 37 private import xml.XMLAttributes; 38 39 private import std.string; 39 40 40 41 /* … … 50 51 51 52 char[][char[]] entities; 53 XMLToken[][char[]] parsedEntities; 52 54 53 55 public this(){ … … 154 156 155 157 consumer.xmlStartDoctype(name,pubidLiteral,systemLiteral); 156 157 158 //TODO: implement doctype parsing 158 159 159 // parse all the elements under the doctype 160 160 if(peek().type == TokStartDTD){ 161 161 getNext(); // eat '[' token 162 163 while(peek().type != TokEndDTD) getNext(); 164 getNext(); // eat ']' token 165 162 parseDTD(); 166 163 getNextOptional(TokSpace); 167 164 } … … 170 167 171 168 consumer.xmlEndDoctype(name,pubidLiteral,systemLiteral); 169 } 170 171 private void parseDTD(){ 172 while(hasMore()){ 173 switch(peek().type){ 174 case TokSpace: 175 getNext(); // eat token; 176 break; 177 178 case TokComment: 179 consumer.xmlComment(getNext().value); 180 break; 181 182 case TokPercent: 183 getNext(); // eat token 184 parseParsedEntity(); 185 break; 186 187 case TokStartElem: 188 getNext(); // eat token; 189 190 if(peek().type == TokQuestion){ 191 getNext(); // eat token; 192 parseProcesingInstruction(); 193 } 194 else{ 195 getNext(TokBang); // next must be a '!' 196 197 // check out the element name 198 XMLToken tok = getNext(TokName); 199 switch(tok.value){ 200 case "ELEMENT": 201 parseDTDElement(); 202 break; 203 204 case "ATTLIST": 205 parseDTDAttlist(); 206 break; 207 208 case "ENTITY": 209 parseDTDEntity(); 210 break; 211 212 case "NOTATION": 213 parseDTDNotation(); 214 break; 215 216 default: 217 throw new XMLException(tok.pos,"invalid DTD element: '" ~ tok.value ~ "'"); 218 } 219 } 220 break; 221 222 case TokEndDTD: 223 getNext(); // eat ']' token 224 return; 225 226 default: 227 // shouldn't ever get here 228 throw new XMLException(peek().pos,"expected DTD element or closing ']'"); 229 break; 230 } 231 } 232 } 233 234 private void parseDTDElement(){ 235 /* 236 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' [VC: Unique Element Type Declaration] 237 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 238 */ 239 240 char[] name; 241 242 getNext(TokSpace); 243 name = getNext(TokName).value; 244 245 //TODO: unbreak me 246 while(hasMore()){ 247 if(getNext().type == TokEndElem){ 248 break; 249 } 250 } 251 } 252 253 private void parseDTDAttlist(){ 254 /* 255 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' [VC: Unique Element Type Declaration] 256 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 257 */ 258 259 //TODO: unbreak me 260 while(hasMore()){ 261 if(getNext().type == TokEndElem){ 262 break; 263 } 264 } 265 } 266 267 private void parseDTDEntity(){ 268 /* 269 [70] EntityDecl ::= GEDecl | PEDecl 270 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 271 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 272 [73] EntityDef ::= EntityValue| (ExternalID NDataDecl?) 273 [74] PEDef ::= EntityValue | ExternalID 274 275 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' 276 | "'" ([^%&'] | PEReference | Reference)* "'" 277 278 [67] Reference ::= EntityRef | CharRef 279 [68] EntityRef ::= '&' Name ';' 280 [69] PEReference ::= '%' Name ';' 281 282 [75] ExternalID ::= 'SYSTEM' S SystemLiteral 283 284 [76] NDataDecl ::= S 'NDATA' S Name 285 | 'PUBLIC' S PubidLiteral S SystemLiteral 286 */ 287 char[] name, value; 288 289 getNext(TokSpace); 290 291 switch(peek().type){ 292 case TokPercent: 293 // handle parsed entity 294 getNext(); // eat token; 295 getNext(TokSpace); 296 297 name = getNext(TokName).value; 298 getNext(TokSpace); 299 300 switch(peek().type){ 301 case TokString: 302 // handle parsed entity literal value 303 try{ 304 // use a new lexer to get the tokens needed to represent this entity 305 XMLLexer lexer = new XMLLexer(); 306 lexer.tokenizeDTD(new SimpleStream(parsePESensitiveString())); 307 parsedEntities[name] = lexer.getTokens(); 308 } 309 catch(XMLException e){ 310 throw new XMLException(peek().pos,"parsed entity value is not well-formed"); 311 } 312 break; 313 314 case TokName: 315 // handle parsed entity external value via provider 316 break; 317 318 default: 319 throw new XMLException(peek().pos,"invalid token: '" ~ peek().value ~ "', expected '%' or name"); 320 break; 321 } 322 break; 323 324 case TokName: 325 // handle normal entity 326 name = getNext(TokName).value; 327 getNext(TokSpace); 328 329 if(peek().type == TokString){ 330 entities[name] = parsePESensitiveString(); 331 } 332 else{ 333 // resolve external id via provider 334 // get ndata if present, create a parsed identity and set as value 335 } 336 break; 337 338 default: 339 throw new XMLException(peek().pos,"invalid token: '" ~ peek().value ~ "', expected '%' or name"); 340 break; 341 } 342 343 //TODO: unbreak me 344 while(hasMore()){ 345 if(getNext().type == TokEndElem){ 346 break; 347 } 348 } 349 } 350 351 private void parseDTDNotation(){ 352 /* 353 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' [VC: Unique Notation Name] 354 [83] PublicID ::= 'PUBLIC' S PubidLiteral 355 356 // note: this is identical to how the <?xml?> tag handles things 357 [75] ExternalID ::= 'SYSTEM' S SystemLiteral 358 | 'PUBLIC' S PubidLiteral S SystemLiteral 359 */ 360 361 //TODO: unbreak me 362 while(hasMore()){ 363 if(getNext().type == TokEndElem){ 364 break; 365 } 366 } 172 367 } 173 368 … … 314 509 break; 315 510 316 case TokChars:317 value ~= getNext().value;318 break;319 320 511 case TokString: 321 512 getNext(); // eat string terminator 322 513 return value; 323 default: 324 break; // there are no other possibilities 514 515 default: // mesh everything else 516 value ~= getNext().value; 517 break; 325 518 } 326 519 } 327 520 return ""; // never get here 328 521 } 522 523 /* allows for PE handling inside of a string */ 524 private char[] parsePESensitiveString(){ 525 //assemble the string values between TokString tokens 526 char[] value; 527 528 getNext(); // eat string terminator 529 while(hasMore()){ 530 switch(peek().type){ 531 case TokEntity: 532 value ~= parseEntity(); 533 break; 534 535 case TokString: 536 getNext(); // eat string terminator 537 return value; 538 539 case TokPercent: 540 getNext(); 541 parseParsedEntity(); 542 break; 543 544 default: // mesh everything else 545 value ~= getNext().value; 546 break; 547 } 548 } 549 return ""; // never get here 550 } 329 551 330 552 private char[] parseEntity(){ 331 553 //return the entity value for a given entity 332 554 char[] name = getNext().value; 333 char[] value = entities[name]; 334 if(value == null){ 555 if(!(name in entities)){ 335 556 throw new XMLException(peek().pos,"unrecognized entity '&" ~ name ~ ";'"); 336 557 } 337 return value; 558 return entities[name]; 559 } 560 561 /* Things get odd here because PE's can come in one of two ways: 562 TokPercent TokName TokSemi 563 TokPercent TokChars TokSemi 564 The latter is due to how the lexer attempts to make string handling context-insensitive 565 The result is that the lexer stays separate from the parser, at a small cost here 566 567 The routine injects the contents of the PE back into the lexer token stream 568 TODO: add recursion checks 569 */ 570 private void parseParsedEntity(){ 571 char[] name; 572 if(peek().type == TokChars){ 573 name = getNext().value; 574 } 575 else{ 576 name = getNext(TokName).value; // do this one last since it generates a better error message 577 } 578 if(!(name in parsedEntities)){ 579 throw new XMLException(peek().pos,"unrecognized parsed entity '%" ~ peek().value ~ ";'"); 580 } 581 else{ 582 insertTokens(parsedEntities[name]); 583 } 338 584 } 339 585 } trunk/xml/XMLToken.d
r42 r43 26 26 module xml.XMLToken; 27 27 28 import xml.Position;28 private import xml.Position; 29 29 30 import std.string;30 private import std.string; 31 31 32 32 enum{ … … 58 58 TokRParen = ')', 59 59 TokStar = '*', 60 TokPercent = '%', 61 TokSemi = ';', 60 62 61 63 TokNull trunk/xmltest.d
r41 r43 27 27 import xml.IXMLConsumer; 28 28 import xml.IXMLConsumerAdapter; 29 import xml.XMLAttributes; 29 30 30 31 import std.stdio; … … 66 67 r"xmlconf\ibm\valid\P03\ibm03v01.xml", 67 68 68 /* fails: needs doctype support to allow for entity definitions 69 69 70 r"xmlconf\ibm\valid\P09\ibm09v01.xml", 70 71 r"xmlconf\ibm\valid\P09\ibm09v02.xml", 71 r"xmlconf\ibm\valid\P09\ibm09v03.xml", 72 // r"xmlconf\ibm\valid\P09\ibm09v03.xml", // fails: needs local dtd loading support 72 73 r"xmlconf\ibm\valid\P09\ibm09v04.xml", 73 r"xmlconf\ibm\valid\P09\ibm09v05.xml", 74 // r"xmlconf\ibm\valid\P09\ibm09v05.xml", // fails: needs local dtd loading support 74 75 75 76 r"xmlconf\ibm\valid\P10\ibm10v01.xml", … … 81 82 r"xmlconf\ibm\valid\P10\ibm10v07.xml", 82 83 r"xmlconf\ibm\valid\P10\ibm10v08.xml", 83 */84 84 85 r"xmlconf\ibm\valid\P11\ibm11v01.xml", 85 86 r"xmlconf\ibm\valid\P11\ibm11v02.xml",
