diff --git a/ChangeLog b/ChangeLog index 67cc881f..1524135a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Sun Jan 5 02:23:20 CET 2003 Daniel Veillard + + * xmlreader.c: seriously changed the way data are pushed to + the underlying parser, go by block of 512 bytes instead of + trying to detect tag boundaries at that level. Changed the + way empty elements are detected and tagged. + * python/tests/reader.py python/tests/reader2.py + python/tests/reader3.py: small changes mostly due to context + reporting being different and DTD node being reported. Some + errors previously undetected are now caught and fixed. + * doc/xmlreader.html: flagged last section as TODO + Sat Jan 4 20:40:28 CET 2003 Daniel Veillard * python/libxml.py: integrated the Python 2.2 optimizations diff --git a/doc/xmlreader.html b/doc/xmlreader.html index e818a77c..b44a3ea8 100644 --- a/doc/xmlreader.html +++ b/doc/xmlreader.html @@ -390,6 +390,8 @@ the validation feature is just:

Entities substitution

+

@@TODO@@

+

Daniel Veillard

diff --git a/python/tests/reader.py b/python/tests/reader.py index 5fd61603..e9671718 100755 --- a/python/tests/reader.py +++ b/python/tests/reader.py @@ -14,56 +14,56 @@ input = libxml2.inputBuffer(f) reader = input.newTextReader("test1") ret = reader.Read() if ret != 1: - print "Error reading to first element" + print "test1: Error reading to first element" sys.exit(1) if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 1 or reader.HasAttributes() != 0: - print "Error reading the first element" + print "test1: Error reading the first element" sys.exit(1) ret = reader.Read() if ret != 1: - print "Error reading to second element" + print "test1: Error reading to second element" sys.exit(1) if reader.Name() != "b" or reader.IsEmptyElement() != 1 or \ reader.NodeType() != 1 or reader.HasAttributes() != 1: - print "Error reading the second element" + print "test1: Error reading the second element" sys.exit(1) ret = reader.Read() if ret != 1: - print "Error reading to third element" + print "test1: Error reading to third element" sys.exit(1) if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 1 or reader.HasAttributes() != 0: - print "Error reading the third element" + print "test1: Error reading the third element" sys.exit(1) ret = reader.Read() if ret != 1: - print "Error reading to text node" + print "test1: Error reading to text node" sys.exit(1) if reader.Name() != "#text" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 3 or reader.HasAttributes() != 0 or \ reader.Value() != "content of c": - print "Error reading the text node" + print "test1: Error reading the text node" sys.exit(1) ret = reader.Read() if ret != 1: - print "Error reading to end of third element" + print "test1: Error reading to end of third element" sys.exit(1) if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 15 or reader.HasAttributes() != 0: - print "Error reading the end of third element" + 
print "test1: Error reading the end of third element" sys.exit(1) ret = reader.Read() if ret != 1: - print "Error reading to end of first element" + print "test1: Error reading to end of first element" sys.exit(1) if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 15 or reader.HasAttributes() != 0: - print "Error reading the end of first element" + print "test1: Error reading the end of first element" sys.exit(1) ret = reader.Read() if ret != 0: - print "Error reading to end of document" + print "test1: Error reading to end of document" sys.exit(1) # @@ -239,7 +239,7 @@ if reader.MoveToNextAttribute() != 0: # # a couple of tests for namespace nodes # -f = StringIO.StringIO("""""") +f = StringIO.StringIO("""""") input = libxml2.inputBuffer(f) reader = input.newTextReader("test6") ret = reader.Read() @@ -256,7 +256,7 @@ if reader.NamespaceUri() != "http://www.w3.org/2000/xmlns/" or \ print "test6: failed to read the namespace node" sys.exit(1) -f = StringIO.StringIO("""""") +f = StringIO.StringIO("""""") input = libxml2.inputBuffer(f) reader = input.newTextReader("test7") ret = reader.Read() diff --git a/python/tests/reader2.py b/python/tests/reader2.py index 6333aee4..7519a901 100755 --- a/python/tests/reader2.py +++ b/python/tests/reader2.py @@ -14,13 +14,13 @@ libxml2.debugMemory(1) err="" expect="""../../test/valid/rss.xml:172: validity error: Element rss does not carry attribute version - ^ + ^ ../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined -

+

An arc is contained within an ^ -../../test/valid/xlink.xml:529: validity error: attribute def line 199 references an unknown ID "dt-xlg" - - ^ +../../test/valid/xlink.xml:530: validity error: attribute def line 199 references an unknown ID "dt-xlg" + +^ """ def callback(ctx, str): global err @@ -61,7 +61,8 @@ s = """ bbb """ -expect="""1,test +expect="""10,test +1,test 3,#text 1,x 1,c @@ -110,7 +111,8 @@ s = """ """ tst_ent = """hello""" -expect="""1 test +expect="""10 test +1 test 3 #text 1 x 3 #text @@ -161,7 +163,8 @@ s = """""" -expect="""1 test 0 +expect="""10 test 0 +1 test 0 3 #text 1 1 x 1 1 y 2 @@ -213,7 +216,8 @@ s = """""" -expect="""1 test 0 +expect="""10 test 0 +1 test 0 3 #text 1 5 x 1 3 #text 1 diff --git a/python/tests/reader3.py b/python/tests/reader3.py index 1affbe98..2a4fbe07 100755 --- a/python/tests/reader3.py +++ b/python/tests/reader3.py @@ -1,6 +1,6 @@ #!/usr/bin/python -u # -# this tests the validation with the XmlTextReader interface +# this tests the entities substitutions with the XmlTextReader interface # import sys import StringIO @@ -22,6 +22,11 @@ f = StringIO.StringIO(docstr) input = libxml2.inputBuffer(f) reader = input.newTextReader("test_noent") ret = reader.Read() +if ret != 1: + print "Error reading to root" + sys.exit(1) +if reader.Name() == "doc" or reader.NodeType() == 10: + ret = reader.Read() if ret != 1: print "Error reading to root" sys.exit(1) @@ -55,6 +60,11 @@ input = libxml2.inputBuffer(f) reader = input.newTextReader("test_noent") reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1) ret = reader.Read() +if ret != 1: + print "Error reading to root" + sys.exit(1) +if reader.Name() == "doc" or reader.NodeType() == 10: + ret = reader.Read() if ret != 1: print "Error reading to root" sys.exit(1) diff --git a/xmlreader.c b/xmlreader.c index 0577334b..316ff15a 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -15,6 +15,8 @@ * - provide an API to expand part of the tree * - provide an API to preserve part of the tree * - 
Streaming XInclude support + * - validation against a provided DTD + * - XML Schemas validation * - setting(s) for NoBlanks * - performances and tuning ... */ @@ -99,7 +101,6 @@ struct _xmlTextReader { xmlNodePtr curnode;/* current attribute node */ int depth; /* depth of the current node */ xmlNodePtr faketext;/* fake xmlNs chld */ - int wasempty;/* was the last node empty */ /* entity stack when traversing entities content */ xmlNodePtr ent; /* Current Entity Ref Node */ @@ -108,6 +109,8 @@ struct _xmlTextReader { xmlNodePtr *entTab; /* array of entities */ }; +static const char *xmlTextReaderIsEmpty = "This element is empty"; + #ifdef DEBUG_READER static void xmlTextReaderDebug(xmlTextReaderPtr reader) { @@ -222,13 +225,10 @@ xmlTextReaderStartElement(void *ctx, const xmlChar *fullname, */ origctxt = reader->ctxt; reader->startElement(ctx, fullname, atts); -#if 0 - 123 - if (origctxt->validate) { - ctxt->valid &= xmlValidatePushElement(&origctxt->vctxt, - ctxt->myDoc, ctxt->node, fullname); - } -#endif + if ((ctxt->node != NULL) && (ctxt->input != NULL) && + (ctxt->input->cur != NULL) && (ctxt->input->cur[0] == '/') && + (ctxt->input->cur[1] == '>')) + ctxt->node->_private = (void *) xmlTextReaderIsEmpty; } if (reader != NULL) reader->state = XML_TEXTREADER_ELEMENT; @@ -258,12 +258,6 @@ xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) { reader->endElement(ctx, fullname); } - if (reader != NULL) { - if (reader->state == XML_TEXTREADER_ELEMENT) - reader->wasempty = 1; - else - reader->wasempty = 0; - } } /** @@ -290,13 +284,6 @@ xmlTextReaderCharacters(void *ctx, const xmlChar *ch, int len) * when processing an entity, the context may have been changed */ origctxt = reader->ctxt; - -#if 0 - 123 - if (origctxt->validate) { - ctxt->valid &= xmlValidatePushCData(&origctxt->vctxt, ch, len); - } -#endif } } @@ -319,10 +306,6 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len) #endif if ((reader != NULL) && (reader->cdataBlock != NULL)) { 
reader->cdataBlock(ctx, ch, len); - - if (ctxt->validate) { - ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len); - } } } @@ -365,8 +348,12 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { } else break; } - if (inbuf->content[cur] == '>') { - cur = cur + 1; + /* + * parse by block of 512 bytes + */ + if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) { + if (cur < inbuf->use) + cur = cur + 1; val = xmlParseChunk(reader->ctxt, (const char *) &inbuf->content[reader->cur], cur - reader->cur, 0); @@ -412,7 +399,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { if (reader->mode == XML_TEXTREADER_MODE_EOF) { if (reader->mode != XML_TEXTREADER_DONE) { val = xmlParseChunk(reader->ctxt, - (const char *) &inbuf->content[reader->cur], 0, 1); + (const char *) &inbuf->content[reader->cur], + cur - reader->cur, 1); + reader->cur = cur; reader->mode = XML_TEXTREADER_DONE; } } @@ -564,7 +553,7 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) { */ int xmlTextReaderRead(xmlTextReaderPtr reader) { - int val, olddepth = 0, wasempty = 0; + int val, olddepth = 0; xmlTextReaderState oldstate = 0; xmlNodePtr oldnode = NULL; @@ -588,14 +577,11 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { if (val < 0) return(-1); } while ((reader->ctxt->node == NULL) && - (reader->mode != XML_TEXTREADER_MODE_EOF)); + ((reader->mode != XML_TEXTREADER_MODE_EOF) && + (reader->mode != XML_TEXTREADER_DONE))); if (reader->ctxt->node == NULL) { if (reader->ctxt->myDoc != NULL) { reader->node = reader->ctxt->myDoc->children; - if ((reader->ctxt->input != NULL) && - (reader->ctxt->input->cur != NULL) && - (reader->ctxt->input->cur[-2] != '/')) - reader->wasempty = -1; } if (reader->node == NULL) return(-1); @@ -610,16 +596,6 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { oldstate = reader->state; olddepth = reader->ctxt->nodeNr; oldnode = reader->node; - /* - * the

<p></p> vs. <p/>

distinction at the API level royally sucks, - * Microsoft priviledge ... - */ - if (reader->wasempty == -1) - wasempty = 0; - else - wasempty = (((reader->wasempty == 1) && (reader->ctxt->node != NULL) && - (reader->ctxt->node->last == reader->node)) || - (reader->node != reader->ctxt->node)); get_next_node: /* @@ -653,7 +629,8 @@ get_next_node: if (reader->node->next != NULL) { if ((oldstate == XML_TEXTREADER_ELEMENT) && (reader->node->type == XML_ELEMENT_NODE) && - (wasempty == 0)) { + (reader->node->children == NULL) && + (reader->node->_private != (void *)xmlTextReaderIsEmpty)) { reader->state = XML_TEXTREADER_END; goto node_found; } @@ -675,7 +652,8 @@ get_next_node: } if ((oldstate == XML_TEXTREADER_ELEMENT) && (reader->node->type == XML_ELEMENT_NODE) && - (wasempty == 0)) { + (reader->node->children == NULL) && + (reader->node->_private != (void *)xmlTextReaderIsEmpty)) { reader->state = XML_TEXTREADER_END; goto node_found; } @@ -1733,15 +1711,7 @@ xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader) { return(0); if (reader->state == XML_TEXTREADER_END) return(0); - if (reader->wasempty == -1) - return(0); - if (reader->node != reader->ctxt->node) - return(1); - if ((reader->ctxt->node != NULL) && - (reader->node == reader->ctxt->node->last) && - (reader->wasempty == 1)) - return(1); - return(0); + return(reader->node->_private == (void *)xmlTextReaderIsEmpty); } /**