diff --git a/ChangeLog b/ChangeLog
index 67cc881f..1524135a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Sun Jan 5 02:23:20 CET 2003 Daniel Veillard
+
+ * xmlreader.c: seriously changed the way data are pushed to
+ the underlying parser, going by blocks of 512 bytes instead of
+ trying to detect tag boundaries at that level. Changed the
+ way empty elements are detected and tagged.
+ * python/tests/reader.py python/tests/reader2.py
+ python/tests/reader3.py: small changes mostly due to context
+ reporting being different and DTD node being reported. Some
+ errors previously undetected are now caught and fixed.
+ * doc/xmlreader.html: flagged last section as TODO
+
Sat Jan 4 20:40:28 CET 2003 Daniel Veillard
* python/libxml.py: integrated the Python 2.2 optimizations
diff --git a/doc/xmlreader.html b/doc/xmlreader.html
index e818a77c..b44a3ea8 100644
--- a/doc/xmlreader.html
+++ b/doc/xmlreader.html
@@ -390,6 +390,8 @@ the validation feature is just:
+@@TODO@@
+
Daniel Veillard
diff --git a/python/tests/reader.py b/python/tests/reader.py
index 5fd61603..e9671718 100755
--- a/python/tests/reader.py
+++ b/python/tests/reader.py
@@ -14,56 +14,56 @@ input = libxml2.inputBuffer(f)
reader = input.newTextReader("test1")
ret = reader.Read()
if ret != 1:
- print "Error reading to first element"
+ print "test1: Error reading to first element"
sys.exit(1)
if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 1 or reader.HasAttributes() != 0:
- print "Error reading the first element"
+ print "test1: Error reading the first element"
sys.exit(1)
ret = reader.Read()
if ret != 1:
- print "Error reading to second element"
+ print "test1: Error reading to second element"
sys.exit(1)
if reader.Name() != "b" or reader.IsEmptyElement() != 1 or \
reader.NodeType() != 1 or reader.HasAttributes() != 1:
- print "Error reading the second element"
+ print "test1: Error reading the second element"
sys.exit(1)
ret = reader.Read()
if ret != 1:
- print "Error reading to third element"
+ print "test1: Error reading to third element"
sys.exit(1)
if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 1 or reader.HasAttributes() != 0:
- print "Error reading the third element"
+ print "test1: Error reading the third element"
sys.exit(1)
ret = reader.Read()
if ret != 1:
- print "Error reading to text node"
+ print "test1: Error reading to text node"
sys.exit(1)
if reader.Name() != "#text" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 3 or reader.HasAttributes() != 0 or \
reader.Value() != "content of c":
- print "Error reading the text node"
+ print "test1: Error reading the text node"
sys.exit(1)
ret = reader.Read()
if ret != 1:
- print "Error reading to end of third element"
+ print "test1: Error reading to end of third element"
sys.exit(1)
if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 15 or reader.HasAttributes() != 0:
- print "Error reading the end of third element"
+ print "test1: Error reading the end of third element"
sys.exit(1)
ret = reader.Read()
if ret != 1:
- print "Error reading to end of first element"
+ print "test1: Error reading to end of first element"
sys.exit(1)
if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 15 or reader.HasAttributes() != 0:
- print "Error reading the end of first element"
+ print "test1: Error reading the end of first element"
sys.exit(1)
ret = reader.Read()
if ret != 0:
- print "Error reading to end of document"
+ print "test1: Error reading to end of document"
sys.exit(1)
#
@@ -239,7 +239,7 @@ if reader.MoveToNextAttribute() != 0:
#
# a couple of tests for namespace nodes
#
-f = StringIO.StringIO("""""")
+f = StringIO.StringIO("""""")
input = libxml2.inputBuffer(f)
reader = input.newTextReader("test6")
ret = reader.Read()
@@ -256,7 +256,7 @@ if reader.NamespaceUri() != "http://www.w3.org/2000/xmlns/" or \
print "test6: failed to read the namespace node"
sys.exit(1)
-f = StringIO.StringIO("""""")
+f = StringIO.StringIO("""""")
input = libxml2.inputBuffer(f)
reader = input.newTextReader("test7")
ret = reader.Read()
diff --git a/python/tests/reader2.py b/python/tests/reader2.py
index 6333aee4..7519a901 100755
--- a/python/tests/reader2.py
+++ b/python/tests/reader2.py
@@ -14,13 +14,13 @@ libxml2.debugMemory(1)
err=""
expect="""../../test/valid/rss.xml:172: validity error: Element rss does not carry attribute version
- ^
+ ^
../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined
-
+ An arc is contained within an
^
-../../test/valid/xlink.xml:529: validity error: attribute def line 199 references an unknown ID "dt-xlg"
-
- ^
+../../test/valid/xlink.xml:530: validity error: attribute def line 199 references an unknown ID "dt-xlg"
+
+^
"""
def callback(ctx, str):
global err
@@ -61,7 +61,8 @@ s = """
bbb
"""
-expect="""1,test
+expect="""10,test
+1,test
3,#text
1,x
1,c
@@ -110,7 +111,8 @@ s = """
"""
tst_ent = """hello"""
-expect="""1 test
+expect="""10 test
+1 test
3 #text
1 x
3 #text
@@ -161,7 +163,8 @@ s = """"""
-expect="""1 test 0
+expect="""10 test 0
+1 test 0
3 #text 1
1 x 1
1 y 2
@@ -213,7 +216,8 @@ s = """"""
-expect="""1 test 0
+expect="""10 test 0
+1 test 0
3 #text 1
5 x 1
3 #text 1
diff --git a/python/tests/reader3.py b/python/tests/reader3.py
index 1affbe98..2a4fbe07 100755
--- a/python/tests/reader3.py
+++ b/python/tests/reader3.py
@@ -1,6 +1,6 @@
#!/usr/bin/python -u
#
-# this tests the validation with the XmlTextReader interface
+# this tests entity substitution with the XmlTextReader interface
#
import sys
import StringIO
@@ -22,6 +22,11 @@ f = StringIO.StringIO(docstr)
input = libxml2.inputBuffer(f)
reader = input.newTextReader("test_noent")
ret = reader.Read()
+if ret != 1:
+ print "Error reading to root"
+ sys.exit(1)
+if reader.Name() == "doc" or reader.NodeType() == 10:
+ ret = reader.Read()
if ret != 1:
print "Error reading to root"
sys.exit(1)
@@ -55,6 +60,11 @@ input = libxml2.inputBuffer(f)
reader = input.newTextReader("test_noent")
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
ret = reader.Read()
+if ret != 1:
+ print "Error reading to root"
+ sys.exit(1)
+if reader.Name() == "doc" or reader.NodeType() == 10:
+ ret = reader.Read()
if ret != 1:
print "Error reading to root"
sys.exit(1)
diff --git a/xmlreader.c b/xmlreader.c
index 0577334b..316ff15a 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -15,6 +15,8 @@
* - provide an API to expand part of the tree
* - provide an API to preserve part of the tree
* - Streaming XInclude support
+ * - validation against a provided DTD
+ * - XML Schemas validation
* - setting(s) for NoBlanks
* - performances and tuning ...
*/
@@ -99,7 +101,6 @@ struct _xmlTextReader {
xmlNodePtr curnode;/* current attribute node */
int depth; /* depth of the current node */
xmlNodePtr faketext;/* fake xmlNs chld */
- int wasempty;/* was the last node empty */
/* entity stack when traversing entities content */
xmlNodePtr ent; /* Current Entity Ref Node */
@@ -108,6 +109,8 @@ struct _xmlTextReader {
xmlNodePtr *entTab; /* array of entities */
};
+static const char *xmlTextReaderIsEmpty = "This element is empty";
+
#ifdef DEBUG_READER
static void
xmlTextReaderDebug(xmlTextReaderPtr reader) {
@@ -222,13 +225,10 @@ xmlTextReaderStartElement(void *ctx, const xmlChar *fullname,
*/
origctxt = reader->ctxt;
reader->startElement(ctx, fullname, atts);
-#if 0
- 123
- if (origctxt->validate) {
- ctxt->valid &= xmlValidatePushElement(&origctxt->vctxt,
- ctxt->myDoc, ctxt->node, fullname);
- }
-#endif
+ if ((ctxt->node != NULL) && (ctxt->input != NULL) &&
+ (ctxt->input->cur != NULL) && (ctxt->input->cur[0] == '/') &&
+ (ctxt->input->cur[1] == '>'))
+ ctxt->node->_private = (void *) xmlTextReaderIsEmpty;
}
if (reader != NULL)
reader->state = XML_TEXTREADER_ELEMENT;
@@ -258,12 +258,6 @@ xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) {
reader->endElement(ctx, fullname);
}
- if (reader != NULL) {
- if (reader->state == XML_TEXTREADER_ELEMENT)
- reader->wasempty = 1;
- else
- reader->wasempty = 0;
- }
}
/**
@@ -290,13 +284,6 @@ xmlTextReaderCharacters(void *ctx, const xmlChar *ch, int len)
* when processing an entity, the context may have been changed
*/
origctxt = reader->ctxt;
-
-#if 0
- 123
- if (origctxt->validate) {
- ctxt->valid &= xmlValidatePushCData(&origctxt->vctxt, ch, len);
- }
-#endif
}
}
@@ -319,10 +306,6 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len)
#endif
if ((reader != NULL) && (reader->cdataBlock != NULL)) {
reader->cdataBlock(ctx, ch, len);
-
- if (ctxt->validate) {
- ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len);
- }
}
}
@@ -365,8 +348,12 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
} else
break;
}
- if (inbuf->content[cur] == '>') {
- cur = cur + 1;
+ /*
+ * parse by block of 512 bytes
+ */
+ if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) {
+ if (cur < inbuf->use)
+ cur = cur + 1;
val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur],
cur - reader->cur, 0);
@@ -412,7 +399,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
if (reader->mode == XML_TEXTREADER_MODE_EOF) {
if (reader->mode != XML_TEXTREADER_DONE) {
val = xmlParseChunk(reader->ctxt,
- (const char *) &inbuf->content[reader->cur], 0, 1);
+ (const char *) &inbuf->content[reader->cur],
+ cur - reader->cur, 1);
+ reader->cur = cur;
reader->mode = XML_TEXTREADER_DONE;
}
}
@@ -564,7 +553,7 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) {
*/
int
xmlTextReaderRead(xmlTextReaderPtr reader) {
- int val, olddepth = 0, wasempty = 0;
+ int val, olddepth = 0;
xmlTextReaderState oldstate = 0;
xmlNodePtr oldnode = NULL;
@@ -588,14 +577,11 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
if (val < 0)
return(-1);
} while ((reader->ctxt->node == NULL) &&
- (reader->mode != XML_TEXTREADER_MODE_EOF));
+ ((reader->mode != XML_TEXTREADER_MODE_EOF) &&
+ (reader->mode != XML_TEXTREADER_DONE)));
if (reader->ctxt->node == NULL) {
if (reader->ctxt->myDoc != NULL) {
reader->node = reader->ctxt->myDoc->children;
- if ((reader->ctxt->input != NULL) &&
- (reader->ctxt->input->cur != NULL) &&
- (reader->ctxt->input->cur[-2] != '/'))
- reader->wasempty = -1;
}
if (reader->node == NULL)
return(-1);
@@ -610,16 +596,6 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
oldstate = reader->state;
olddepth = reader->ctxt->nodeNr;
oldnode = reader->node;
- /*
- * the vs. distinction at the API level royally sucks,
- * Microsoft priviledge ...
- */
- if (reader->wasempty == -1)
- wasempty = 0;
- else
- wasempty = (((reader->wasempty == 1) && (reader->ctxt->node != NULL) &&
- (reader->ctxt->node->last == reader->node)) ||
- (reader->node != reader->ctxt->node));
get_next_node:
/*
@@ -653,7 +629,8 @@ get_next_node:
if (reader->node->next != NULL) {
if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->node->type == XML_ELEMENT_NODE) &&
- (wasempty == 0)) {
+ (reader->node->children == NULL) &&
+ (reader->node->_private != (void *)xmlTextReaderIsEmpty)) {
reader->state = XML_TEXTREADER_END;
goto node_found;
}
@@ -675,7 +652,8 @@ get_next_node:
}
if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->node->type == XML_ELEMENT_NODE) &&
- (wasempty == 0)) {
+ (reader->node->children == NULL) &&
+ (reader->node->_private != (void *)xmlTextReaderIsEmpty)) {
reader->state = XML_TEXTREADER_END;
goto node_found;
}
@@ -1733,15 +1711,7 @@ xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader) {
return(0);
if (reader->state == XML_TEXTREADER_END)
return(0);
- if (reader->wasempty == -1)
- return(0);
- if (reader->node != reader->ctxt->node)
- return(1);
- if ((reader->ctxt->node != NULL) &&
- (reader->node == reader->ctxt->node->last) &&
- (reader->wasempty == 1))
- return(1);
- return(0);
+ return(reader->node->_private == (void *)xmlTextReaderIsEmpty);
}
/**