Attribute normalization closing bug #3597

Small fixes in encoding.c. First bits of real progressive parsing. Daniel
2024-10-26 12:25:09 +03:00 · 1999-11-17 17:32:38 +00:00 · 1999-11-17 17:32:38 +00:00 · 7f8585025f
commit 7f8585025f
parent d7e200c0b0
15 changed files with 303 additions and 94 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
+Wed Nov 17 18:28:06 CET 1999
+
+	* encoding.c: bug fix and typos
+	* xmlIO.[ch] parser.c: first bits toward real progressive parsing
+	* parser.c: added attribute normalization closing bug #3597
+	* test/att* result/att* SAXresult/att*: testcase for attribute
+	    normalization
+
 Mon Nov 15 18:50:56 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>

 	* configure.in: closing bug #3163 by adding extra flags for the
--- a/SAXresult/att1
+++ b/SAXresult/att1
@ -0,0 +1,5 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(doc, attr='to normalize with a space')
+SAX.endElement(doc)
+SAX.endDocument()
--- a/SAXresult/att2
+++ b/SAXresult/att2
@ -0,0 +1,5 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(doc, attr='to normalize with a space')
+SAX.endElement(doc)
+SAX.endDocument()
--- a/encoding.c
+++ b/encoding.c
@ -51,10 +51,10 @@

 /**
 * isolat1ToUTF8:
- * @out:  a pointer ot an array of bytes to store the result
- * @outlen:  the lenght of @out
- * @in:  a pointer ot an array of ISO Latin 1 chars
- * @inlen:  the lenght of @in
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of ISO Latin 1 chars
+ * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.
@ -86,10 +86,10 @@ isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)

 /**
 * UTF8Toisolat1:
- * @out:  a pointer ot an array of bytes to store the result
- * @outlen:  the lenght of @out
- * @in:  a pointer ot an array of UTF-8 chars
- * @inlen:  the lenght of @in
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of UTF-8 chars
+ * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
@ -123,10 +123,10 @@ UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)

 /**
 * UTF16ToUTF8:
- * @out:  a pointer ot an array of bytes to store the result
- * @outlen:  the lenght of @out
- * @in:  a pointer ot an array of UTF-16 chars (array of unsigned shorts)
- * @inlen:  the lenght of @in
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of UTF-16 chars (array of unsigned shorts)
+ * @inlen:  the length of @in
 *
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 * block of chars out.
@ -161,7 +161,7 @@ UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
        else if (c < 0x10000) {  *out++= (c >> 12) | 0xE0;  bits=  6; }
        else                  {  *out++= (c >> 18) | 0xF0;  bits= 12; }
 
-        for ( ; bits < 0; bits-= 6) {
+        for ( ; bits > 0; bits-= 6) {
            if (out >= outend)  return -1;
            *out++= (c >> bits) & 0x3F;
        }
@ -171,10 +171,10 @@ UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)

 /**
 * UTF8ToUTF16:
- * @out:  a pointer ot an array of shorts to store the result
- * @outlen:  the lenght of @out (number of shorts)
- * @in:  a pointer ot an array of UTF-8 chars
- * @inlen:  the lenght of @in
+ * @out:  a pointer to an array of shorts to store the result
+ * @outlen:  the length of @out (number of shorts)
+ * @in:  a pointer to an array of UTF-8 chars
+ * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
@ -264,7 +264,7 @@ xmlDetectCharEncoding(const unsigned char* in)

 /**
 * xmlParseCharEncoding:
- * @name:  the encoding name as parsed, in UTF-8 format (ASCCI actually)
+ * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
 *
 * Conpare the string to the known encoding schemes already known. Note
 * that the comparison is case insensitive accordingly to the section
@ -351,7 +351,7 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;

 /**
 * xmlNewCharEncodingHandler:
- * @name:  the encoding name, in UTF-8 format (ASCCI actually)
+ * @name:  the encoding name, in UTF-8 format (ASCII actually)
 * @input:  the xmlCharEncodingInputFunc to read that encoding
 * @output:  the xmlCharEncodingOutputFunc to write that encoding
 *
@ -409,7 +409,7 @@ xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
 *
 * Initialize the char encoding support, it registers the default
 * encoding supported.
- * NOTE: while public theis function usually don't need to be called
+ * NOTE: while public, this function usually doesn't need to be called
 *       in normal processing.
 */
 void
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@ -50,6 +50,9 @@ int	xmlParserInputBufferRead		(xmlParserInputBufferPtr in,
 						 int len);
 int	xmlParserInputBufferGrow		(xmlParserInputBufferPtr in,
 						 int len);
+int	xmlParserInputBufferPush		(xmlParserInputBufferPtr in,
+						 int len,
+						 char *buf);
 void	xmlFreeParserInputBuffer		(xmlParserInputBufferPtr in);
 char *	xmlParserGetDirectory			(const char *filename);

--- a/parser.c
+++ b/parser.c
@ -2497,58 +2497,146 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
- * Returns the AttValue parsed or NULL.
+ * 3.3.3 Attribute-Value Normalization:
+ * Before the value of an attribute is passed to the application or
+ * checked for validity, the XML processor must normalize it as follows: 
+ * - a character reference is processed by appending the referenced
+ *   character to the attribute value
+ * - an entity reference is processed by recursively processing the
+ *   replacement text of the entity 
+ * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
+ *   appending #x20 to the normalized value, except that only a single
+ *   #x20 is appended for a "#xD#xA" sequence that is part of an external
+ *   parsed entity or the literal entity value of an internal parsed entity 
+ * - other characters are processed by appending them to the normalized value 
+ *
+ * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */

 xmlChar *
 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
-    xmlChar *ret = NULL;
+    xmlChar limit = 0;		/* the quote char which opened the value */
+    xmlChar *buffer = NULL;	/* normalized value being built */
+    int buffer_size = 0;
+    xmlChar *out = NULL;	/* next free slot in buffer */
+
+    xmlChar *current = NULL;
+    xmlEntityPtr ent;
+    xmlChar cur;
+    int blank = 0;		/* set when the last char emitted was a blank */
+

    SHRINK;
    if (CUR == '"') {
 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
+	limit = '"';
        NEXT;
-	ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
-	if (CUR == '<') {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData,
-		   "Unescaped '<' not allowed in attributes values\n");
-	    ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
-	    ctxt->wellFormed = 0;
-	}
-        if (CUR != '"') {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
-	    ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
-	    ctxt->wellFormed = 0;
-	} else
-	    NEXT;
    } else if (CUR == '\'') {
+	limit = '\'';
 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
-	ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
-	if (CUR == '<') {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData,
-		   "Unescaped '<' not allowed in attributes values\n");
-	    ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
-	    ctxt->wellFormed = 0;
-	}
-        if (CUR != '\'') {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
-	    ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
-	    ctxt->wellFormed = 0;
-	} else
-	    NEXT;
    } else {
 	ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
 	ctxt->wellFormed = 0;
+	return(NULL);
    }
    
-    return(ret);
+    /*
+     * allocate a translation buffer.
+     */
+    buffer_size = 100;
+    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+    if (buffer == NULL) {
+	perror("xmlParseAttValue: malloc failed");
+	return(NULL);
+    }
+    out = buffer;
+
+    /*
+     * Ok loop until we reach one of the ending char or the end of input.
+     * Every branch which stores into the buffer re-checks the headroom
+     * afterward, so at least 10 slots stay free for the next stores and
+     * for the final terminator.
+     */
+    cur = CUR;
+    while ((cur != limit) && (cur != '<')) {
+
+	if (cur == 0) break;
+        if ((cur == '&') && (NXT(1) == '#')) {
+	    int val = xmlParseCharRef(ctxt);
+	    *out++ = val;
+	    /*
+	     * a long run of character references must not run past the
+	     * end of the buffer, so apply the same check as elsewhere
+	     */
+	    if (out - buffer > buffer_size - 10) {
+		int index = out - buffer;
+
+		growBuffer(buffer);
+		out = &buffer[index];
+	    }
+	    blank = 0;
+	} else if (cur == '&') {
+	    ent = xmlParseEntityRef(ctxt);
+	    if ((ent != NULL) &&
+		(ctxt->replaceEntities != 0)) {
+		/* substitute the entity content; nothing to copy if NULL */
+		current = ent->content;
+		while ((current != NULL) && (*current != 0)) {
+		    *out++ = *current++;
+		    if (out - buffer > buffer_size - 10) {
+			int index = out - buffer;
+
+			growBuffer(buffer);
+			out = &buffer[index];
+		    }
+		}
+	    } else if (ent != NULL) {
+		/* no substitution: put the reference back as "&name;" */
+		int i = xmlStrlen(ent->name);
+		const xmlChar *name = ent->name;
+
+		*out++ = '&';
+		/*
+		 * A single growth step may not be enough for a long name,
+		 * so repeat until the whole name fits.
+		 * NOTE(review): relies on growBuffer() enlarging
+		 * buffer_size on each call, as the other checks already
+		 * assume - confirm against the macro definition.
+		 */
+		while (out - buffer > buffer_size - i - 10) {
+		    int index = out - buffer;
+
+		    growBuffer(buffer);
+		    out = &buffer[index];
+		}
+		for (;i > 0;i--)
+		    *out++ = *name++;
+		*out++ = ';';
+	    }
+	    blank = 0;
+	} else {
+	    /*  invalid for UTF-8 , use COPY(out); !!!!!! */
+	    if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
+		/* a run of blanks collapses to a single #x20 */
+	        if (!blank) {
+		    *out++ = 0x20;
+		    if (out - buffer > buffer_size - 10) {
+			int index = out - buffer;
+
+			growBuffer(buffer);
+			out = &buffer[index];
+		    }
+		}
+		blank = 1;
+	    } else {
+		*out++ = cur;
+		if (out - buffer > buffer_size - 10) {
+		    int index = out - buffer;
+
+		    growBuffer(buffer);
+		    out = &buffer[index];
+		}
+		blank = 0;
+	    }
+	    NEXT;
+	}
+	cur = CUR;
+    }
+    *out++ = 0;
+    if (CUR == '<') {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt->userData,
+	       "Unescaped '<' not allowed in attributes values\n");
+	ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
+	ctxt->wellFormed = 0;
+    } else if (CUR != limit) {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
+	ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
+	ctxt->wellFormed = 0;
+    } else
+	NEXT;
+    /* the value collected so far is returned even after an error */
+    return(buffer);
 }

 /**
@ -4962,7 +5050,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
 *
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
 *
- * Returns the element name parsed
+ * Returns the element name parsed
 */

 xmlChar *
@ -5986,6 +6074,80 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
    return(0);
 }

+/************************************************************************
+ *									*
+ * 		Progressive parsing interfaces				*
+ *									*
+ ************************************************************************/
+
+/**
+ * xmlParseLookupSequence:
+ * @ctxt:  an XML parser context
+ * @first:  the first char to lookup
+ * @next:  the next char to lookup
+ *
+ * Check whether the sequence (first, next), or just (first) when next
+ * is zero, is available in the input stream.
+ * Since XML-1.0 is an LALR(2) grammar a sequence of 2 chars should be
+ * enough. If this doesn't prove true this function call may change.
+ *
+ * Returns 1 if the full sequence is available, 0 otherwise.
+ */
+int
+xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlChar next) {
+    /* placeholder: the lookup is not implemented yet, nothing is found */
+    int available = 0;
+
+    return(available);
+}
+
+/**
+ * xmlParseTry:
+ * @ctxt:  an XML parser context
+ *
+ * Try to progress on parsing.
+ * No parser state has an incremental handler yet, so for the time
+ * being this never makes progress and always returns zero.
+ *
+ * Returns zero if no parsing was possible
+ */
+int
+xmlParseTry(xmlParserCtxtPtr ctxt) {
+    int ret = 0;
+
+    if (ctxt == NULL)
+	return(0);
+
+    while (1) {
+        switch (ctxt->instate) {
+            case XML_PARSER_EOF:
+	        return(0);
+            case XML_PARSER_PROLOG:
+            case XML_PARSER_CONTENT:
+            case XML_PARSER_ENTITY_DECL:
+            case XML_PARSER_ENTITY_VALUE:
+            case XML_PARSER_ATTRIBUTE_VALUE:
+            case XML_PARSER_DTD:
+            case XML_PARSER_EPILOG:
+            case XML_PARSER_COMMENT:
+            case XML_PARSER_CDATA_SECTION:
+            default:
+		/*
+		 * Nothing can be consumed in these states yet: leave the
+		 * loop instead of spinning forever on an unchanged state.
+		 * This statement also gives the case labels the statement
+		 * they must be followed by.
+		 */
+		goto done;
+	}
+    }
+done:
+    return(ret);
+}
+
+/**
+ * xmlParseChunk:
+ * @ctxt:  an XML parser context
+ * @chunk:  a char array
+ * @size:  the size in bytes of the chunk
+ * @terminate:  last chunk indicator (not used yet)
+ *
+ * Parse a chunk of memory.
+ * For now the chunk is only handed to xmlParserInputBufferPush(), no
+ * parsing is started from here. An empty or NULL chunk is ignored.
+ *
+ * Returns zero if no error, the xmlParserErrors otherwise.
+ */
+xmlParserErrors
+xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
+              int terminate) {
+    if ((size > 0) && (chunk != NULL)) {
+	/*
+	 * xmlParserInputBufferPush() is declared with a plain char *
+	 * argument; the cast only drops the qualifier to match that
+	 * prototype.
+	 * NOTE(review): ctxt->input is passed where the prototype wants an
+	 * xmlParserInputBufferPtr - confirm this is the right member.
+	 * NOTE(review): a -1 result from the push is not reported to the
+	 * caller, only ctxt->errNo is.
+	 */
+	xmlParserInputBufferPush(ctxt->input, size, (char *) chunk);
+    }
+    return((xmlParserErrors) ctxt->errNo);
+}
+
 /************************************************************************
 *									*
 * 		I/O front end functions to the parser			*
--- a/result/SVG/flower2.xml
+++ b/result/SVG/flower2.xml
@ -2,42 +2,9 @@
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
 <svg width="800px" height="800px">
  <desc>This sample SVG file draws a flower</desc>
-  <g style="transform: matrix(1 0 0 -1 -25.88 798.60);
-     stroke: #000; stroke-width: 1">
-    <path style="fill: #1A5466" d="m 242.33 684.19
-          C 346.44 757.48 271.45 647.38 213.17 641.37
-          C 271.45 647.38 383.43 575.21 256.71 613.30
-          C 383.43 575.21 251.04 565.90 205.23 609.68
-          C 251.04 565.90 265.13 432.88 210.71 557.95
-          C 265.13 432.88 175.04 531.37 175.67 596.26
-          C 175.04 531.37 80.63  437.67 138.96 559.82
-          C  80.63 437.67 100.67 569.80 146.75 611.20
-          C 100.67 569.80 -31.14 585.98 95.49  617.49
-          C -31.14 585.98 83.94  652.25 140.24 643.26
-          C 83.94  652.25 13.98  766.12 113.04 687.55
-          C 13.98  766.12 137.45 716.63 161.05 668.30
-          C 137.45 716.63 182.02 842.45 178.39 717.23
-          C 182.02 842.45 220.90 714.46 193.51 667.46
-          C 220.90 714.46 346.44 757.48 242.33 684.19 z"/>
-    <path style="fill: #34AACD" d="M 235.33 691.19
-          C 339.44 764.48 264.45 654.38 206.17 648.37
-          C 264.45 654.38 376.43 582.21 249.71 620.30
-          C 376.43 582.21 244.04 572.90 198.23 616.68
-          C 244.04 572.90 258.13 439.88 203.71 564.95
-          C 258.13 439.88 168.04 538.37 168.67 603.26
-          C 168.04 538.37 73.63  444.67 131.96 566.82
-          C 73.63  444.67 93.67  576.80 139.75 618.20
-          C 93.67  576.80 -38.14 592.98  88.49 624.49
-          C -38.14 592.98 76.94  659.25 133.24 650.26
-          C 76.94  659.25 6.98   773.12 106.04 694.55
-          C 6.98   773.12 130.45 723.63 154.05 675.30
-          C 130.45 723.63 175.02 849.45 171.39 724.23
-          C 175.02 849.45 213.90 721.46 186.51 674.46
-          C 213.90 721.46 339.44 764.48 235.33 691.19 z"/>
-    <path style="fill: #F881BF" d="M 199.44 634.43
-          C 199.44 622.16 189.19 612.21 176.54 612.21
-          C 163.89 612.21 153.63 622.16 153.63 634.43
-          C 153.63 646.71 163.89 656.66 176.54 656.66
-          C 189.19 656.66 199.44 646.71 199.44 634.43 z"/>
+  <g style="transform: matrix(1 0 0 -1 -25.88 798.60); stroke: #000; stroke-width: 1">
+    <path style="fill: #1A5466" d="m 242.33 684.19 C 346.44 757.48 271.45 647.38 213.17 641.37 C 271.45 647.38 383.43 575.21 256.71 613.30 C 383.43 575.21 251.04 565.90 205.23 609.68 C 251.04 565.90 265.13 432.88 210.71 557.95 C 265.13 432.88 175.04 531.37 175.67 596.26 C 175.04 531.37 80.63 437.67 138.96 559.82 C 80.63 437.67 100.67 569.80 146.75 611.20 C 100.67 569.80 -31.14 585.98 95.49 617.49 C -31.14 585.98 83.94 652.25 140.24 643.26 C 83.94 652.25 13.98 766.12 113.04 687.55 C 13.98 766.12 137.45 716.63 161.05 668.30 C 137.45 716.63 182.02 842.45 178.39 717.23 C 182.02 842.45 220.90 714.46 193.51 667.46 C 220.90 714.46 346.44 757.48 242.33 684.19 z"/>
+    <path style="fill: #34AACD" d="M 235.33 691.19 C 339.44 764.48 264.45 654.38 206.17 648.37 C 264.45 654.38 376.43 582.21 249.71 620.30 C 376.43 582.21 244.04 572.90 198.23 616.68 C 244.04 572.90 258.13 439.88 203.71 564.95 C 258.13 439.88 168.04 538.37 168.67 603.26 C 168.04 538.37 73.63 444.67 131.96 566.82 C 73.63 444.67 93.67 576.80 139.75 618.20 C 93.67 576.80 -38.14 592.98 88.49 624.49 C -38.14 592.98 76.94 659.25 133.24 650.26 C 76.94 659.25 6.98 773.12 106.04 694.55 C 6.98 773.12 130.45 723.63 154.05 675.30 C 130.45 723.63 175.02 849.45 171.39 724.23 C 175.02 849.45 213.90 721.46 186.51 674.46 C 213.90 721.46 339.44 764.48 235.33 691.19 z"/>
+    <path style="fill: #F881BF" d="M 199.44 634.43 C 199.44 622.16 189.19 612.21 176.54 612.21 C 163.89 612.21 153.63 622.16 153.63 634.43 C 153.63 646.71 163.89 656.66 176.54 656.66 C 189.19 656.66 199.44 646.71 199.44 634.43 z"/>
  </g>
 </svg>
--- a/result/SVG/toap02.xml
+++ b/result/SVG/toap02.xml
@ -3,7 +3,7 @@
 <svg width="4in" height="3in">
  <defs>
    <symbol id="Triangle1" min-x="0" min-y="0" max-x="300" max-y="200">
-      <path d="M 50 0 L  50 200 L 250 0 z"/>
+      <path d="M 50 0 L 50 200 L 250 0 z"/>
    </symbol>
    <symbol id="Triangle2" min-x="0" min-y="0" max-x="300" max-y="200">
      <path d="M 50 0 L 250 200 L 250 0 z"/>
--- a/result/att1
+++ b/result/att1
@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<doc attr="to normalize with a space"/>
--- a/result/att2
+++ b/result/att2
@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<doc attr="to normalize with a space"/>
--- a/result/valid/REC-xml-19980210.xml
+++ b/result/valid/REC-xml-19980210.xml
@ -1674,8 +1674,7 @@ match <termref def="NT-Nmtokens">Nmtokens</termref>.
 <p>The XML processor must normalize attribute values before
 passing them to the application, as described in 
 <specref ref="AVNormalize"/>.</p>-->
-          <p><termdef id="dt-enumerated" term="Enumerated Attribute
-Values"><term>Enumerated attributes</term> can take one 
+          <p><termdef id="dt-enumerated" term="Enumerated Attribute Values"><term>Enumerated attributes</term> can take one 
 of a list of values provided in the declaration</termdef>. There are two
 kinds of enumerated types:
 <scrap lang="ebnf"><head>Enumerated Attribute Types</head><prod id="NT-EnumeratedType"><lhs>EnumeratedType</lhs><rhs><nt def="NT-NotationType">NotationType</nt> 
--- a/test/att1
+++ b/test/att1
@ -0,0 +1,2 @@
+<doc attr="to normalize
+with a    space"/>
--- a/test/att2
+++ b/test/att2
@ -0,0 +1 @@
+<doc attr="to normalize 
 with a space"/>
--- a/xmlIO.c
+++ b/xmlIO.c
@ -249,6 +249,55 @@ xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
    return(ret);
 }

+/**
+ * xmlParserInputBufferPush:
+ * @in:  a buffered parser input
+ * @len:  the size in bytes of the array.
+ * @buf:  a char array
+ *
+ * Push the content of the array in the input buffer
+ * This routine handles the I18N transcoding to internal UTF-8
+ * This is used when operating the parser in progressive (push) mode.
+ * The array is only read from, nothing is written back to it.
+ *
+ * Returns the number of chars read and stored in the buffer, or -1
+ *         in case of error.
+ */
+int
+xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, char *buf) {
+    int nbchars = 0;
+
+    if (len < 0) return(0);
+    if ((in == NULL) || (buf == NULL)) return(-1);
+    if (in->encoder != NULL) {
+	int size = (len + 1) * 2 * sizeof(xmlChar);
+	xmlChar *conv;
+
+	/* one extra byte so that the terminator below always fits */
+	conv = (xmlChar *) xmlMalloc(size + 1);
+	if (conv == NULL) {
+	    fprintf(stderr, "xmlParserInputBufferPush : out of memory !\n");
+	    return(-1);
+	}
+	nbchars = in->encoder->input(conv, size, BAD_CAST buf, len);
+	if (nbchars < 0) {
+	    /* a failed conversion must not be used as an index or length */
+	    xmlFree(conv);
+	    return(-1);
+	}
+	/*
+	 * TODO : we really need to have something atomic or the
+	 *        encoder must report the number of bytes read
+	 */
+	conv[nbchars] = 0;
+	xmlBufferAdd(in->buffer, conv, nbchars);
+	xmlFree(conv);
+    } else {
+	/*
+	 * no transcoding: the length is passed explicitly, so the
+	 * caller's array is not terminated (or otherwise touched) here
+	 */
+	nbchars = len;
+	xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
+    }
+#ifdef DEBUG_INPUT
+    fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
+            nbchars, in->buffer->use, in->buffer->size);
+#endif
+    return(nbchars);
+}
+
 /**
 * xmlParserInputBufferGrow:
 * @in:  a buffered parser input
@ -256,6 +305,7 @@ xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
 *
 * Grow up the content of the input buffer, the old data are preserved
 * This routine handle the I18N transcoding to internal UTF-8
+ * This routine is used when operating the parser in normal (pull) mode
 * TODO: one should be able to remove one extra copy
 *
 * Returns the number of chars read and stored in the buffer, or -1
--- a/xmlIO.h
+++ b/xmlIO.h
@ -50,6 +50,9 @@ int	xmlParserInputBufferRead		(xmlParserInputBufferPtr in,
 						 int len);
 int	xmlParserInputBufferGrow		(xmlParserInputBufferPtr in,
 						 int len);
+int	xmlParserInputBufferPush		(xmlParserInputBufferPtr in,
+						 int len,
+						 char *buf);
 void	xmlFreeParserInputBuffer		(xmlParserInputBufferPtr in);
 char *	xmlParserGetDirectory			(const char *filename);