1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

Convert XML parser to the new input buffers

The main changes are when the internal of the buffers structure
were adressed directly, we now use routines coming from buf.h
The routine xmlParserInputRead() which wasn't used anywhere is
deprecated too.
This commit is contained in:
Daniel Veillard 2012-07-16 14:19:49 +08:00
parent 65c7d3b2e6
commit 768eb3b82d
2 changed files with 108 additions and 154 deletions

111
parser.c
View File

@ -84,6 +84,9 @@
#include <lzma.h>
#endif
#include "buf.h"
#include "enc.h"
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
@ -10560,8 +10563,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
buf = in->base;
len = in->length;
} else {
buf = in->buf->buffer->content;
len = in->buf->buffer->use;
buf = xmlBufContent(in->buf->buffer);
len = xmlBufUse(in->buf->buffer);
}
/* take into account the sequence length */
if (third) len -= 2;
@ -10831,20 +10834,18 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
* remainng chars to avoid them stalling in the non-converted
* buffer.
*/
if ((ctxt->input->buf->raw != NULL) &&
(ctxt->input->buf->raw->use > 0)) {
int base = ctxt->input->base -
ctxt->input->buf->buffer->content;
int current = ctxt->input->cur - ctxt->input->base;
if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
size_t base = ctxt->input->base -
xmlBufContent(ctxt->input->buf->buffer);
size_t current = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
ctxt->input->base = ctxt->input->buf->buffer->content + base;
ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) +
base;
ctxt->input->cur = ctxt->input->base + current;
ctxt->input->end =
&ctxt->input->buf->buffer->content[
ctxt->input->buf->buffer->use];
ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
}
avail = ctxt->input->buf->buffer->use -
avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
}
if (avail < 1)
@ -11282,7 +11283,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else
avail = ctxt->input->buf->buffer->use -
avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
@ -11370,7 +11371,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
@ -11414,7 +11416,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
@ -11469,29 +11472,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int base, i;
xmlChar *buf;
xmlChar quote = 0;
size_t use;
base = ctxt->input->cur - ctxt->input->base;
if (base < 0) return(0);
if (ctxt->checkIndex > base)
base = ctxt->checkIndex;
buf = ctxt->input->buf->buffer->content;
for (;(unsigned int) base < ctxt->input->buf->buffer->use;
base++) {
buf = xmlBufContent(ctxt->input->buf->buffer);
use = xmlBufUse(ctxt->input->buf->buffer);
for (;(unsigned int) base < use; base++) {
if (quote != 0) {
if (buf[base] == quote)
quote = 0;
continue;
continue;
}
if ((quote == 0) && (buf[base] == '<')) {
int found = 0;
/* special handling of comments */
if (((unsigned int) base + 4 <
ctxt->input->buf->buffer->use) &&
if (((unsigned int) base + 4 < use) &&
(buf[base + 1] == '!') &&
(buf[base + 2] == '-') &&
(buf[base + 3] == '-')) {
for (;(unsigned int) base + 3 <
ctxt->input->buf->buffer->use; base++) {
for (;(unsigned int) base + 3 < use; base++) {
if ((buf[base] == '-') &&
(buf[base + 1] == '-') &&
(buf[base + 2] == '>')) {
@ -11522,17 +11524,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "%c%c%c%c: ", buf[base],
buf[base + 1], buf[base + 2], buf[base + 3]);
#endif
if ((unsigned int) base +1 >=
ctxt->input->buf->buffer->use)
if ((unsigned int) base +1 >= use)
break;
if (buf[base + 1] == ']') {
/* conditional crap, skip both ']' ! */
base++;
continue;
}
for (i = 1;
(unsigned int) base + i < ctxt->input->buf->buffer->use;
i++) {
for (i = 1; (unsigned int) base + i < use; i++) {
if (buf[base + i] == '>') {
#if 0
fprintf(stderr, "found\n");
@ -11550,7 +11549,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "end of stream\n");
#endif
break;
}
not_end_of_int_subset:
continue; /* for */
@ -11708,7 +11707,7 @@ xmldecl_done:
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
int cur = ctxt->input->cur - ctxt->input->base;
int res;
@ -11753,10 +11752,9 @@ xmldecl_done:
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
ctxt->input->base = ctxt->input->buf->buffer->content + base;
ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
ctxt->input->cur = ctxt->input->base + cur;
ctxt->input->end =
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@ -11768,7 +11766,7 @@ xmldecl_done:
(in->raw != NULL)) {
int nbchars;
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
nbchars = xmlCharEncInput(in);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,
@ -11806,14 +11804,14 @@ xmldecl_done:
avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else
avail = ctxt->input->buf->buffer->use -
avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
}
if ((ctxt->instate != XML_PARSER_EOF) &&
(ctxt->instate != XML_PARSER_EPILOG)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
}
}
if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
}
@ -11823,7 +11821,7 @@ xmldecl_done:
}
ctxt->instate = XML_PARSER_EOF;
}
return((xmlParserErrors) ctxt->errNo);
return((xmlParserErrors) ctxt->errNo);
}
/************************************************************************
@ -11927,10 +11925,9 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
}
}
inputStream->buf = buf;
inputStream->base = inputStream->buf->buffer->content;
inputStream->cur = inputStream->buf->buffer->content;
inputStream->end =
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
inputStream->cur =
inputStream->base = xmlBufContent(inputStream->buf->buffer);
inputStream->end = xmlBufEnd(inputStream->buf->buffer);
inputPush(ctxt, inputStream);
@ -11942,15 +11939,14 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
if ((size == 0) || (chunk == NULL)) {
ctxt->charset = XML_CHAR_ENCODING_NONE;
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
int cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
ctxt->input->base = ctxt->input->buf->buffer->content + base;
ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
ctxt->input->cur = ctxt->input->base + cur;
ctxt->input->end =
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@ -13874,9 +13870,9 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) {
input->filename = NULL;
input->buf = buf;
input->base = input->buf->buffer->content;
input->cur = input->buf->buffer->content;
input->end = &input->buf->buffer->content[input->buf->buffer->use];
input->cur =
input->base = xmlBufContent(input->buf->buffer);
input->end = xmlBufEnd(input->buf->buffer);
inputPush(ctxt, input);
return(ctxt);
@ -14446,25 +14442,22 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
inputStream->base = inputStream->buf->buffer->content;
inputStream->cur = inputStream->buf->buffer->content;
inputStream->end =
&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
inputStream->cur =
inputStream->base = xmlBufContent(buf->buffer);
inputStream->end = xmlBufEnd(buf->buffer);
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
int cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
ctxt->input->base = ctxt->input->buf->buffer->content + base;
ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
ctxt->input->cur = ctxt->input->base + cur;
ctxt->input->end =
&ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
use];
ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

View File

@ -55,6 +55,9 @@
#include <libxml/globals.h>
#include <libxml/chvalid.h>
#include "buf.h"
#include "enc.h"
/*
* Various global defaults for parsing
*/
@ -225,7 +228,7 @@ xmlIsLetter(int c) {
/************************************************************************
* *
* Input handling functions for progressive parsing *
* Input handling functions for progressive parsing *
* *
************************************************************************/
@ -242,7 +245,7 @@ xmlIsLetter(int c) {
static
void check_buffer(xmlParserInputPtr in) {
if (in->base != in->buf->buffer->content) {
if (in->base != xmlBufContent(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: base mismatch problem\n");
}
@ -250,17 +253,17 @@ void check_buffer(xmlParserInputPtr in) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur < base problem\n");
}
if (in->cur > in->base + in->buf->buffer->use) {
if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur > base + use problem\n");
}
xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
(int) in, (int) in->buf->buffer->content, in->cur - in->base,
in->buf->buffer->use, in->buf->buffer->size);
xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
(int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
xmlBufUse(in->buf->buffer));
}
#else
#define CHECK_BUFFER(in)
#define CHECK_BUFFER(in)
#endif
@ -269,50 +272,13 @@ void check_buffer(xmlParserInputPtr in) {
* @in: an XML parser input
* @len: an indicative size for the lookahead
*
* This function refresh the input for the parser. It doesn't try to
* preserve pointers to the input buffer, and discard already read data
* This function was internal and is deprecated.
*
* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
* end of this entity
* Returns -1 as this is an error to use it.
*/
int
xmlParserInputRead(xmlParserInputPtr in, int len) {
int ret;
int used;
int indx;
if (in == NULL) return(-1);
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Read\n");
#endif
if (in->buf == NULL) return(-1);
if (in->base == NULL) return(-1);
if (in->cur == NULL) return(-1);
if (in->buf->buffer == NULL) return(-1);
if (in->buf->readcallback == NULL) return(-1);
CHECK_BUFFER(in);
used = in->cur - in->buf->buffer->content;
ret = xmlBufferShrink(in->buf->buffer, used);
if (ret > 0) {
in->cur -= ret;
in->consumed += ret;
}
ret = xmlParserInputBufferRead(in->buf, len);
if (in->base != in->buf->buffer->content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
in->base = in->buf->buffer->content;
in->cur = &in->buf->buffer->content[indx];
}
in->end = &in->buf->buffer->content[in->buf->buffer->use];
CHECK_BUFFER(in);
return(ret);
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
return(-1);
}
/**
@ -323,15 +289,16 @@ xmlParserInputRead(xmlParserInputPtr in, int len) {
* This function increase the input for the parser. It tries to
* preserve pointers to the input buffer, and keep already read data
*
* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
* Returns the amount of char read, or -1 in case of error, 0 indicate the
* end of this entity
*/
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
int ret;
int indx;
size_t ret;
size_t indx;
const xmlChar *content;
if (in == NULL) return(-1);
if ((in == NULL) || (len < 0)) return(-1);
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
@ -343,15 +310,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
CHECK_BUFFER(in);
indx = in->cur - in->base;
if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
CHECK_BUFFER(in);
return(0);
}
if (in->buf->readcallback != NULL)
if (in->buf->readcallback != NULL) {
ret = xmlParserInputBufferGrow(in->buf, len);
else
} else
return(0);
/*
@ -360,15 +327,17 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
* pointer arithmetic. Insure will raise it as a bug but in
* that specific case, that's not !
*/
if (in->base != in->buf->buffer->content) {
content = xmlBufContent(in->buf->buffer);
if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
in->base = in->buf->buffer->content;
in->cur = &in->buf->buffer->content[indx];
in->base = content;
in->cur = &content[indx];
}
in->end = &in->buf->buffer->content[in->buf->buffer->use];
in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
@ -383,9 +352,10 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
*/
void
xmlParserInputShrink(xmlParserInputPtr in) {
int used;
int ret;
int indx;
size_t used;
size_t ret;
size_t indx;
const xmlChar *content;
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@ -398,35 +368,36 @@ xmlParserInputShrink(xmlParserInputPtr in) {
CHECK_BUFFER(in);
used = in->cur - in->buf->buffer->content;
used = in->cur - xmlBufContent(in->buf->buffer);
/*
* Do not shrink on large buffers whose only a tiny fraction
* was consumed
*/
if (used > INPUT_CHUNK) {
ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) {
in->cur -= ret;
in->consumed += ret;
}
in->end = &in->buf->buffer->content[in->buf->buffer->use];
in->end = xmlBufEnd(in->buf->buffer);
}
CHECK_BUFFER(in);
if (in->buf->buffer->use > INPUT_CHUNK) {
if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
return;
}
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
if (in->base != in->buf->buffer->content) {
content = xmlBufContent(in->buf->buffer);
if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
in->base = in->buf->buffer->content;
in->cur = &in->buf->buffer->content[indx];
in->base = content;
in->cur = &content[indx];
}
in->end = &in->buf->buffer->content[in->buf->buffer->use];
in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
}
@ -945,11 +916,6 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
* *
************************************************************************/
/* defined in encoding.c, not public */
int
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in, int len);
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler, int len);
@ -1189,7 +1155,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
/*
* Is there already some content down the pipe to convert ?
*/
if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
if (xmlBufIsEmpty(input->buf->buffer) == 0) {
int processed;
unsigned int use;
@ -1225,19 +1191,17 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
xmlBufferShrink(input->buf->buffer, processed);
xmlBufShrink(input->buf->buffer, processed);
input->buf->raw = input->buf->buffer;
input->buf->buffer = xmlBufferCreate();
input->buf->buffer = xmlBufCreate();
input->buf->rawconsumed = processed;
use = input->buf->raw->use;
use = xmlBufUse(input->buf->raw);
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInFunc(input->buf->encoder,
input->buf->buffer,
input->buf->raw);
nbchars = xmlCharEncInput(input->buf);
} else {
/*
* convert just enough to get
@ -1245,10 +1209,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
input->buf->buffer,
input->buf->raw,
len);
nbchars = xmlCharEncFirstLineInput(input->buf, len);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
@ -1256,9 +1217,9 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
NULL);
return (-1);
}
input->buf->rawconsumed += use - input->buf->raw->use;
input->base = input->cur = input->buf->buffer->content;
input->end = &input->base[input->buf->buffer->use];
input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
input->base = input->cur = xmlBufContent(input->buf->buffer);
input->end = xmlBufEnd(input->buf->buffer);
}
return (0);
@ -1426,9 +1387,9 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
}
inputStream->filename = NULL;
inputStream->buf = input;
inputStream->base = inputStream->buf->buffer->content;
inputStream->cur = inputStream->buf->buffer->content;
inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
inputStream->cur =
inputStream->base = xmlBufContent(inputStream->buf->buffer);
inputStream->end = xmlBufEnd(inputStream->buf->buffer);
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
@ -1581,9 +1542,9 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
if (URI != NULL) xmlFree((char *) URI);
inputStream->directory = directory;
inputStream->base = inputStream->buf->buffer->content;
inputStream->cur = inputStream->buf->buffer->content;
inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
inputStream->base =
inputStream->cur = xmlBufContent(inputStream->buf->buffer);
inputStream->end = xmlBufEnd(inputStream->buf->buffer);
if ((ctxt->directory == NULL) && (directory != NULL))
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
return(inputStream);