mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-05 05:47:00 +03:00
- Push mode for the HTML parser (new calls)
- Improved the memory debugger to provide content information; cleanups, last known mem leak killed. Daniel
This commit is contained in:
parent
be849cf33f
commit
5e5c62351f
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
||||
Wed Dec 29 15:29:52 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* HTMLparser.[ch] testHTML.c: added push mode for the HTML parser
|
||||
too htmlCreatePushParserCtxt() and htmlParseChunk()
|
||||
* parser.c: a bit of cleanup.
|
||||
* SAX.c, HTMLparser.c: some attributes may not have values (contrary
|
||||
to XML) removed the last mem leak known
|
||||
* HTMLtree.c: output message cleanup
|
||||
* xmlmemory.c: display content info about memory blocks
|
||||
* result/HTML/wired.* : missing att value warning change
|
||||
|
||||
Tue Dec 28 17:42:41 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* doc/* : rebuilt the documentation
|
||||
|
845
HTMLparser.c
845
HTMLparser.c
@ -41,11 +41,15 @@
|
||||
#include "valid.h"
|
||||
#include "parserInternals.h"
|
||||
#include "xmlIO.h"
|
||||
#include "xml-error.h"
|
||||
|
||||
#define HTML_MAX_NAMELEN 1000
|
||||
#define INPUT_CHUNK 50
|
||||
#define HTML_PARSER_BIG_BUFFER_SIZE 1024
|
||||
#define HTML_PARSER_BUFFER_SIZE 100
|
||||
|
||||
/* #define DEBUG */
|
||||
/* #define DEBUG_PUSH */
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
@ -145,20 +149,6 @@ PUSH_AND_POP(extern, xmlChar*, name)
|
||||
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
|
||||
}}
|
||||
|
||||
/****************************************
|
||||
#define NEXT ((*ctxt->input->cur) ? \
|
||||
(((*(ctxt->input->cur) == '\n') ? \
|
||||
(ctxt->input->line++, ctxt->input->col = 1) : \
|
||||
(ctxt->input->col++)), \
|
||||
(ctxt->input->cur++), \
|
||||
((*ctxt->input->cur) ? \
|
||||
(xmlParserInputGrow(ctxt->input, 100), \
|
||||
ctxt->input->cur): \
|
||||
(ctxt->input->cur))) : \
|
||||
((xmlParserInputGrow(ctxt->input, 100) > 0) ? \
|
||||
ctxt->input->cur: \
|
||||
(xmlPopInput(ctxt), ctxt->input->cur)))
|
||||
****************************************/
|
||||
#else
|
||||
#endif
|
||||
|
||||
@ -926,7 +916,7 @@ htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
|
||||
/*
|
||||
* allocate a translation buffer.
|
||||
*/
|
||||
buffer_size = 1000;
|
||||
buffer_size = HTML_PARSER_BIG_BUFFER_SIZE;
|
||||
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
|
||||
if (buffer == NULL) {
|
||||
perror("htmlDecodeEntities: malloc failed");
|
||||
@ -1128,6 +1118,66 @@ htmlSwitchEncoding(htmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to handle streams *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* htmlFreeInputStream:
|
||||
* @input: an htmlParserInputPtr
|
||||
*
|
||||
* Free up an input stream.
|
||||
*/
|
||||
void
|
||||
htmlFreeInputStream(htmlParserInputPtr input) {
|
||||
if (input == NULL) return;
|
||||
|
||||
if (input->filename != NULL) xmlFree((char *) input->filename);
|
||||
if (input->directory != NULL) xmlFree((char *) input->directory);
|
||||
if ((input->free != NULL) && (input->base != NULL))
|
||||
input->free((xmlChar *) input->base);
|
||||
if (input->buf != NULL)
|
||||
xmlFreeParserInputBuffer(input->buf);
|
||||
memset(input, -1, sizeof(htmlParserInput));
|
||||
xmlFree(input);
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlNewInputStream:
|
||||
* @ctxt: an HTML parser context
|
||||
*
|
||||
* Create a new input stream structure
|
||||
* Returns the new input stream or NULL
|
||||
*/
|
||||
htmlParserInputPtr
|
||||
htmlNewInputStream(htmlParserCtxtPtr ctxt) {
|
||||
htmlParserInputPtr input;
|
||||
|
||||
input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
|
||||
if (input == NULL) {
|
||||
ctxt->errNo = XML_ERR_NO_MEMORY;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"malloc: couldn't allocate a new input stream\n");
|
||||
ctxt->errNo = XML_ERR_NO_MEMORY;
|
||||
return(NULL);
|
||||
}
|
||||
input->filename = NULL;
|
||||
input->directory = NULL;
|
||||
input->base = NULL;
|
||||
input->cur = NULL;
|
||||
input->buf = NULL;
|
||||
input->line = 1;
|
||||
input->col = 1;
|
||||
input->buf = NULL;
|
||||
input->free = NULL;
|
||||
input->consumed = 0;
|
||||
input->length = 0;
|
||||
return(input);
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
@ -1268,12 +1318,13 @@ xmlChar *
|
||||
htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
|
||||
xmlChar *ret = NULL;
|
||||
int i = 0;
|
||||
xmlChar loc[100];
|
||||
xmlChar loc[HTML_PARSER_BUFFER_SIZE];
|
||||
|
||||
if (!IS_LETTER(CUR) && (CUR != '_') &&
|
||||
(CUR != ':')) return(NULL);
|
||||
|
||||
while ((i < 100) && ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)))) {
|
||||
while ((i < HTML_PARSER_BUFFER_SIZE) &&
|
||||
((IS_LETTER(CUR)) || (IS_DIGIT(CUR)))) {
|
||||
if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
|
||||
else loc[i] = CUR;
|
||||
i++;
|
||||
@ -1615,7 +1666,7 @@ void
|
||||
htmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) {
|
||||
xmlChar *buf = NULL;
|
||||
int len = 0;
|
||||
int size = 100;
|
||||
int size = HTML_PARSER_BUFFER_SIZE;
|
||||
xmlChar q;
|
||||
|
||||
buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
|
||||
@ -1742,17 +1793,16 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
||||
/**
|
||||
* htmlParseComment:
|
||||
* @ctxt: an HTML parser context
|
||||
* @create: should we create a node, or just skip the content
|
||||
*
|
||||
* Parse an XML (SGML) comment <!-- .... -->
|
||||
*
|
||||
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
|
||||
*/
|
||||
void
|
||||
htmlParseComment(htmlParserCtxtPtr ctxt, int create) {
|
||||
htmlParseComment(htmlParserCtxtPtr ctxt) {
|
||||
xmlChar *buf = NULL;
|
||||
int len = 0;
|
||||
int size = 100;
|
||||
int size = HTML_PARSER_BUFFER_SIZE;
|
||||
register xmlChar s, r, q;
|
||||
|
||||
/*
|
||||
@ -1793,10 +1843,8 @@ htmlParseComment(htmlParserCtxtPtr ctxt, int create) {
|
||||
ctxt->wellFormed = 0;
|
||||
} else {
|
||||
NEXT;
|
||||
if (create) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL)) {
|
||||
ctxt->sax->comment(ctxt->userData, buf);
|
||||
}
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL)) {
|
||||
ctxt->sax->comment(ctxt->userData, buf);
|
||||
}
|
||||
}
|
||||
xmlFree(buf);
|
||||
@ -1935,6 +1983,9 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
|
||||
/*
|
||||
* Create the document accordingly to the DOCTYPE
|
||||
*/
|
||||
if (ctxt->myDoc != NULL)
|
||||
xmlFreeDoc(ctxt->myDoc);
|
||||
|
||||
ctxt->myDoc = htmlNewDoc(URI, ExternalID);
|
||||
|
||||
/*
|
||||
@ -1968,7 +2019,7 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
|
||||
|
||||
xmlChar *
|
||||
htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
||||
xmlChar *name, *val;
|
||||
xmlChar *name, *val = NULL;
|
||||
|
||||
*value = NULL;
|
||||
name = htmlParseName(ctxt);
|
||||
@ -1990,10 +2041,8 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
||||
} else {
|
||||
/* TODO : some attribute must have values, some may not */
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Specification mandate value for attribute %s\n", name);
|
||||
ctxt->wellFormed = 0;
|
||||
return(NULL);
|
||||
ctxt->sax->warning(ctxt->userData,
|
||||
"No value for attribute %s\n", name);
|
||||
}
|
||||
|
||||
*value = val;
|
||||
@ -2060,7 +2109,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
||||
|
||||
GROW;
|
||||
attname = htmlParseAttribute(ctxt, &attvalue);
|
||||
if ((attname != NULL) && (attvalue != NULL)) {
|
||||
if (attname != NULL) {
|
||||
/*
|
||||
* Well formedness requires at most one declaration of an attribute
|
||||
*/
|
||||
@ -2072,7 +2121,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
||||
attname);
|
||||
ctxt->wellFormed = 0;
|
||||
xmlFree(attname);
|
||||
xmlFree(attvalue);
|
||||
if (attvalue != NULL)
|
||||
xmlFree(attvalue);
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
@ -2127,7 +2177,10 @@ failed:
|
||||
ctxt->sax->startElement(ctxt->userData, name, atts);
|
||||
|
||||
if (atts != NULL) {
|
||||
for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
|
||||
for (i = 0;i < nbatts;i++) {
|
||||
if (atts[i] != NULL)
|
||||
xmlFree((xmlChar *) atts[i]);
|
||||
}
|
||||
xmlFree(atts);
|
||||
}
|
||||
if (name != NULL) xmlFree(name);
|
||||
@ -2330,7 +2383,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
if ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
htmlParseComment(ctxt, 1);
|
||||
htmlParseComment(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2384,11 +2437,11 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||
void
|
||||
htmlParseElement(htmlParserCtxtPtr ctxt) {
|
||||
const xmlChar *openTag = CUR_PTR;
|
||||
xmlChar *oldname;
|
||||
xmlChar *name;
|
||||
xmlChar *currentNode = NULL;
|
||||
htmlElemDescPtr info;
|
||||
htmlParserNodeInfo node_info;
|
||||
xmlChar *oldname;
|
||||
int depth = ctxt->nameNr;
|
||||
|
||||
/* Capture start position */
|
||||
@ -2585,8 +2638,9 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
while ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
ctxt->myDoc = htmlNewDoc(NULL, NULL);
|
||||
htmlParseComment(ctxt, 1);
|
||||
if (ctxt->myDoc == NULL)
|
||||
ctxt->myDoc = htmlNewDoc(NULL, NULL);
|
||||
htmlParseComment(ctxt);
|
||||
SKIP_BLANKS;
|
||||
}
|
||||
|
||||
@ -2721,6 +2775,7 @@ htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
|
||||
xmlFree(oldname);
|
||||
}
|
||||
if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
|
||||
if (ctxt->directory != NULL) xmlFree(ctxt->directory);
|
||||
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
|
||||
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax != &htmlDefaultSAXHandler))
|
||||
@ -2766,11 +2821,717 @@ htmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
|
||||
return(ctxt);
|
||||
}
|
||||
|
||||
/********************************************************************************
|
||||
* *
|
||||
* User entry points *
|
||||
* *
|
||||
********************************************************************************/
|
||||
/************************************************************************
|
||||
* *
|
||||
* Progressive parsing interfaces *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* htmlParseLookupSequence:
|
||||
* @ctxt: an HTML parser context
|
||||
* @first: the first char to lookup
|
||||
* @next: the next char to lookup or zero
|
||||
* @third: the next char to lookup or zero
|
||||
*
|
||||
* Try to find if a sequence (first, next, third) or just (first next) or
|
||||
* (first) is available in the input stream.
|
||||
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
||||
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
||||
* parser, do not use liberally.
|
||||
* This is basically similar to xmlParseLookupSequence()
|
||||
*
|
||||
* Returns the index to the current parsing point if the full sequence
|
||||
* is available, -1 otherwise.
|
||||
*/
|
||||
int
|
||||
htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
||||
xmlChar next, xmlChar third) {
|
||||
int base, len;
|
||||
htmlParserInputPtr in;
|
||||
const xmlChar *buf;
|
||||
|
||||
in = ctxt->input;
|
||||
if (in == NULL) return(-1);
|
||||
base = in->cur - in->base;
|
||||
if (base < 0) return(-1);
|
||||
if (ctxt->checkIndex > base)
|
||||
base = ctxt->checkIndex;
|
||||
if (in->buf == NULL) {
|
||||
buf = in->base;
|
||||
len = in->length;
|
||||
} else {
|
||||
buf = in->buf->buffer->content;
|
||||
len = in->buf->buffer->use;
|
||||
}
|
||||
/* take into account the sequence length */
|
||||
if (third) len -= 2;
|
||||
else if (next) len --;
|
||||
for (;base < len;base++) {
|
||||
if (buf[base] == first) {
|
||||
if (third != 0) {
|
||||
if ((buf[base + 1] != next) ||
|
||||
(buf[base + 2] != third)) continue;
|
||||
} else if (next != 0) {
|
||||
if (buf[base + 1] != next) continue;
|
||||
}
|
||||
ctxt->checkIndex = 0;
|
||||
#ifdef DEBUG_PUSH
|
||||
if (next == 0)
|
||||
fprintf(stderr, "HPP: lookup '%c' found at %d\n",
|
||||
first, base);
|
||||
else if (third == 0)
|
||||
fprintf(stderr, "HPP: lookup '%c%c' found at %d\n",
|
||||
first, next, base);
|
||||
else
|
||||
fprintf(stderr, "HPP: lookup '%c%c%c' found at %d\n",
|
||||
first, next, third, base);
|
||||
#endif
|
||||
return(base - (in->cur - in->base));
|
||||
}
|
||||
}
|
||||
ctxt->checkIndex = base;
|
||||
#ifdef DEBUG_PUSH
|
||||
if (next == 0)
|
||||
fprintf(stderr, "HPP: lookup '%c' failed\n", first);
|
||||
else if (third == 0)
|
||||
fprintf(stderr, "HPP: lookup '%c%c' failed\n", first, next);
|
||||
else
|
||||
fprintf(stderr, "HPP: lookup '%c%c%c' failed\n", first, next, third);
|
||||
#endif
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/**
 * htmlParseTry:
 * @ctxt:  an HTML parser context
 *
 * Try to progress on parsing.
 *
 * This is the push-mode state machine: it loops on ctxt->instate and,
 * for each state, either consumes input through the regular parsing
 * routines (htmlParseDocTypeDecl, htmlParseComment, htmlParseStartTag,
 * htmlParseEndTag, htmlParseReference, htmlParseCharData) or jumps to
 * `done` when the data currently buffered is not sufficient to finish
 * the construct at the current position. htmlParseLookupSequence() is
 * used to check that the terminator of a construct has been received
 * before the construct is parsed.
 *
 * NOTE(review): `ret` is initialised to 0 and never assigned afterwards,
 * so as written the function returns 0 on every path, regardless of
 * whether any input was consumed.
 *
 * Returns zero if no parsing was possible
 */
int
htmlParseTry(htmlParserCtxtPtr ctxt) {
    int ret = 0;
    htmlParserInputPtr in;
    int avail;
    xmlChar cur, next;

#ifdef DEBUG_PUSH
    /* Trace the state the parser is in when entering the function. */
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            fprintf(stderr, "HPP: try EOF\n"); break;
        case XML_PARSER_START:
            fprintf(stderr, "HPP: try START\n"); break;
        case XML_PARSER_MISC:
            fprintf(stderr, "HPP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            fprintf(stderr, "HPP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            fprintf(stderr, "HPP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            fprintf(stderr, "HPP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            fprintf(stderr, "HPP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            fprintf(stderr, "HPP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            fprintf(stderr, "HPP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            fprintf(stderr, "HPP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            fprintf(stderr, "HPP: try ENTITY_VALUE\n");break;
        case XML_PARSER_ATTRIBUTE_VALUE:
            fprintf(stderr, "HPP: try ATTRIBUTE_VALUE\n");break;
        case XML_PARSER_DTD:
            fprintf(stderr, "HPP: try DTD\n");break;
        case XML_PARSER_EPILOG:
            fprintf(stderr, "HPP: try EPILOG\n");break;
        case XML_PARSER_PI:
            fprintf(stderr, "HPP: try PI\n");break;
    }
#endif

    while (1) {

        /*
         * Compute the number of bytes not yet consumed: from the fixed
         * length for a memory input, from the buffer fill level otherwise.
         */
        in = ctxt->input;
        if (in == NULL) break;
        if (in->buf == NULL)
            avail = in->length - (in->cur - in->base);
        else
            avail = in->buf->buffer->use - (in->cur - in->base);
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                /*
                 * Very first chars read from the document flow.
                 */
                cur = in->cur[0];
                if (IS_BLANK(cur)) {
                    SKIP_BLANKS;
                    if (in->buf == NULL)
                        avail = in->length - (in->cur - in->base);
                    else
                        avail = in->buf->buffer->use - (in->cur - in->base);
                }
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                    ctxt->sax->setDocumentLocator(ctxt->userData,
                                                  &xmlDefaultSAXLocator);
                /*
                 * NOTE(review): avail is not re-checked here before the
                 * following bytes are read; presumably the buffer is
                 * zero-terminated so the comparisons stop early - confirm.
                 */
                cur = in->cur[0];
                next = in->cur[1];
                if ((cur == '<') && (next == '!') &&
                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                    (UPP(8) == 'E')) {
                    /* Wait until the whole DOCTYPE declaration is there. */
                    if (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing internal subset\n");
#endif
                    htmlParseDocTypeDecl(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering PROLOG\n");
#endif
                } else {
                    /* No DOCTYPE at the very start: create a default doc. */
                    ctxt->myDoc = htmlNewDoc(NULL, NULL);
                    ctxt->instate = XML_PARSER_MISC;
                }
                /*
                 * NOTE(review): this trace is emitted on both branches,
                 * including the one that just entered PROLOG.
                 */
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering MISC\n");
#endif
                break;
            case XML_PARSER_MISC:
                /* Before any DOCTYPE: comments, a DOCTYPE or the first tag. */
                SKIP_BLANKS;
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
                    avail = in->buf->buffer->use - (in->cur - in->base);
                if (avail < 2)
                    goto done;
                cur = in->cur[0];
                next = in->cur[1];
                if ((cur == '<') && (next == '!') &&
                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                    /* Only parse the comment once its end was received. */
                    if (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing Comment\n");
#endif
                    htmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_MISC;
                } else if ((cur == '<') && (next == '!') &&
                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                    (UPP(8) == 'E')) {
                    if (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing internal subset\n");
#endif
                    htmlParseDocTypeDecl(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering PROLOG\n");
#endif
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* "<!" seen but too few bytes to tell what follows. */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_PROLOG:
                /* After the DOCTYPE: comments, then the first tag. */
                SKIP_BLANKS;
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
                    avail = in->buf->buffer->use - (in->cur - in->base);
                if (avail < 2)
                    goto done;
                cur = in->cur[0];
                next = in->cur[1];
                if ((cur == '<') && (next == '!') &&
                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                    if (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing Comment\n");
#endif
                    htmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    /* Could still become "<!--": wait for more data. */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_EPILOG:
                /*
                 * After the last element was closed: only blanks and
                 * comments are accepted, anything else ends the document
                 * with an error.
                 */
                SKIP_BLANKS;
                if (in->buf == NULL)
                    avail = in->length - (in->cur - in->base);
                else
                    avail = in->buf->buffer->use - (in->cur - in->base);
                if (avail < 2)
                    goto done;
                cur = in->cur[0];
                next = in->cur[1];
                if ((cur == '<') && (next == '!') &&
                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                    if (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing Comment\n");
#endif
                    htmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Extra content at the end of the document\n");
                    ctxt->wellFormed = 0;
                    ctxt->errNo = XML_ERR_DOCUMENT_END;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                break;
            case XML_PARSER_START_TAG: {
                xmlChar *name, *oldname;
                int depth = ctxt->nameNr;
                htmlElemDescPtr info;

                if (avail < 2)
                    goto done;
                cur = in->cur[0];
                if (cur != '<') {
                    /* Not a tag after all: handle it as content. */
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                    break;
                }
                /* Wait until the closing '>' of the tag was received. */
                if (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
                    goto done;

                /*
                 * Keep a copy of the current element name and the stack
                 * depth to detect below whether a new element was pushed.
                 */
                oldname = xmlStrdup(ctxt->name);
                htmlParseStartTag(ctxt);
                name = ctxt->name;
#ifdef DEBUG
                if (oldname == NULL)
                    fprintf(stderr, "Start of element %s\n", name);
                else if (name == NULL)
                    fprintf(stderr, "Start of element failed, was %s\n",
                            oldname);
                else
                    fprintf(stderr, "Start of element %s, was %s\n",
                            name, oldname);
#endif
                /* Same depth and same name, or no name: nothing was opened. */
                if (((depth == ctxt->nameNr) &&
                     (!xmlStrcmp(oldname, ctxt->name))) ||
                    (name == NULL)) {
                    if (CUR == '>')
                        NEXT;
                    if (oldname != NULL)
                        xmlFree(oldname);
                    break;
                }
                if (oldname != NULL)
                    xmlFree(oldname);

                /*
                 * Lookup the info for that element.
                 */
                info = htmlTagLookup(name);
                if (info == NULL) {
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData, "Tag %s invalid\n",
                                         name);
                    ctxt->wellFormed = 0;
                } else if (info->depr) {
                    /* The deprecation warning is currently disabled. */
                    /***************************
                    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                        ctxt->sax->warning(ctxt->userData,
                                           "Tag %s is deprecated\n",
                                           name);
                     ***************************/
                }

                /*
                 * Check for an Empty Element labelled the XML/SGML way
                 */
                if ((CUR == '/') && (NXT(1) == '>')) {
                    SKIP(2);
                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                        ctxt->sax->endElement(ctxt->userData, name);
                    oldname = htmlnamePop(ctxt);
#ifdef DEBUG
                    fprintf(stderr,"End of tag the XML way: popping out %s\n",
                            oldname);
#endif
                    if (oldname != NULL)
                        xmlFree(oldname);
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                    break;
                }

                if (CUR == '>') {
                    NEXT;
                } else {
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                         "Couldn't find end of Start Tag %s\n",
                                         name);
                    ctxt->wellFormed = 0;

                    /*
                     * end of parsing of this node.
                     */
                    if (!xmlStrcmp(name, ctxt->name)) {
                        nodePop(ctxt);
                        oldname = htmlnamePop(ctxt);
#ifdef DEBUG
                        fprintf(stderr,
                            "End of start tag problem: popping out %s\n", oldname);
#endif
                        if (oldname != NULL)
                            xmlFree(oldname);
                    }

                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                    break;
                }

                /*
                 * Check for an Empty Element from DTD definition
                 */
                if ((info != NULL) && (info->empty)) {
                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                        ctxt->sax->endElement(ctxt->userData, name);
                    oldname = htmlnamePop(ctxt);
#ifdef DEBUG
                    fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
#endif
                    if (oldname != NULL)
                        xmlFree(oldname);
                }
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            }
            case XML_PARSER_CONTENT:
                /*
                 * Handle preparsed entities and charRef
                 */
                if (ctxt->token != 0) {
                    /* This local array shadows the outer `cur` variable. */
                    xmlChar cur[2] = { 0 , 0 } ;

                    cur[0] = (xmlChar) ctxt->token;
                    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                        ctxt->sax->characters(ctxt->userData, cur, 1);
                    ctxt->token = 0;
                    ctxt->checkIndex = 0;
                }
                if (avail < 2)
                    goto done;
                cur = in->cur[0];
                next = in->cur[1];
                if ((cur == '<') && (next == '!') &&
                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                    if (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing Comment\n");
#endif
                    htmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_CONTENT;
                } else if ((cur == '<') && (next == '!') && (avail < 4)) {
                    /* Could still become "<!--": wait for more data. */
                    goto done;
                } else if ((cur == '<') && (next == '/')) {
                    ctxt->instate = XML_PARSER_END_TAG;
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering END_TAG\n");
#endif
                    break;
                } else if (cur == '<') {
                    ctxt->instate = XML_PARSER_START_TAG;
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: entering START_TAG\n");
#endif
                    break;
                } else if (cur == '&') {
                    /* Wait for the ';' closing the reference. */
                    if (htmlParseLookupSequence(ctxt, ';', 0, 0) < 0)
                        goto done;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing Reference\n");
#endif
                    /* TODO: check generation of subtrees if noent !!! */
                    htmlParseReference(ctxt);
                } else {
                    /* TODO Avoid the extra copy, handle directly !!!!!! */
                    /*
                     * Goal of the following test is :
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *
                     * With a single input and less than
                     * HTML_PARSER_BIG_BUFFER_SIZE bytes available, the
                     * text is only parsed once the next '<' was received.
                     */
                    if ((ctxt->inputNr == 1) &&
                        (avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
                        if (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0)
                            goto done;
                    }
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    fprintf(stderr, "HPP: Parsing char data\n");
#endif
                    htmlParseCharData(ctxt, 0);
                }
                break;
            case XML_PARSER_END_TAG:
                if (avail < 2)
                    goto done;
                if (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
                    goto done;
                htmlParseEndTag(ctxt);
                /* An empty element stack means the document body is over. */
                if (ctxt->nameNr == 0) {
                    ctxt->instate = XML_PARSER_EPILOG;
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
                }
                ctxt->checkIndex = 0;
                /*
                 * NOTE(review): the trace says CONTENT even when the state
                 * just entered is EPILOG.
                 */
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            /*
             * The remaining states are reported as internal errors by this
             * function, which then falls back to CONTENT (or START_TAG for
             * ATTRIBUTE_VALUE).
             */
            case XML_PARSER_CDATA_SECTION:
                fprintf(stderr, "HPP: internal error, state == CDATA\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_DTD:
                fprintf(stderr, "HPP: internal error, state == DTD\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_COMMENT:
                fprintf(stderr, "HPP: internal error, state == COMMENT\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_PI:
                fprintf(stderr, "HPP: internal error, state == PI\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_DECL:
                fprintf(stderr, "HPP: internal error, state == ENTITY_DECL\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_VALUE:
                fprintf(stderr, "HPP: internal error, state == ENTITY_VALUE\n");
                ctxt->instate = XML_PARSER_CONTENT;
                ctxt->checkIndex = 0;
                /*
                 * NOTE(review): the trace says DTD while the state set
                 * just above is CONTENT.
                 */
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ATTRIBUTE_VALUE:
                fprintf(stderr, "HPP: internal error, state == ATTRIBUTE_VALUE\n");
                ctxt->instate = XML_PARSER_START_TAG;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                fprintf(stderr, "HPP: entering START_TAG\n");
#endif
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    fprintf(stderr, "HPP: done %d\n", ret);
#endif
    return(ret);
}
|
||||
|
||||
/**
|
||||
* htmlParseChunk:
|
||||
* @ctxt: an XML parser context
|
||||
* @chunk: an char array
|
||||
* @size: the size in byte of the chunk
|
||||
* @terminate: last chunk indicator
|
||||
*
|
||||
* Parse a Chunk of memory
|
||||
*
|
||||
* Returns zero if no error, the xmlParserErrors otherwise.
|
||||
*/
|
||||
int
|
||||
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||
int terminate) {
|
||||
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
||||
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
|
||||
int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
||||
int cur = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
||||
ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
||||
ctxt->input->cur = ctxt->input->base + cur;
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "HPP: pushed %d\n", size);
|
||||
#endif
|
||||
|
||||
htmlParseTry(ctxt);
|
||||
} else if (ctxt->instate != XML_PARSER_EOF)
|
||||
htmlParseTry(ctxt);
|
||||
if (terminate) {
|
||||
if ((ctxt->instate != XML_PARSER_EOF) &&
|
||||
(ctxt->instate != XML_PARSER_EPILOG) &&
|
||||
(ctxt->instate != XML_PARSER_MISC)) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Extra content at the end of the document\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->errNo = XML_ERR_DOCUMENT_END;
|
||||
}
|
||||
if (ctxt->instate != XML_PARSER_EOF) {
|
||||
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
|
||||
ctxt->sax->endDocument(ctxt->userData);
|
||||
}
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
}
|
||||
return((xmlParserErrors) ctxt->errNo);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* User entry points *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* htmlCreatePushParserCtxt :
|
||||
* @sax: a SAX handler
|
||||
* @user_data: The user data returned on SAX callbacks
|
||||
* @chunk: a pointer to an array of chars
|
||||
* @size: number of chars in the array
|
||||
* @filename: an optional file name or URI
|
||||
* @enc: an optional encoding
|
||||
*
|
||||
* Create a parser context for using the HTML parser in push mode
|
||||
* To allow content encoding detection, @size should be >= 4
|
||||
* The value of @filename is used for fetching external entities
|
||||
* and error/warning reports.
|
||||
*
|
||||
* Returns the new parser context or NULL
|
||||
*/
|
||||
htmlParserCtxtPtr
|
||||
htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
|
||||
const char *chunk, int size, const char *filename,
|
||||
xmlCharEncoding enc) {
|
||||
htmlParserCtxtPtr ctxt;
|
||||
htmlParserInputPtr inputStream;
|
||||
xmlParserInputBufferPtr buf;
|
||||
|
||||
buf = xmlAllocParserInputBuffer(enc);
|
||||
if (buf == NULL) return(NULL);
|
||||
|
||||
ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt));
|
||||
if (ctxt == NULL) {
|
||||
xmlFree(buf);
|
||||
return(NULL);
|
||||
}
|
||||
memset(ctxt, 0, sizeof(htmlParserCtxt));
|
||||
htmlInitParserCtxt(ctxt);
|
||||
if (sax != NULL) {
|
||||
if (ctxt->sax != &htmlDefaultSAXHandler)
|
||||
xmlFree(ctxt->sax);
|
||||
ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
|
||||
if (ctxt->sax == NULL) {
|
||||
xmlFree(buf);
|
||||
xmlFree(ctxt);
|
||||
return(NULL);
|
||||
}
|
||||
memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
|
||||
if (user_data != NULL)
|
||||
ctxt->userData = user_data;
|
||||
}
|
||||
if (filename == NULL) {
|
||||
ctxt->directory = NULL;
|
||||
} else {
|
||||
ctxt->directory = xmlParserGetDirectory(filename);
|
||||
}
|
||||
|
||||
inputStream = htmlNewInputStream(ctxt);
|
||||
if (inputStream == NULL) {
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
if (filename == NULL)
|
||||
inputStream->filename = NULL;
|
||||
else
|
||||
inputStream->filename = xmlMemStrdup(filename);
|
||||
inputStream->buf = buf;
|
||||
inputStream->base = inputStream->buf->buffer->content;
|
||||
inputStream->cur = inputStream->buf->buffer->content;
|
||||
|
||||
inputPush(ctxt, inputStream);
|
||||
|
||||
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
||||
(ctxt->input->buf != NULL)) {
|
||||
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "HPP: pushed %d\n", size);
|
||||
#endif
|
||||
}
|
||||
|
||||
return(ctxt);
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlSAXParseDoc :
|
||||
|
14
HTMLparser.h
14
HTMLparser.h
@ -78,6 +78,20 @@ htmlDocPtr htmlSAXParseFile(const char *filename,
|
||||
htmlDocPtr htmlParseFile (const char *filename,
|
||||
const char *encoding);
|
||||
|
||||
/**
|
||||
* Interfaces for the Push mode
|
||||
*/
|
||||
void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
|
||||
htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
|
||||
void *user_data,
|
||||
const char *chunk,
|
||||
int size,
|
||||
const char *filename,
|
||||
xmlCharEncoding enc);
|
||||
int htmlParseChunk (htmlParserCtxtPtr ctxt,
|
||||
const char *chunk,
|
||||
int size,
|
||||
int terminate);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -310,7 +310,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
|
||||
|
||||
if (cur == NULL) {
|
||||
#ifdef DEBUG_TREE
|
||||
fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
|
||||
fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
|
||||
#endif
|
||||
*mem = NULL;
|
||||
*size = 0;
|
||||
@ -343,7 +343,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
|
||||
|
||||
if (cur == NULL) {
|
||||
#ifdef DEBUG_TREE
|
||||
fprintf(stderr, "xmlDocDump : document == NULL\n");
|
||||
fprintf(stderr, "htmlDocDump : document == NULL\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@ -78,6 +78,20 @@ htmlDocPtr htmlSAXParseFile(const char *filename,
|
||||
htmlDocPtr htmlParseFile (const char *filename,
|
||||
const char *encoding);
|
||||
|
||||
/**
|
||||
* Interfaces for the Push mode
|
||||
*/
|
||||
void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
|
||||
htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
|
||||
void *user_data,
|
||||
const char *chunk,
|
||||
int size,
|
||||
const char *filename,
|
||||
xmlCharEncoding enc);
|
||||
int htmlParseChunk (htmlParserCtxtPtr ctxt,
|
||||
const char *chunk,
|
||||
int size,
|
||||
int terminate);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
11
parser.c
11
parser.c
@ -7180,7 +7180,7 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
xmlParsePI(ctxt);
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
|
||||
if (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "PP: Parsing Comment\n");
|
||||
@ -7238,7 +7238,7 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
xmlParsePI(ctxt);
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
|
||||
if (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "PP: Parsing Comment\n");
|
||||
@ -7275,7 +7275,7 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->instate = XML_PARSER_EPILOG;
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
|
||||
if (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "PP: Parsing Comment\n");
|
||||
@ -7425,7 +7425,7 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
xmlParsePI(ctxt);
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
|
||||
if (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
fprintf(stderr, "PP: Parsing Comment\n");
|
||||
@ -7531,7 +7531,7 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case XML_PARSER_END_TAG: {
|
||||
case XML_PARSER_END_TAG:
|
||||
if (avail < 2)
|
||||
goto done;
|
||||
if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
|
||||
@ -7549,7 +7549,6 @@ xmlParseTry(xmlParserCtxtPtr ctxt) {
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
case XML_PARSER_DTD: {
|
||||
/*
|
||||
* Sorry but progressive parsing of the internal subset
|
||||
|
87
testHTML.c
87
testHTML.c
@ -43,26 +43,7 @@ static int copy = 0;
|
||||
static int sax = 0;
|
||||
static int repeat = 0;
|
||||
static int noout = 0;
|
||||
|
||||
/*
|
||||
* Note: this is perfectly clean HTML, i.e. not a useful test.
|
||||
static xmlChar buffer[] =
|
||||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
|
||||
\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
|
||||
<html>\n\
|
||||
<head>\n\
|
||||
<title>This service is temporary down</title>\n\
|
||||
</head>\n\
|
||||
\n\
|
||||
<body bgcolor=\"#FFFFFF\">\n\
|
||||
<h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
|
||||
We are doing our best to get it back on-line,\n\
|
||||
\n\
|
||||
<p>The W3C system administrators</p>\n\
|
||||
</body>\n\
|
||||
</html>\n\
|
||||
";
|
||||
*/
|
||||
static int push = 0;
|
||||
|
||||
xmlSAXHandler emptySAXHandlerStruct = {
|
||||
NULL, /* internalSubset */
|
||||
@ -608,7 +589,35 @@ void parseAndPrintFile(char *filename) {
|
||||
/*
|
||||
* build an HTML tree from a string;
|
||||
*/
|
||||
doc = htmlParseFile(filename, NULL);
|
||||
if (push) {
|
||||
FILE *f;
|
||||
|
||||
f = fopen(filename, "r");
|
||||
if (f != NULL) {
|
||||
int res, size = 3;
|
||||
char chars[1024];
|
||||
htmlParserCtxtPtr ctxt;
|
||||
|
||||
if (repeat)
|
||||
size = 1024;
|
||||
res = fread(chars, 1, 4, f);
|
||||
if (res > 0) {
|
||||
ctxt = htmlCreatePushParserCtxt(NULL, NULL,
|
||||
chars, res, filename, 0);
|
||||
while ((res = fread(chars, 1, size, f)) > 0) {
|
||||
htmlParseChunk(ctxt, chars, res, 0);
|
||||
}
|
||||
htmlParseChunk(ctxt, chars, 0, 1);
|
||||
doc = ctxt->myDoc;
|
||||
htmlFreeParserCtxt(ctxt);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
doc = htmlParseFile(filename, NULL);
|
||||
}
|
||||
if (doc == NULL) {
|
||||
fprintf(stderr, "Could not parse %s\n", filename);
|
||||
}
|
||||
|
||||
/*
|
||||
* test intermediate copy if needed.
|
||||
@ -635,37 +644,6 @@ void parseAndPrintFile(char *filename) {
|
||||
xmlFreeDoc(doc);
|
||||
}
|
||||
|
||||
/*
 * parseAndPrintBuffer:
 * Parse the HTML held in the string @buf, optionally round-trip the
 * resulting tree through xmlCopyDoc() when the `copy` flag is set,
 * print it to stdout (debug dump when `debug` is set, HTML serialization
 * otherwise) and release the tree.
 */
void parseAndPrintBuffer(xmlChar *buf) {
    htmlDocPtr parsed;

    /* build the HTML tree from the in-memory string */
    parsed = htmlParseDoc(buf, NULL);

    /* exercise the copy implementation: keep the copy, drop the source */
    if (copy) {
        htmlDocPtr source = parsed;

        parsed = xmlCopyDoc(source, 1);
        xmlFreeDoc(source);
    }

    /* output: debug tree or regular HTML dump */
    if (debug)
        xmlDebugDumpDocument(stdout, parsed);
    else
        htmlDocDump(stdout, parsed);

    /* the tree is no longer needed */
    xmlFreeDoc(parsed);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i, count;
|
||||
int files = 0;
|
||||
@ -675,6 +653,8 @@ int main(int argc, char **argv) {
|
||||
debug++;
|
||||
else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
|
||||
copy++;
|
||||
else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
|
||||
push++;
|
||||
else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
|
||||
sax++;
|
||||
else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
|
||||
@ -708,8 +688,9 @@ int main(int argc, char **argv) {
|
||||
printf("\t--debug : dump a debug tree of the in-memory document\n");
|
||||
printf("\t--copy : used to test the internal copy implementation\n");
|
||||
printf("\t--sax : debug the sequence of SAX callbacks\n");
|
||||
printf("\t--repeat : parse the file 100 times, for timing or profiling\n");
|
||||
printf("\t--repeat : parse the file 100 times, for timing\n");
|
||||
printf("\t--noout : do not print the result\n");
|
||||
printf("\t--push : use the push mode parser\n");
|
||||
}
|
||||
xmlCleanupParser();
|
||||
xmlMemoryDump();
|
||||
|
58
xmlmemory.c
58
xmlmemory.c
@ -25,6 +25,9 @@
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#ifdef HAVE_CTYPE_H
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "xmlmemory.h"
|
||||
@ -368,6 +371,59 @@ xmlMemUsed(void) {
|
||||
return(debugMemSize);
|
||||
}
|
||||
|
||||
#ifdef MEM_LIST
|
||||
/**
|
||||
* xmlMemContentShow:
|
||||
* @fp: a FILE descriptor used as the output file
|
||||
* @p: a memory block header
|
||||
*
|
||||
* tries to show some content from the memory block
|
||||
*/
|
||||
|
||||
void
|
||||
xmlMemContentShow(FILE *fp, MEMHDR *p)
|
||||
{
|
||||
int i,j,len = p->mh_size;
|
||||
const char *buf = HDR_2_CLIENT(p);
|
||||
|
||||
for (i = 0;i < len;i++) {
|
||||
if (buf[i] == 0) break;
|
||||
if (!isprint(buf[i])) break;
|
||||
}
|
||||
if ((i < 4) && ((buf[i] != 0) || (i == 0))) {
|
||||
if (len >= 4) {
|
||||
MEMHDR *q;
|
||||
void *cur;
|
||||
|
||||
for (j = 0;j < len -3;j += 4) {
|
||||
cur = *((void **) &buf[j]);
|
||||
q = CLIENT_2_HDR(cur);
|
||||
p = memlist;
|
||||
while (p != NULL) {
|
||||
if (p == q) break;
|
||||
p = p->mh_next;
|
||||
}
|
||||
if (p == q) {
|
||||
fprintf(fp, " pointer to #%lu at index %d",
|
||||
p->mh_number, j);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ((i == 0) && (buf[i] == 0)) {
|
||||
fprintf(fp," null");
|
||||
} else {
|
||||
if (buf[i] == 0) fprintf(fp," \"%.25s\"", buf);
|
||||
else {
|
||||
fprintf(fp," [");
|
||||
for (j = 0;j < i;j++)
|
||||
fprintf(fp,"%c", buf[j]);
|
||||
fprintf(fp,"]");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* xmlMemShow:
|
||||
* @fp: a FILE descriptor used as the output file
|
||||
@ -403,6 +459,7 @@ xmlMemShow(FILE *fp, int nr)
|
||||
fprintf(fp,"%s(%d)", p->mh_file, p->mh_line);
|
||||
if (p->mh_tag != MEMTAG)
|
||||
fprintf(fp," INVALID");
|
||||
xmlMemContentShow(fp, p);
|
||||
fprintf(fp,"\n");
|
||||
nr--;
|
||||
p = p->mh_next;
|
||||
@ -453,6 +510,7 @@ xmlMemDisplay(FILE *fp)
|
||||
if (p->mh_file != NULL) fprintf(fp,"%s(%d)", p->mh_file, p->mh_line);
|
||||
if (p->mh_tag != MEMTAG)
|
||||
fprintf(fp," INVALID");
|
||||
xmlMemContentShow(fp, p);
|
||||
fprintf(fp,"\n");
|
||||
p = p->mh_next;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user