diff --git a/HTMLparser.c b/HTMLparser.c index f84b4760..840e6ebf 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -60,23 +60,7 @@ static void htmlParseComment(htmlParserCtxtPtr ctxt); static void htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) { - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) { - ctxt->errNo = XML_ERR_NO_MEMORY; - ctxt->instate = XML_PARSER_EOF; - ctxt->disableSAX = 1; - } - if (extra) - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, - NULL, NULL, 0, 0, - "Memory allocation failed : %s\n", extra); - else - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, - NULL, NULL, 0, 0, "Memory allocation failed\n"); + xmlErrMemory(ctxt, extra); } /** @@ -93,18 +77,8 @@ static void LIBXML_ATTR_FORMAT(3,0) htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2) { - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, - XML_ERR_ERROR, NULL, 0, - (const char *) str1, (const char *) str2, - NULL, 0, 0, - msg, str1, str2); - if (ctxt != NULL) - ctxt->wellFormed = 0; + xmlErrParser(ctxt, NULL, XML_FROM_HTML, error, XML_ERR_ERROR, + str1, str2, NULL, 0, msg, str1, str2); } /** @@ -120,16 +94,8 @@ static void LIBXML_ATTR_FORMAT(3,0) htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, int val) { - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, - XML_ERR_ERROR, NULL, 0, NULL, NULL, - NULL, val, 0, msg, val); - if (ctxt != NULL) - ctxt->wellFormed = 0; + xmlErrParser(ctxt, NULL, XML_FROM_HTML, error, XML_ERR_ERROR, + NULL, NULL, NULL, val, msg, val); } /************************************************************************ @@ -343,6 +309,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) static xmlChar * htmlFindEncoding(xmlParserCtxtPtr ctxt) { const xmlChar *start, *cur, *end; + xmlChar *ret; if ((ctxt == NULL) || (ctxt->input == NULL) || (ctxt->input->flags & XML_INPUT_HAS_ENCODING)) @@ -374,7 +341,10 @@ htmlFindEncoding(xmlParserCtxtPtr ctxt) { cur++; if (cur == start) return(NULL); - return(xmlStrndup(start, cur - start)); + ret = xmlStrndup(start, cur - start); + if (ret == NULL) + htmlErrMemory(ctxt, NULL); + return(ret); } /** @@ -397,23 +367,16 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { unsigned char c; unsigned int val; - if (ctxt->instate == XML_PARSER_EOF) - return(0); - if (ctxt->token != 0) { *len = 0; return(ctxt->token); } - if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { + if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) xmlParserGrow(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - return(0); - } if ((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) { xmlChar * guess; - xmlCharEncodingHandlerPtr handler; /* * Assume it's a fixed length encoding (1) with @@ -421,13 +384,18 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * HTML constructs only use < 128 chars */ if (*ctxt->input->cur < 0x80) { - *len = 1; - if ((*ctxt->input->cur == 0) && - (ctxt->input->cur < ctxt->input->end)) { - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", 0); - return(' '); + if (*ctxt->input->cur == 0) { + if (ctxt->input->cur < ctxt->input->end) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", 0); + *len = 1; + return(' '); + } else { + *len = 0; + return(0); + } } + *len = 1; return(*ctxt->input->cur); } @@ -438,18 +406,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { if (guess == NULL) { xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); } else { - handler = xmlFindCharEncodingHandler((const char *) guess); - if (handler != NULL) { - /* - * Don't use UTF-8 encoder which isn't required and - * can produce invalid UTF-8. - */ - if (!xmlStrEqual(BAD_CAST handler->name, BAD_CAST "UTF-8")) - xmlSwitchToEncoding(ctxt, handler); - } else { - htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, - "Unsupported encoding %s", guess, NULL); - } + xmlSwitchEncodingName(ctxt, (const char *) guess); xmlFree(guess); } ctxt->input->flags |= XML_INPUT_HAS_ENCODING; @@ -516,12 +473,16 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } return(val); } else { - if ((*ctxt->input->cur == 0) && - (ctxt->input->cur < ctxt->input->end)) { - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", 0); - *len = 1; - return(' '); + if (*ctxt->input->cur == 0) { + if (ctxt->input->cur < ctxt->input->end) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", 0); + *len = 1; + return(' '); + } else { + *len = 0; + return(0); + } } /* 1-byte code */ *len = 1; @@ -2438,10 +2399,8 @@ htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { * Allocate a new document and fill the fields. */ cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); - if (cur == NULL) { - htmlErrMemory(NULL, "HTML document creation failed\n"); + if (cur == NULL) return(NULL); - } memset(cur, 0, sizeof(xmlDoc)); cur->type = XML_HTML_DOCUMENT_NODE; @@ -2461,8 +2420,15 @@ htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { cur->charset = XML_CHAR_ENCODING_UTF8; cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; if ((ExternalID != NULL) || - (URI != NULL)) - xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); + (URI != NULL)) { + xmlDtdPtr intSubset; + + intSubset = xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); + if (intSubset == NULL) { + xmlFree(cur); + return(NULL); + } + } if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); @@ -2510,12 +2476,14 @@ htmlSkipBogusComment(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT, "Incorrectly opened comment\n", NULL, NULL); - do { + while (PARSER_STOPPED(ctxt) == 0) { c = CUR; if (c == 0) break; NEXT; - } while (c != '>'); + if (c == '>') + break; + } } /** @@ -2571,6 +2539,7 @@ static const xmlChar * htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { int i = 0; xmlChar loc[HTML_PARSER_BUFFER_SIZE]; + const xmlChar *ret; if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && (NXT(1) != ':')) return(NULL); @@ -2583,7 +2552,11 @@ htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { i++; } - return(xmlDictLookup(ctxt->dict, loc, i)); + ret = xmlDictLookup(ctxt->dict, loc, i); + if (ret == NULL) + htmlErrMemory(ctxt, NULL); + + return(ret); } @@ -2625,6 +2598,8 @@ htmlParseName(htmlParserCtxtPtr ctxt) { if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + if (ret == NULL) + htmlErrMemory(ctxt, NULL); ctxt->input->cur = in; ctxt->input->col += count; return(ret); @@ -2641,6 +2616,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; const xmlChar *base = ctxt->input->base; + const xmlChar *ret; /* * Handler for more complex cases @@ -2673,8 +2649,6 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { return(htmlParseNameComplex(ctxt)); } } - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); if (ctxt->input->cur - ctxt->input->base < len) { /* Sanity check */ @@ -2683,7 +2657,11 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { return (NULL); } - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len); + if (ret == NULL) + htmlErrMemory(ctxt, NULL); + + return(ret); } @@ -2724,7 +2702,8 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { /* * Ok loop until we reach one of the ending chars */ - while ((CUR != 0) && (CUR != stop)) { + while ((PARSER_STOPPED(ctxt) == 0) && + (CUR != 0) && (CUR != stop)) { if ((stop == 0) && (CUR == '>')) break; if ((stop == 0) && (IS_BLANK_CH(CUR))) break; if (CUR == '&') { @@ -2810,10 +2789,6 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { out = &buffer[indx]; } c = CUR_CHAR(l); - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buffer); - return(NULL); - } if (c < 0x80) { *out++ = c; bits= -6; } else if (c < 0x800) @@ -2966,7 +2941,8 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { return(ret); startPosition = CUR_PTR - BASE_PTR; - while ((CUR != 0) && (CUR != quote)) { + while ((PARSER_STOPPED(ctxt) == 0) && + (CUR != 0) && (CUR != quote)) { /* TODO: Handle UTF-8 */ if (!IS_CHAR_CH(CUR)) { htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, @@ -2980,8 +2956,13 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { - if (err == 0) + if (err == 0) { ret = xmlStrndup((BASE_PTR+startPosition), len); + if (ret == NULL) { + htmlErrMemory(ctxt, NULL); + return(NULL); + } + } NEXT; } @@ -3021,7 +3002,8 @@ htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { return(ret); startPosition = CUR_PTR - BASE_PTR; - while ((CUR != 0) && (CUR != quote)) { + while ((PARSER_STOPPED(ctxt) == 0) && + (CUR != 0) && (CUR != quote)) { if (!IS_PUBIDCHAR_CH(CUR)) { htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, "Invalid char in PubidLiteral 0x%X\n", CUR); @@ -3035,8 +3017,13 @@ htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { - if (err == 0) + if (err == 0) { ret = xmlStrndup((BASE_PTR + startPosition), len); + if (ret == NULL) { + htmlErrMemory(ctxt, NULL); + return(NULL); + } + } NEXT; } @@ -3125,9 +3112,6 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); } - if (ctxt->instate == XML_PARSER_EOF) - return; - if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { buf[nbchar] = 0; if (ctxt->sax->cdataBlock!= NULL) { @@ -3200,8 +3184,6 @@ htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) { } cur = CUR_CHAR(l); } - if (ctxt->instate == XML_PARSER_EOF) - return; if (nbchar != 0) { buf[nbchar] = 0; @@ -3344,13 +3326,11 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { (ctxt->sax->processingInstruction != NULL)) ctxt->sax->processingInstruction(ctxt->userData, target, NULL); - ctxt->instate = state; - return; + goto done; } buf = (xmlChar *) xmlMallocAtomic(size); if (buf == NULL) { htmlErrMemory(ctxt, NULL); - ctxt->instate = state; return; } cur = CUR; @@ -3369,7 +3349,6 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { if (tmp == NULL) { htmlErrMemory(ctxt, NULL); xmlFree(buf); - ctxt->instate = state; return; } buf = tmp; @@ -3385,17 +3364,12 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED, "PI %s too long", target, NULL); xmlFree(buf); - ctxt->instate = state; - return; + goto done; } NEXTL(l); cur = CUR_CHAR(l); } buf[len] = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } if (cur != '>') { htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED, "ParsePI: PI %s never end ...\n", target, NULL); @@ -3415,6 +3389,8 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, "PI is not started correctly", NULL, NULL); } + +done: ctxt->instate = state; } } @@ -3453,7 +3429,6 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { buf = (xmlChar *) xmlMallocAtomic(size); if (buf == NULL) { htmlErrMemory(ctxt, "buffer allocation failed\n"); - ctxt->instate = state; return; } len = 0; @@ -3498,7 +3473,6 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { if (tmp == NULL) { xmlFree(buf); htmlErrMemory(ctxt, "growing buffer failed\n"); - ctxt->instate = state; return; } buf = tmp; @@ -3526,10 +3500,6 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { } finished: buf[len] = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } if (cur == '>') { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && @@ -3563,12 +3533,8 @@ int htmlParseCharRef(htmlParserCtxtPtr ctxt) { int val = 0; - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseCharRef: context error\n", - NULL, NULL); + if ((ctxt == NULL) || (ctxt->input == NULL)) return(0); - } if ((CUR == '&') && (NXT(1) == '#') && ((NXT(2) == 'x') || NXT(2) == 'X')) { SKIP(3); @@ -3681,7 +3647,7 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { "DOCTYPE improperly terminated\n", NULL, NULL); /* Ignore bogus content */ while ((CUR != 0) && (CUR != '>') && - (ctxt->instate != XML_PARSER_EOF)) + (PARSER_STOPPED(ctxt) == 0)) NEXT; } if (CUR == '>') @@ -3762,6 +3728,7 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { static void htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { const xmlChar *encoding; + xmlChar *copy; if (!attvalue) return; @@ -3777,7 +3744,10 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { encoding = xmlStrcasestr(attvalue, BAD_CAST"="); if (encoding && *encoding == '=') { encoding ++; - xmlSetDeclaredEncoding(ctxt, xmlStrdup(encoding)); + copy = xmlStrdup(encoding); + if (copy == NULL) + htmlErrMemory(ctxt, NULL); + xmlSetDeclaredEncoding(ctxt, copy); } } @@ -3802,13 +3772,21 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { att = atts[i++]; while (att != NULL) { value = atts[i++]; - if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) - && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) - http = 1; - else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) - xmlSetDeclaredEncoding(ctxt, xmlStrdup(value)); - else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) - content = value; + if (value != NULL) { + if ((!xmlStrcasecmp(att, BAD_CAST "http-equiv")) && + (!xmlStrcasecmp(value, BAD_CAST "Content-Type"))) { + http = 1; + } else if (!xmlStrcasecmp(att, BAD_CAST "charset")) { + xmlChar *copy; + + copy = xmlStrdup(value); + if (copy == NULL) + htmlErrMemory(ctxt, NULL); + xmlSetDeclaredEncoding(ctxt, copy); + } else if (!xmlStrcasecmp(att, BAD_CAST "content")) { + content = value; + } + } att = atts[i++]; } if ((http) && (content != NULL)) @@ -3848,13 +3826,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { int i; int discardtag = 0; - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseStartTag: context error\n", NULL, NULL); + if ((ctxt == NULL) || (ctxt->input == NULL)) return -1; - } - if (ctxt->instate == XML_PARSER_EOF) - return(-1); if (CUR != '<') return -1; NEXT; @@ -3869,7 +3842,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { NULL, NULL); /* Dump the bogus tag like browsers do */ while ((CUR != 0) && (CUR != '>') && - (ctxt->instate != XML_PARSER_EOF)) + (PARSER_STOPPED(ctxt) == 0)) NEXT; return -1; } @@ -3927,7 +3900,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { while ((CUR != 0) && (CUR != '>') && ((CUR != '/') || (NXT(1) != '>')) && - (ctxt->instate != XML_PARSER_EOF)) { + (PARSER_STOPPED(ctxt) == 0)) { GROW; attname = htmlParseAttribute(ctxt, &attvalue); if (attname != NULL) { @@ -3989,7 +3962,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { while ((CUR != 0) && !(IS_BLANK_CH(CUR)) && (CUR != '>') && ((CUR != '/') || (NXT(1) != '>')) && - (ctxt->instate != XML_PARSER_EOF)) + (PARSER_STOPPED(ctxt) == 0)) NEXT; } @@ -4066,7 +4039,8 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, "End tag : expected '>'\n", NULL, NULL); /* Skip to next '>' */ - while ((CUR != 0) && (CUR != '>')) + while ((PARSER_STOPPED(ctxt) == 0) && + (CUR != 0) && (CUR != '>')) NEXT; } if (CUR == '>') @@ -4230,9 +4204,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { while (1) { GROW; - if (ctxt->instate == XML_PARSER_EOF) - break; - /* * Our tag or one of it's parent or children is ending. */ @@ -4387,14 +4358,8 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { int depth; const xmlChar *oldptr; - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseElement: context error\n", NULL, NULL); + if ((ctxt == NULL) || (ctxt->input == NULL)) return; - } - - if (ctxt->instate == XML_PARSER_EOF) - return; /* Capture start position */ if (ctxt->record_info) { @@ -4534,14 +4499,8 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) { htmlParserNodeInfo node_info = { NULL, 0, 0, 0, 0 }; int failed; - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseElementInternal: context error\n", NULL, NULL); + if ((ctxt == NULL) || (ctxt->input == NULL)) return; - } - - if (ctxt->instate == XML_PARSER_EOF) - return; /* Capture start position */ if (ctxt->record_info) { @@ -4636,12 +4595,9 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { return; } } - while (1) { + while (PARSER_STOPPED(ctxt) == 0) { GROW; - if (ctxt->instate == XML_PARSER_EOF) - break; - /* * Our tag or one of it's parent or children is ending. */ @@ -4681,10 +4637,14 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { if (currentNode != NULL) xmlFree(currentNode); - currentNode = xmlStrdup(ctxt->name); - if (currentNode == NULL) { - htmlErrMemory(ctxt, NULL); - break; + if (ctxt->name == NULL) { + currentNode = NULL; + } else { + currentNode = xmlStrdup(ctxt->name); + if (currentNode == NULL) { + htmlErrMemory(ctxt, NULL); + break; + } } depth = ctxt->nameNr; continue; @@ -4708,10 +4668,14 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { htmlParserFinishElementParsing(ctxt); if (currentNode != NULL) xmlFree(currentNode); - currentNode = xmlStrdup(ctxt->name); - if (currentNode == NULL) { - htmlErrMemory(ctxt, NULL); - break; + if (ctxt->name == NULL) { + currentNode = NULL; + } else { + currentNode = xmlStrdup(ctxt->name); + if (currentNode == NULL) { + htmlErrMemory(ctxt, NULL); + break; + } } depth = ctxt->nameNr; continue; @@ -4763,10 +4727,14 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { htmlParseElementInternal(ctxt); if (currentNode != NULL) xmlFree(currentNode); - currentNode = xmlStrdup(ctxt->name); - if (currentNode == NULL) { - htmlErrMemory(ctxt, NULL); - break; + if (ctxt->name == NULL) { + currentNode = NULL; + } else { + currentNode = xmlStrdup(ctxt->name); + if (currentNode == NULL) { + htmlErrMemory(ctxt, NULL); + break; + } } depth = ctxt->nameNr; } @@ -4837,11 +4805,8 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { xmlInitParser(); - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseDocument: context error\n", NULL, NULL); - return(XML_ERR_INTERNAL_ERROR); - } + if ((ctxt == NULL) || (ctxt->input == NULL)) + return(-1); /* * Document locator is unused. Only for backward compatibility. @@ -4873,7 +4838,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); - /* * Parse possible comments and PIs before any content */ @@ -4902,9 +4866,10 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { /* * Parse possible comments and PIs before any content */ - while (((CUR == '<') && (NXT(1) == '!') && - (NXT(2) == '-') && (NXT(3) == '-')) || - ((CUR == '<') && (NXT(1) == '?'))) { + while ((PARSER_STOPPED(ctxt) == 0) && + (((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) || + ((CUR == '<') && (NXT(1) == '?')))) { htmlParseComment(ctxt); htmlParsePI(ctxt); SKIP_BLANKS; @@ -4930,11 +4895,14 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { dtd = xmlGetIntSubset(ctxt->myDoc); - if (dtd == NULL) + if (dtd == NULL) { ctxt->myDoc->intSubset = xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); + if (ctxt->myDoc->intSubset == NULL) + htmlErrMemory(ctxt, NULL); + } } if (! ctxt->wellFormed) return(-1); return(0); @@ -4966,17 +4934,13 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax, memset(ctxt, 0, sizeof(htmlParserCtxt)); ctxt->dict = xmlDictCreate(); - if (ctxt->dict == NULL) { - htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); + if (ctxt->dict == NULL) return(-1); - } if (ctxt->sax == NULL) ctxt->sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); - if (ctxt->sax == NULL) { - htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); + if (ctxt->sax == NULL) return(-1); - } if (sax == NULL) { memset(ctxt->sax, 0, sizeof(htmlSAXHandler)); xmlSAX2InitHtmlDefaultSAXHandler(ctxt->sax); @@ -4989,13 +4953,8 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax, /* Allocate the Input stack */ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr)); - if (ctxt->inputTab == NULL) { - htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); - ctxt->inputNr = 0; - ctxt->inputMax = 0; - ctxt->input = NULL; + if (ctxt->inputTab == NULL) return(-1); - } ctxt->inputNr = 0; ctxt->inputMax = 5; ctxt->input = NULL; @@ -5006,35 +4965,16 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax, /* Allocate the Node stack */ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr)); - if (ctxt->nodeTab == NULL) { - htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); - ctxt->nodeNr = 0; - ctxt->nodeMax = 0; - ctxt->node = NULL; - ctxt->inputNr = 0; - ctxt->inputMax = 0; - ctxt->input = NULL; + if (ctxt->nodeTab == NULL) return(-1); - } ctxt->nodeNr = 0; ctxt->nodeMax = 10; ctxt->node = NULL; /* Allocate the Name stack */ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); - if (ctxt->nameTab == NULL) { - htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); - ctxt->nameNr = 0; - ctxt->nameMax = 0; - ctxt->name = NULL; - ctxt->nodeNr = 0; - ctxt->nodeMax = 0; - ctxt->node = NULL; - ctxt->inputNr = 0; - ctxt->inputMax = 0; - ctxt->input = NULL; + if (ctxt->nameTab == NULL) return(-1); - } ctxt->nameNr = 0; ctxt->nameMax = 10; ctxt->name = NULL; @@ -5106,10 +5046,8 @@ htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData) xmlParserCtxtPtr ctxt; ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); - if (ctxt == NULL) { - htmlErrMemory(NULL, "NewParserCtxt: out of memory\n"); + if (ctxt == NULL) return(NULL); - } memset(ctxt, 0, sizeof(xmlParserCtxt)); if (htmlInitParserCtxt(ctxt, sax, userData) < 0) { htmlFreeParserCtxt(ctxt); @@ -5167,13 +5105,13 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) { /** * htmlCreateDocParserCtxt: * @str: a pointer to an array of xmlChar - * @encoding: a free form C string describing the HTML document encoding, or NULL + * @encoding: optional encoding * - * Create a parser context for an HTML document. + * Create a parser context for a null-terminated string. * - * TODO: check the need to add encoding handling there + * A non-NULL encoding overrides encoding declarations in the document. * - * Returns the new parser context or NULL + * Returns the new parser context or NULL if a memory allocation failed. */ static htmlParserCtxtPtr htmlCreateDocParserCtxt(const xmlChar *str, const char *encoding) { @@ -5207,35 +5145,8 @@ htmlCreateDocParserCtxt(const xmlChar *str, const char *encoding) { inputPush(ctxt, input); - if (encoding != NULL) { - xmlCharEncoding enc; - xmlCharEncodingHandlerPtr handler; - - enc = xmlParseCharEncoding(encoding); - /* - * registered set of known encodings - */ - if (enc != XML_CHAR_ENCODING_ERROR) { - xmlSwitchEncoding(ctxt, enc); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "Unsupported encoding %s\n", - (const xmlChar *) encoding, NULL); - } - } else { - /* - * fallback for unknown encodings - */ - handler = xmlFindCharEncodingHandler((const char *) encoding); - if (handler != NULL) { - xmlSwitchToEncoding(ctxt, handler); - } else { - htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "Unsupported encoding %s\n", - (const xmlChar *) encoding, NULL); - } - } - } + if (encoding != NULL) + xmlSwitchEncodingName(ctxt, encoding); return(ctxt); } @@ -5382,7 +5293,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { htmlParserNodeInfo node_info; - while (1) { + while (PARSER_STOPPED(ctxt) == 0) { in = ctxt->input; if (in == NULL) break; @@ -5444,8 +5355,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) ctxt->sax->startDocument(ctxt->userData); - if (ctxt->instate == XML_PARSER_EOF) - goto done; cur = in->cur[0]; next = in->cur[1]; @@ -5458,8 +5367,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; htmlParseDocTypeDecl(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_PROLOG; } else { ctxt->instate = XML_PARSER_MISC; @@ -5490,16 +5397,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; htmlParseComment(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParsePI(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '!') && (UPP(2) == 'D') && (UPP(3) == 'O') && @@ -5510,8 +5413,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; htmlParseDocTypeDecl(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '!') && (avail < 9)) { @@ -5532,16 +5433,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; htmlParseComment(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParsePI(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { @@ -5567,16 +5464,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; htmlParseComment(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParsePI(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { @@ -5659,8 +5552,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); htmlnamePop(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_CONTENT; break; } @@ -5683,8 +5574,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (ctxt->record_info) htmlNodeInfoPush(ctxt, &node_info); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_CONTENT; break; } @@ -5701,8 +5590,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (ctxt->record_info) htmlNodeInfoPush(ctxt, &node_info); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_CONTENT; break; } @@ -5775,8 +5662,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } } htmlParseScript(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; if ((cur == '<') && (next == '/')) { ctxt->instate = XML_PARSER_END_TAG; ctxt->checkIndex = 0; @@ -5804,8 +5689,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; htmlParseComment(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_CONTENT; } else { if ((!terminate) && @@ -5818,8 +5701,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParsePI(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '/')) { ctxt->instate = XML_PARSER_END_TAG; @@ -5848,7 +5729,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) goto done; ctxt->checkIndex = 0; - while ((ctxt->instate != XML_PARSER_EOF) && + while ((PARSER_STOPPED(ctxt) == 0) && (cur != '<') && (in->cur < in->end)) { if (cur == '&') { htmlParseReference(ctxt); @@ -5868,8 +5749,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParseEndTag(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - goto done; if (ctxt->nameNr == 0) { ctxt->instate = XML_PARSER_EPILOG; } else { @@ -5901,11 +5780,14 @@ done: (ctxt->instate == XML_PARSER_EPILOG))) { xmlDtdPtr dtd; dtd = xmlGetIntSubset(ctxt->myDoc); - if (dtd == NULL) + if (dtd == NULL) { ctxt->myDoc->intSubset = xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); + if (ctxt->myDoc->intSubset == NULL) + htmlErrMemory(ctxt, NULL); + } } return(ret); } @@ -5924,13 +5806,12 @@ done: int htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { - if ((ctxt == NULL) || (ctxt->input == NULL)) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseChunk: context error\n", NULL, NULL); - return(XML_ERR_INTERNAL_ERROR); - } + if ((ctxt == NULL) || (ctxt->input == NULL)) + return(XML_ERR_ARGUMENT); + if (PARSER_STOPPED(ctxt) != 0) + return(ctxt->errNo); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { + (ctxt->input->buf != NULL)) { size_t pos = ctxt->input->cur - ctxt->input->base; int res; @@ -5945,12 +5826,6 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, } htmlParseTryOrFinish(ctxt, terminate); if (terminate) { - if ((ctxt->instate != XML_PARSER_EOF) && - (ctxt->instate != XML_PARSER_EPILOG) && - (ctxt->instate != XML_PARSER_MISC)) { - ctxt->errNo = XML_ERR_DOCUMENT_END; - ctxt->wellFormed = 0; - } if (ctxt->instate != XML_PARSER_EOF) { if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); @@ -5991,7 +5866,7 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, xmlInitParser(); - buf = xmlAllocParserInputBuffer(enc); + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); if (buf == NULL) return(NULL); ctxt = htmlNewSAXParserCtxt(sax, user_data); @@ -6037,6 +5912,9 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, } ctxt->progressive = 1; + if (enc != XML_CHAR_ENCODING_NONE) + xmlSwitchEncoding(ctxt, enc); + return(ctxt); } #endif /* LIBXML_PUSH_ENABLED */ @@ -6107,13 +5985,16 @@ htmlParseDoc(const xmlChar *cur, const char *encoding) { /** * htmlCreateFileParserCtxt: * @filename: the filename - * @encoding: a free form C string describing the HTML document encoding, or NULL + * @encoding: optional encoding + * + * Create a parser context to read from a file. + * + * A non-NULL encoding overrides encoding declarations in the document. * - * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. * - * Returns the new parser context or NULL + * Returns the new parser context or NULL if a memory allocation failed. */ htmlParserCtxtPtr htmlCreateFileParserCtxt(const char *filename, const char *encoding) @@ -6144,15 +6025,8 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) inputPush(ctxt, inputStream); - /* set encoding */ - if (encoding) { - xmlCharEncodingHandlerPtr hdlr; - - hdlr = xmlFindCharEncodingHandler(encoding); - if (hdlr != NULL) { - xmlSwitchToEncoding(ctxt, hdlr); - } - } + if (encoding != NULL) + xmlSwitchEncodingName(ctxt, encoding); return(ctxt); } @@ -6553,18 +6427,16 @@ htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, htmlCtxtUseOptions(ctxt, options); ctxt->html = 1; - if (encoding != NULL) { - xmlCharEncodingHandlerPtr hdlr; - - hdlr = xmlFindCharEncodingHandler(encoding); - if (hdlr != NULL) { - xmlSwitchToEncoding(ctxt, hdlr); - } - } + if (encoding != NULL) + xmlSwitchEncodingName(ctxt, encoding); if ((URL != NULL) && (ctxt->input != NULL) && (ctxt->input->filename == NULL)) ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); htmlParseDocument(ctxt); + if (ctxt->errNo == XML_ERR_NO_MEMORY) { + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } ret = ctxt->myDoc; ctxt->myDoc = NULL; if (!reuse) { @@ -6849,6 +6721,7 @@ htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); if (input == NULL) { + htmlErrMemory(ctxt, NULL); return(NULL); } diff --git a/HTMLtree.c b/HTMLtree.c index 8698f53e..0d778d93 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -384,6 +384,32 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra) * * ************************************************************************/ +static xmlCharEncodingHandler * +htmlFindOutputEncoder(const char *encoding) { + xmlCharEncodingHandler *handler = NULL; + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } + } else { + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + } + + return(handler); +} + /** * htmlBufNodeDumpFormat: * @buf: the xmlBufPtr output @@ -479,33 +505,15 @@ int htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, xmlNodePtr cur, const char *encoding, int format) { xmlOutputBufferPtr buf; - xmlCharEncodingHandlerPtr handler = NULL; + xmlCharEncodingHandlerPtr handler; int ret; xmlInitParser(); - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } - } else { - /* - * Fallback to HTML or ASCII when the encoding is unspecified - */ - if (handler == NULL) - handler = xmlFindCharEncodingHandler("HTML"); - if (handler == NULL) - handler = xmlFindCharEncodingHandler("ascii"); - } - /* * save the content to a temp buffer. */ + handler = htmlFindOutputEncoder(encoding); buf = xmlOutputBufferCreateFile(out, handler); if (buf == NULL) return(0); @@ -556,27 +564,7 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { } encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - - } - } else { - /* - * Fallback to HTML or ASCII when the encoding is unspecified - */ - if (handler == NULL) - handler = xmlFindCharEncodingHandler("HTML"); - if (handler == NULL) - handler = xmlFindCharEncodingHandler("ascii"); - } - + handler = htmlFindOutputEncoder(encoding); buf = xmlAllocOutputBufferInternal(handler); if (buf == NULL) { *mem = NULL; @@ -712,14 +700,14 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { xmlBufWriteQuotedString(buf->buffer, escaped); xmlFree(escaped); } else { - xmlBufWriteQuotedString(buf->buffer, value); + buf->error = XML_ERR_NO_MEMORY; } } else { xmlBufWriteQuotedString(buf->buffer, value); } xmlFree(value); } else { - xmlOutputBufferWriteString(buf, "=\"\""); + buf->error = XML_ERR_NO_MEMORY; } } } @@ -860,10 +848,12 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlChar *buffer; buffer = xmlEncodeEntitiesReentrant(doc, cur->content); - if (buffer != NULL) { - xmlOutputBufferWriteString(buf, (const char *)buffer); - xmlFree(buffer); + if (buffer == NULL) { + buf->error = XML_ERR_NO_MEMORY; + return; } + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); } else { xmlOutputBufferWriteString(buf, (const char *)cur->content); } @@ -1039,26 +1029,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { } encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } - } else { - /* - * Fallback to HTML or ASCII when the encoding is unspecified - */ - if (handler == NULL) - handler = xmlFindCharEncodingHandler("HTML"); - if (handler == NULL) - handler = xmlFindCharEncodingHandler("ascii"); - } - + handler = htmlFindOutputEncoder(encoding); buf = xmlOutputBufferCreateFile(f, handler); if (buf == NULL) return(-1); htmlDocContentDumpOutput(buf, cur, NULL); @@ -1089,29 +1060,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { xmlInitParser(); encoding = (const char *) htmlGetMetaEncoding(cur); - - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } - } else { - /* - * Fallback to HTML or ASCII when the encoding is unspecified - */ - if (handler == NULL) - handler = xmlFindCharEncodingHandler("HTML"); - if (handler == NULL) - handler = xmlFindCharEncodingHandler("ascii"); - } - - /* - * save the content to a temp buffer. - */ + handler = htmlFindOutputEncoder(encoding); buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); if (buf == NULL) return(0); @@ -1144,28 +1093,12 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, xmlInitParser(); - if (encoding != NULL) { - xmlCharEncoding enc; - - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } - htmlSetMetaEncoding(cur, (const xmlChar *) encoding); - } else { + handler = htmlFindOutputEncoder(encoding); + if (handler != NULL) + htmlSetMetaEncoding(cur, (const xmlChar *) handler->name); + else htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); - /* - * Fallback to HTML or ASCII when the encoding is unspecified - */ - if (handler == NULL) - handler = xmlFindCharEncodingHandler("HTML"); - if (handler == NULL) - handler = xmlFindCharEncodingHandler("ascii"); - } - /* * save the content to a temp buffer. */ diff --git a/error.c b/error.c index 9480d63b..cf03bac2 100644 --- a/error.c +++ b/error.c @@ -25,8 +25,6 @@ #endif #endif -#define XML_MAX_ERRORS 100 - #define XML_GET_VAR_STR(msg, str) \ do { \ va_list ap; \ @@ -500,8 +498,6 @@ xmlVRaiseError(xmlStructuredErrorFunc schannel, xmlParserCtxtPtr ctxt = NULL; xmlNodePtr node = (xmlNodePtr) nod; char *str = NULL; - xmlParserInputPtr input = NULL; - /* xmlLastError is a macro retrieving the per-thread global. */ xmlErrorPtr lastError = &xmlLastError; xmlErrorPtr to = lastError; @@ -515,25 +511,6 @@ xmlVRaiseError(xmlStructuredErrorFunc schannel, (domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) || (domain == XML_FROM_IO) || (domain == XML_FROM_VALID)) { ctxt = (xmlParserCtxtPtr) ctx; - - if (ctxt != NULL) { - if (level == XML_ERR_WARNING) { - if (ctxt->nbWarnings >= XML_MAX_ERRORS) - return(0); - ctxt->nbWarnings += 1; - } else { - if (ctxt->nbErrors >= XML_MAX_ERRORS) - return(0); - ctxt->nbErrors += 1; - } - - if ((schannel == NULL) && (ctxt->sax != NULL) && - (ctxt->sax->initialized == XML_SAX2_MAGIC) && - (ctxt->sax->serror != NULL)) { - schannel = ctxt->sax->serror; - data = ctxt->userData; - } - } } /* * Check if structured error handler set @@ -560,18 +537,10 @@ xmlVRaiseError(xmlStructuredErrorFunc schannel, /* * specific processing if a parser context is provided */ - if ((ctxt != NULL) && (ctxt->input != NULL)) { - if (file == NULL) { - input = ctxt->input; - if ((input->filename == NULL) && (ctxt->inputNr > 1)) { - input = ctxt->inputTab[ctxt->inputNr - 2]; - } - file = input->filename; - line = input->line; - col = input->col; - } + if (ctxt != NULL) to = &ctxt->lastError; - } else if ((node != NULL) && (file == NULL)) { + + if ((node != NULL) && (file == NULL)) { int i; if ((node->doc != NULL) && (node->doc->URL != NULL)) { @@ -678,14 +647,7 @@ xmlVRaiseError(xmlStructuredErrorFunc schannel, /* * Find the callback channel if channel param is NULL */ - if ((ctxt != NULL) && (channel == NULL) && - (xmlStructuredError == NULL) && (ctxt->sax != NULL)) { - if (level == XML_ERR_WARNING) - channel = ctxt->sax->warning; - else - channel = ctxt->sax->error; - data = ctxt->userData; - } else if (channel == NULL) { + if ((ctxt == NULL) && (channel == NULL)) { channel = xmlGenericError; data = xmlGenericErrorContext; } diff --git a/fuzz/html.c b/fuzz/html.c index aa8d9d9b..a2fd1413 100644 --- a/fuzz/html.c +++ b/fuzz/html.c @@ -24,6 +24,7 @@ LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, int LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlParserCtxtPtr ctxt; htmlDocPtr doc; const char *docBuffer; size_t maxAlloc, docSize; @@ -31,7 +32,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { xmlFuzzDataInit(data, size); opts = (int) xmlFuzzReadInt(4); - maxAlloc = xmlFuzzReadInt(4) % (size + 1); + maxAlloc = xmlFuzzReadInt(4) % (size + 100); docBuffer = xmlFuzzReadRemaining(&docSize); if (docBuffer == NULL) { @@ -42,31 +43,50 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { /* Pull parser */ xmlFuzzMemSetLimit(maxAlloc); - doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts); + ctxt = htmlNewParserCtxt(); + if (ctxt != NULL) { + doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts); + xmlFuzzCheckMallocFailure("htmlCtxtReadMemory", + ctxt->errNo == XML_ERR_NO_MEMORY); + + if (doc != NULL) { + xmlDocPtr copy; #ifdef LIBXML_OUTPUT_ENABLED - { - xmlOutputBufferPtr out; + xmlOutputBufferPtr out; + const xmlChar *content; - /* - * Also test the serializer. Call htmlDocContentDumpOutput with our - * own buffer to avoid encoding the output. The HTML encoding is - * excruciatingly slow (see htmlEntityValueLookup). - */ - out = xmlAllocOutputBuffer(NULL); - htmlDocContentDumpOutput(out, doc, NULL); - xmlOutputBufferClose(out); - } + /* + * Also test the serializer. Call htmlDocContentDumpOutput with our + * own buffer to avoid encoding the output. The HTML encoding is + * excruciatingly slow (see htmlEntityValueLookup). + */ + xmlFuzzResetMallocFailed(); + out = xmlAllocOutputBuffer(NULL); + htmlDocContentDumpOutput(out, doc, NULL); + content = xmlOutputBufferGetContent(out); + xmlFuzzCheckMallocFailure("htmlDocContentDumpOutput", + content == NULL); + xmlOutputBufferClose(out); #endif - xmlFreeDoc(doc); + xmlFuzzResetMallocFailed(); + copy = xmlCopyDoc(doc, 1); + xmlFuzzCheckMallocFailure("xmlCopyNode", copy == NULL); + xmlFreeDoc(copy); + + xmlFreeDoc(doc); + } + + htmlFreeParserCtxt(ctxt); + } + /* Push parser */ #ifdef LIBXML_PUSH_ENABLED { static const size_t maxChunkSize = 128; - xmlParserCtxtPtr ctxt; size_t consumed, chunkSize; xmlFuzzMemSetLimit(maxAlloc); @@ -84,6 +104,8 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { } htmlParseChunk(ctxt, NULL, 0, 1); + xmlFuzzCheckMallocFailure("htmlParseChunk", + ctxt->errNo == XML_ERR_NO_MEMORY); xmlFreeDoc(ctxt->myDoc); htmlFreeParserCtxt(ctxt); }