From ae0765b6819c77aae2638b40735f354b05b9c150 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Thu, 31 Jul 2008 19:54:59 +0000 Subject: [PATCH] more progresses against the official regression tests small cleanup for * runxmlconf.c: more progresses against the official regression tests * runsuite.c: small cleanup for non-leak reports * include/libxml/tree.h: parsing flags and other properties are now added to the document node, this is generally useful and allow to make Name and NmToken validations based on the parser flags, more specifically the 5th edition of XML or not * HTMLparser.c tree.c: small side effects for the previous changes * parser.c SAX2.c valid.c: the bulk of teh changes are here, the parser and validation behaviour can be affected, parsing flags need to be copied, lot of changes. Also fixing various validation problems in the regression tests. Daniel svn path=/trunk/; revision=3762 --- ChangeLog | 14 ++ HTMLparser.c | 1 + SAX2.c | 6 + include/libxml/tree.h | 21 +++ parser.c | 109 ++++++++++--- runsuite.c | 2 +- runxmlconf.c | 141 +++++++++++------ tree.c | 4 +- valid.c | 361 +++++++++++++++++++++++++++--------------- 9 files changed, 463 insertions(+), 196 deletions(-) diff --git a/ChangeLog b/ChangeLog index 019730df..12f54601 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Thu Jul 31 21:49:45 CEST 2008 Daniel Veillard + + * runxmlconf.c: more progresses against the official regression tests + * runsuite.c: small cleanup for non-leak reports + * include/libxml/tree.h: parsing flags and other properties are + now added to the document node, this is generally useful and + allow to make Name and NmToken validations based on the parser + flags, more specifically the 5th edition of XML or not + * HTMLparser.c tree.c: small side effects for the previous changes + * parser.c SAX2.c valid.c: the bulk of teh changes are here, + the parser and validation behaviour can be affected, parsing + flags need to be copied, lot of changes. Also fixing various + validation problems in the regression tests. + Thu Jul 31 10:15:53 CEST 2008 Daniel Veillard * runxmlconf.c: added a skipped list, insert rmt-ns10-035 diff --git a/HTMLparser.c b/HTMLparser.c index 00cd6ad8..a16faf63 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2143,6 +2143,7 @@ htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { cur->refs = NULL; cur->_private = NULL; cur->charset = XML_CHAR_ENCODING_UTF8; + cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; if ((ExternalID != NULL) || (URI != NULL)) xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); diff --git a/SAX2.c b/SAX2.c index 080ec1bc..a8139602 100644 --- a/SAX2.c +++ b/SAX2.c @@ -957,6 +957,8 @@ xmlSAX2StartDocument(void *ctx) #ifdef LIBXML_HTML_ENABLED if (ctxt->myDoc == NULL) ctxt->myDoc = htmlNewDocNoDtD(NULL, NULL); + ctxt->myDoc->properties = XML_DOC_HTML; + ctxt->myDoc->parseFlags = ctxt->options; if (ctxt->myDoc == NULL) { xmlSAX2ErrMemory(ctxt, "xmlSAX2StartDocument"); return; @@ -972,6 +974,10 @@ xmlSAX2StartDocument(void *ctx) } else { doc = ctxt->myDoc = xmlNewDoc(ctxt->version); if (doc != NULL) { + doc->properties = 0; + if (ctxt->options & XML_PARSE_OLD10) + doc->properties |= XML_DOC_OLD10; + doc->parseFlags = ctxt->options; if (ctxt->encoding != NULL) doc->encoding = xmlStrdup(ctxt->encoding); else diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 0e1d47c7..5afac525 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -482,6 +482,23 @@ struct _xmlNode { #define XML_GET_LINE(n) \ (xmlGetLineNo(n)) +/** + * xmlDocProperty + * + * Set of properties of the document as found by the parser + * Some of them are linked to similary named xmlParserOption + */ +typedef enum { + XML_DOC_WELLFORMED = 1<<0, /* document is XML well formed */ + XML_DOC_NSVALID = 1<<1, /* document is Namespace valid */ + XML_DOC_OLD10 = 1<<2, /* parsed with old XML-1.0 parser */ + XML_DOC_DTDVALID = 1<<3, /* DTD validation was successful */ + XML_DOC_XINCLUDE = 1<<4, /* XInclude substitution was done */ + XML_DOC_USERBUILT = 1<<5, /* Document was built using the API + and not by parsing an instance */ + XML_DOC_INTERNAL = 1<<6, /* built for internal processing */ + XML_DOC_HTML = 1<<7 /* parsed or built HTML document */ +} xmlDocProperties; /** * xmlDoc: @@ -521,6 +538,10 @@ struct _xmlDoc { actually an xmlCharEncoding */ struct _xmlDict *dict; /* dict used to allocate names or NULL */ void *psvi; /* for type/PSVI informations */ + int parseFlags; /* set of xmlParserOption used to parse the + document */ + int properties; /* set of xmlDocProperties for this document + set at the end of parsing */ }; diff --git a/parser.c b/parser.c index a0194dc6..325b574e 100644 --- a/parser.c +++ b/parser.c @@ -448,7 +448,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, */ static void xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1) + const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; @@ -464,8 +464,8 @@ xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, ctxt->vctxt.error, ctxt->vctxt.userData, ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR, NULL, 0, (const char *) str1, - NULL, NULL, 0, 0, - msg, (const char *) str1); + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); if (ctxt != NULL) { ctxt->valid = 0; } @@ -889,7 +889,7 @@ typedef xmlDefAttrs *xmlDefAttrsPtr; struct _xmlDefAttrs { int nbAttrs; /* number of defaulted attributes on that element */ int maxAttrs; /* the size of the array */ - const xmlChar *values[4]; /* array of localname/prefix/values */ + const xmlChar *values[5]; /* array of localname/prefix/values/external */ }; /** @@ -943,7 +943,7 @@ xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) * is needed. */ static const xmlChar * -xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) +xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) { int i; int remove_head = 0; @@ -984,7 +984,8 @@ xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) return(ret); } else if (remove_head) { *len -= remove_head; - return(src + remove_head); + memmove(src, src + remove_head, 1 + *len); + return(src); } return(NULL); } @@ -1041,7 +1042,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); if (defaults == NULL) { defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + - (4 * 4) * sizeof(const xmlChar *)); + (4 * 5) * sizeof(const xmlChar *)); if (defaults == NULL) goto mem_error; defaults->nbAttrs = 0; @@ -1055,7 +1056,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, xmlDefAttrsPtr temp; temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + - (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); + (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); if (temp == NULL) goto mem_error; defaults = temp; @@ -1080,13 +1081,17 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, prefix = xmlDictLookup(ctxt->dict, fullattr, len); } - defaults->values[4 * defaults->nbAttrs] = name; - defaults->values[4 * defaults->nbAttrs + 1] = prefix; + defaults->values[5 * defaults->nbAttrs] = name; + defaults->values[5 * defaults->nbAttrs + 1] = prefix; /* intern the string and precompute the end */ len = xmlStrlen(value); value = xmlDictLookup(ctxt->dict, value, len); - defaults->values[4 * defaults->nbAttrs + 2] = value; - defaults->values[4 * defaults->nbAttrs + 3] = value + len; + defaults->values[5 * defaults->nbAttrs + 2] = value; + defaults->values[5 * defaults->nbAttrs + 3] = value + len; + if (ctxt->external) + defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; + else + defaults->values[5 * defaults->nbAttrs + 4] = NULL; defaults->nbAttrs++; return; @@ -2264,7 +2269,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", - name); + name, NULL); } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", @@ -4947,6 +4952,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, "New Doc failed"); return; } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, @@ -5019,6 +5025,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, "New Doc failed"); return; } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) @@ -5519,7 +5526,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); @@ -5577,7 +5584,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } SKIP(2); } else { @@ -5809,7 +5816,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; if (RAW == '?') { @@ -6362,6 +6369,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, } if (ctxt->myDoc == NULL) { ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return; + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); @@ -7994,7 +8006,13 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s:'\n", p, NULL, NULL); - tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + l = xmlParseNmtoken(ctxt); + if (l == NULL) + tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + else { + tmp = xmlBuildQName(l, p, NULL, 0); + xmlFree((char *)l); + } p = xmlDictLookup(ctxt->dict, tmp, -1); if (tmp != NULL) xmlFree(tmp); *prefix = NULL; @@ -8302,7 +8320,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, const xmlChar *val2; val2 = xmlAttrNormalizeSpace2(ctxt, val, len); - if (val2 != NULL) { + if ((val2 != NULL) && (val2 != val)) { xmlFree(val); val = (xmlChar *) val2; } @@ -8642,8 +8660,8 @@ failed: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); if (defaults != NULL) { for (i = 0;i < defaults->nbAttrs;i++) { - attname = defaults->values[4 * i]; - aprefix = defaults->values[4 * i + 1]; + attname = defaults->values[5 * i]; + aprefix = defaults->values[5 * i + 1]; /* * special work for namespaces defaulted defs @@ -8658,9 +8676,9 @@ failed: if (j <= nbNs) continue; nsname = xmlGetNamespace(ctxt, NULL); - if (nsname != defaults->values[4 * i + 2]) { + if (nsname != defaults->values[5 * i + 2]) { if (nsPush(ctxt, NULL, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else if (aprefix == ctxt->str_xmlns) { @@ -8675,7 +8693,7 @@ failed: nsname = xmlGetNamespace(ctxt, attname); if (nsname != defaults->values[2]) { if (nsPush(ctxt, attname, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else { @@ -8701,8 +8719,14 @@ failed: atts[nbatts++] = NULL; else atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); - atts[nbatts++] = defaults->values[4 * i + 2]; - atts[nbatts++] = defaults->values[4 * i + 3]; + atts[nbatts++] = defaults->values[5 * i + 2]; + atts[nbatts++] = defaults->values[5 * i + 3]; + if ((ctxt->standalone == 1) && + (defaults->values[5 * i + 4] != NULL)) { + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + "standalone: attribute %s on %s defaulted from external subset\n", + attname, localname); + } nbdef++; } } @@ -9876,6 +9900,15 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { ctxt->myDoc = NULL; } + if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { + ctxt->myDoc->properties |= XML_DOC_WELLFORMED; + if (ctxt->valid) + ctxt->myDoc->properties |= XML_DOC_DTDVALID; + if (ctxt->nsWellFormed) + ctxt->myDoc->properties |= XML_DOC_NSVALID; + if (ctxt->options & XML_PARSE_OLD10) + ctxt->myDoc->properties |= XML_DOC_OLD10; + } if (! ctxt->wellFormed) { ctxt->valid = 0; return(-1); @@ -11537,6 +11570,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", BAD_CAST "none", BAD_CAST "none"); @@ -11670,6 +11708,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", ExternalID, SystemID); xmlParseExternalSubset(ctxt, ExternalID, SystemID); @@ -11795,6 +11840,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if (ctx->myDoc->dict) { newDoc->dict = ctx->myDoc->dict; xmlDictReference(newDoc->dict); @@ -12015,6 +12061,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->intSubset = doc->intSubset; newDoc->extSubset = doc->extSubset; newDoc->dict = doc->dict; @@ -12246,6 +12293,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->dict = ctxt->dict; xmlDictReference(newDoc->dict); ctxt->myDoc = newDoc; @@ -12610,6 +12658,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if ((doc != NULL) && (doc->dict != NULL)) { xmlDictFree(ctxt->dict); ctxt->dict = doc->dict; @@ -13800,32 +13849,38 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi if (options & XML_PARSE_RECOVER) { ctxt->recovery = 1; options -= XML_PARSE_RECOVER; + ctxt->options |= XML_PARSE_RECOVER; } else ctxt->recovery = 0; if (options & XML_PARSE_DTDLOAD) { ctxt->loadsubset = XML_DETECT_IDS; options -= XML_PARSE_DTDLOAD; + ctxt->options |= XML_PARSE_DTDLOAD; } else ctxt->loadsubset = 0; if (options & XML_PARSE_DTDATTR) { ctxt->loadsubset |= XML_COMPLETE_ATTRS; options -= XML_PARSE_DTDATTR; + ctxt->options |= XML_PARSE_DTDATTR; } if (options & XML_PARSE_NOENT) { ctxt->replaceEntities = 1; /* ctxt->loadsubset |= XML_DETECT_IDS; */ options -= XML_PARSE_NOENT; + ctxt->options |= XML_PARSE_NOENT; } else ctxt->replaceEntities = 0; if (options & XML_PARSE_PEDANTIC) { ctxt->pedantic = 1; options -= XML_PARSE_PEDANTIC; + ctxt->options |= XML_PARSE_PEDANTIC; } else ctxt->pedantic = 0; if (options & XML_PARSE_NOBLANKS) { ctxt->keepBlanks = 0; ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; options -= XML_PARSE_NOBLANKS; + ctxt->options |= XML_PARSE_NOBLANKS; } else ctxt->keepBlanks = 1; if (options & XML_PARSE_DTDVALID) { @@ -13835,6 +13890,7 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi if (options & XML_PARSE_NOERROR) ctxt->vctxt.error = NULL; options -= XML_PARSE_DTDVALID; + ctxt->options |= XML_PARSE_DTDVALID; } else ctxt->validate = 0; if (options & XML_PARSE_NOWARNING) { @@ -13854,17 +13910,20 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi ctxt->sax->endElementNs = NULL; ctxt->sax->initialized = 1; options -= XML_PARSE_SAX1; + ctxt->options |= XML_PARSE_SAX1; } #endif /* LIBXML_SAX1_ENABLED */ if (options & XML_PARSE_NODICT) { ctxt->dictNames = 0; options -= XML_PARSE_NODICT; + ctxt->options |= XML_PARSE_NODICT; } else { ctxt->dictNames = 1; } if (options & XML_PARSE_NOCDATA) { ctxt->sax->cdataBlock = NULL; options -= XML_PARSE_NOCDATA; + ctxt->options |= XML_PARSE_NOCDATA; } if (options & XML_PARSE_NSCLEAN) { ctxt->options |= XML_PARSE_NSCLEAN; diff --git a/runsuite.c b/runsuite.c index 744875c7..af8ee5e9 100644 --- a/runsuite.c +++ b/runsuite.c @@ -358,7 +358,7 @@ done: if (rng != NULL) xmlRelaxNGFree(rng); xmlResetLastError(); - if ((memt != xmlMemUsed()) && (extraMemoryFromResolver == 0)) { + if ((memt < xmlMemUsed()) && (extraMemoryFromResolver == 0)) { test_log("Validation of tests starting line %ld leaked %d\n", xmlGetLineNo(cur), xmlMemUsed() - memt); nb_leaks++; diff --git a/runxmlconf.c b/runxmlconf.c index f5dbe441..2a1aa9dd 100644 --- a/runxmlconf.c +++ b/runxmlconf.c @@ -92,49 +92,13 @@ static int nb_skipped = 0; static int nb_tests = 0; static int nb_errors = 0; static int nb_leaks = 0; -static int extraMemoryFromResolver = 0; - -static int -fatalError(void) { - fprintf(stderr, "Exitting tests on fatal error\n"); - exit(1); -} - -/* - * that's needed to implement - */ -#define MAX_ENTITIES 20 -static char *testEntitiesName[MAX_ENTITIES]; -static char *testEntitiesValue[MAX_ENTITIES]; -static int nb_entities = 0; -static void resetEntities(void) { - int i; - - for (i = 0;i < nb_entities;i++) { - if (testEntitiesName[i] != NULL) - xmlFree(testEntitiesName[i]); - if (testEntitiesValue[i] != NULL) - xmlFree(testEntitiesValue[i]); - } - nb_entities = 0; -} -static int addEntity(char *name, char *content) { - if (nb_entities >= MAX_ENTITIES) { - fprintf(stderr, "Too many entities defined\n"); - return(-1); - } - testEntitiesName[nb_entities] = name; - testEntitiesValue[nb_entities] = content; - nb_entities++; - return(0); -} /* * We need to trap calls to the resolver to not account memory for the catalog * and not rely on any external resources. */ static xmlParserInputPtr -testExternalEntityLoader(const char *URL, const char *ID, +testExternalEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, xmlParserCtxtPtr ctxt) { xmlParserInputPtr ret; @@ -149,6 +113,8 @@ testExternalEntityLoader(const char *URL, const char *ID, */ static char testErrors[32769]; static int testErrorsSize = 0; +static int nbError = 0; +static int nbFatal = 0; static void test_log(const char *msg, ...) { va_list args; @@ -168,17 +134,19 @@ static void test_log(const char *msg, ...) { } static void -testErrorHandler(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { - va_list args; +testErrorHandler(void *userData ATTRIBUTE_UNUSED, xmlErrorPtr error) { int res; if (testErrorsSize >= 32768) return; - va_start(args, msg); - res = vsnprintf(&testErrors[testErrorsSize], + res = snprintf(&testErrors[testErrorsSize], 32768 - testErrorsSize, - msg, args); - va_end(args); + "%s:%d: %s\n", (error->file ? error->file : "entity"), + error->line, error->message); + if (error->level == XML_ERR_FATAL) + nbFatal++; + else if (error->level == XML_ERR_ERROR) + nbError++; if (testErrorsSize + res >= 32768) { /* buffer is full */ testErrorsSize = 32768; @@ -210,7 +178,7 @@ initializeLibxml2(void) { */ if (ctxtXPath->cache != NULL) xmlXPathContextSetCache(ctxtXPath, 0, -1, 0); - xmlSetGenericErrorFunc(NULL, testErrorHandler); + xmlSetStructuredErrorFunc(NULL, testErrorHandler); } /************************************************************************ @@ -219,6 +187,68 @@ initializeLibxml2(void) { * * ************************************************************************/ +static int +xmlconfTestInvalid(const char *id, const char *filename, int options) { + xmlDocPtr doc; + xmlParserCtxtPtr ctxt; + int ret = 1; + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + test_log("test %s : %s out of memory\n", + id, filename); + return(0); + } + doc = xmlCtxtReadFile(ctxt, filename, NULL, options); + if (doc == NULL) { + test_log("test %s : %s invalid document turned not well-formed too\n", + id, filename); + } else { + /* invalidity should be reported both in the context and in the document */ + if ((ctxt->valid != 0) || (doc->properties & XML_DOC_DTDVALID)) { + test_log("test %s : %s failed to detect invalid document\n", + id, filename); + nb_errors++; + ret = 0; + } + xmlFreeDoc(doc); + } + xmlFreeParserCtxt(ctxt); + return(ret); +} + +static int +xmlconfTestValid(const char *id, const char *filename, int options) { + xmlDocPtr doc; + xmlParserCtxtPtr ctxt; + int ret = 1; + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + test_log("test %s : %s out of memory\n", + id, filename); + return(0); + } + doc = xmlCtxtReadFile(ctxt, filename, NULL, options); + if (doc == NULL) { + test_log("test %s : %s failed to parse a valid document\n", + id, filename); + nb_errors++; + ret = 0; + } else { + /* validity should be reported both in the context and in the document */ + if ((ctxt->valid == 0) || ((doc->properties & XML_DOC_DTDVALID) == 0)) { + test_log("test %s : %s failed to validate a valid document\n", + id, filename); + nb_errors++; + ret = 0; + } + xmlFreeDoc(doc); + } + xmlFreeParserCtxt(ctxt); + return(ret); +} + static int xmlconfTestNotNSWF(const char *id, const char *filename, int options) { xmlDocPtr doc; @@ -272,6 +302,7 @@ xmlconfTestItem(xmlDocPtr doc, xmlNodePtr cur) { xmlChar *base = NULL; xmlChar *id = NULL; xmlChar *rec = NULL; + xmlChar *version = NULL; xmlChar *entities = NULL; xmlChar *edition = NULL; int options = 0; @@ -280,6 +311,8 @@ xmlconfTestItem(xmlDocPtr doc, xmlNodePtr cur) { int i; testErrorsSize = 0; testErrors[0] = 0; + nbError = 0; + nbFatal = 0; id = xmlGetProp(cur, BAD_CAST "ID"); if (id == NULL) { test_log("test missing ID, line %ld\n", xmlGetLineNo(cur)); @@ -311,6 +344,8 @@ xmlconfTestItem(xmlDocPtr doc, xmlNodePtr cur) { goto error; } + version = xmlGetProp(cur, BAD_CAST "VERSION"); + entities = xmlGetProp(cur, BAD_CAST "ENTITIES"); if (!xmlStrEqual(entities, BAD_CAST "none")) { options |= XML_PARSE_DTDLOAD; @@ -322,13 +357,19 @@ xmlconfTestItem(xmlDocPtr doc, xmlNodePtr cur) { (xmlStrEqual(rec, BAD_CAST "XML1.0-errata2e")) || (xmlStrEqual(rec, BAD_CAST "XML1.0-errata3e")) || (xmlStrEqual(rec, BAD_CAST "XML1.0-errata4e"))) { + if ((version != NULL) && (!xmlStrEqual(version, BAD_CAST "1.0"))) { + test_log("Skipping test %s for %s\n", (char *) id, + (char *) version); + ret = 0; + nb_skipped++; + goto error; + } ret = 1; } else if ((xmlStrEqual(rec, BAD_CAST "NS1.0")) || (xmlStrEqual(rec, BAD_CAST "NS1.0-errata1e"))) { ret = 1; nstest = 1; } else { - testErrorsSize = 0; testErrors[0] = 0; test_log("Skipping test %s for REC %s\n", (char *) id, (char *) rec); ret = 0; nb_skipped++; @@ -353,8 +394,16 @@ xmlconfTestItem(xmlDocPtr doc, xmlNodePtr cur) { else xmlconfTestNotNSWF((char *) id, (char *) filename, options); } else if (xmlStrEqual(type, BAD_CAST "valid")) { + options |= XML_PARSE_DTDVALID; + xmlconfTestValid((char *) id, (char *) filename, options); } else if (xmlStrEqual(type, BAD_CAST "invalid")) { + options |= XML_PARSE_DTDVALID; + xmlconfTestInvalid((char *) id, (char *) filename, options); } else if (xmlStrEqual(type, BAD_CAST "error")) { + test_log("Skipping error test %s \n", (char *) id); + ret = 0; + nb_skipped++; + goto error; } else { test_log("test %s unknown TYPE value %s\n", (char *) id, (char *)type); ret = -1; @@ -381,6 +430,8 @@ error: xmlFree(entities); if (edition != NULL) xmlFree(edition); + if (version != NULL) + xmlFree(version); if (filename != NULL) xmlFree(filename); if (uri != NULL) diff --git a/tree.c b/tree.c index 763381b8..432e5b21 100644 --- a/tree.c +++ b/tree.c @@ -1148,11 +1148,13 @@ xmlNewDoc(const xmlChar *version) { if (cur->version == NULL) { xmlTreeErrMemory("building doc"); xmlFree(cur); - return(NULL); + return(NULL); } cur->standalone = -1; cur->compression = -1; /* not initialized */ cur->doc = cur; + cur->parseFlags = 0; + cur->properties = XML_DOC_USERBUILT; /* * The in memory encoding is always UTF8 * This field will never change and would diff --git a/valid.c b/valid.c index 2510b8d6..594d3839 100644 --- a/valid.c +++ b/valid.c @@ -36,6 +36,11 @@ static xmlElementPtr xmlGetDtdElementDesc2(xmlDtdPtr dtd, const xmlChar *name, "Unimplemented block at %s:%d\n", \ __FILE__, __LINE__); +#ifdef LIBXML_VALID_ENABLED +static int +xmlValidateAttributeValueInternal(xmlDocPtr doc, xmlAttributeType type, + const xmlChar *value); +#endif /************************************************************************ * * * Error handling routines * @@ -2024,7 +2029,7 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, return(NULL); } if ((defaultValue != NULL) && - (!xmlValidateAttributeValue(type, defaultValue))) { + (!xmlValidateAttributeValueInternal(dtd->doc, type, defaultValue))) { xmlErrValidNode(ctxt, (xmlNodePtr) dtd, XML_DTD_ATTRIBUTE_DEFAULT, "Attribute %s of %s: invalid default value\n", elem, name, defaultValue); @@ -2042,8 +2047,10 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, (dtd->doc->intSubset != NULL) && (dtd->doc->intSubset->attributes != NULL)) { ret = xmlHashLookup3(dtd->doc->intSubset->attributes, name, ns, elem); - if (ret != NULL) + if (ret != NULL) { + xmlFreeEnumeration(tree); return(NULL); + } } /* @@ -2057,6 +2064,7 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, if (table == NULL) { xmlVErrMemory(ctxt, "xmlAddAttributeDecl: Table creation failed!\n"); + xmlFreeEnumeration(tree); return(NULL); } @@ -2064,6 +2072,7 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, ret = (xmlAttributePtr) xmlMalloc(sizeof(xmlAttribute)); if (ret == NULL) { xmlVErrMemory(ctxt, "malloc failed"); + xmlFreeEnumeration(tree); return(NULL); } memset(ret, 0, sizeof(xmlAttribute)); @@ -3445,6 +3454,109 @@ xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) { } #ifdef LIBXML_VALID_ENABLED + +static int +xmlIsDocNameStartChar(xmlDocPtr doc, int c) { + if ((doc == NULL) || (doc->properties & XML_DOC_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))) + return(1); + } else { + if (IS_LETTER(c) || (c == '_') || (c == ':')) + return(1); + } + return(0); +} + +static int +xmlIsDocNameChar(xmlDocPtr doc, int c) { + if ((doc == NULL) || (doc->properties & XML_DOC_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))) + return(1); + } else { + if ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) + return(1); + } + return(0); +} + +/** + * xmlValidateNameValue: + * @doc: pointer to the document or NULL + * @value: an Name value + * + * Validate that the given value match Name production + * + * returns 1 if valid or 0 otherwise + */ + +static int +xmlValidateNameValueInternal(xmlDocPtr doc, const xmlChar *value) { + const xmlChar *cur; + int val, len; + + if (value == NULL) return(0); + cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + if (!xmlIsDocNameStartChar(doc, val)) + return(0); + + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + while (xmlIsDocNameChar(doc, val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (val != 0) return(0); + + return(1); +} + /** * xmlValidateNameValue: * @value: an Name value @@ -3456,6 +3568,21 @@ xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) { int xmlValidateNameValue(const xmlChar *value) { + return(xmlValidateNameValueInternal(NULL, value)); +} + +/** + * xmlValidateNamesValueInternal: + * @doc: pointer to the document or NULL + * @value: an Names value + * + * Validate that the given value match Names production + * + * returns 1 if valid or 0 otherwise + */ + +static int +xmlValidateNamesValueInternal(xmlDocPtr doc, const xmlChar *value) { const xmlChar *cur; int val, len; @@ -3463,22 +3590,36 @@ xmlValidateNameValue(const xmlChar *value) { cur = value; val = xmlStringCurrentChar(NULL, cur, &len); cur += len; - if (!IS_LETTER(val) && (val != '_') && - (val != ':')) { + + if (!xmlIsDocNameStartChar(doc, val)) return(0); - } val = xmlStringCurrentChar(NULL, cur, &len); cur += len; - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { + while (xmlIsDocNameChar(doc, val)) { val = xmlStringCurrentChar(NULL, cur, &len); cur += len; } + /* Should not test IS_BLANK(val) here -- see erratum E20*/ + while (val == 0x20) { + while (val == 0x20) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (!xmlIsDocNameStartChar(doc, val)) + return(0); + + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + while (xmlIsDocNameChar(doc, val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + if (val != 0) return(0); return(1); @@ -3495,6 +3636,23 @@ xmlValidateNameValue(const xmlChar *value) { int xmlValidateNamesValue(const xmlChar *value) { + return(xmlValidateNamesValueInternal(NULL, value)); +} + +/** + * xmlValidateNmtokenValueInternal: + * @doc: pointer to the document or NULL + * @value: an Nmtoken value + * + * Validate that the given value match Nmtoken production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 otherwise + */ + +static int +xmlValidateNmtokenValueInternal(xmlDocPtr doc, const xmlChar *value) { const xmlChar *cur; int val, len; @@ -3502,47 +3660,17 @@ xmlValidateNamesValue(const xmlChar *value) { cur = value; val = xmlStringCurrentChar(NULL, cur, &len); cur += len; - - if (!IS_LETTER(val) && (val != '_') && - (val != ':')) { + + if (!xmlIsDocNameChar(doc, val)) return(0); - } val = xmlStringCurrentChar(NULL, cur, &len); cur += len; - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { + while (xmlIsDocNameChar(doc, val)) { val = xmlStringCurrentChar(NULL, cur, &len); cur += len; } - /* Should not test IS_BLANK(val) here -- see erratum E20*/ - while (val == 0x20) { - while (val == 0x20) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - - if (!IS_LETTER(val) && (val != '_') && - (val != ':')) { - return(0); - } - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - } - if (val != 0) return(0); return(1); @@ -3555,12 +3683,29 @@ xmlValidateNamesValue(const xmlChar *value) { * Validate that the given value match Nmtoken production * * [ VC: Name Token ] - * + * * returns 1 if valid or 0 otherwise */ int xmlValidateNmtokenValue(const xmlChar *value) { + return(xmlValidateNmtokenValueInternal(NULL, value)); +} + +/** + * xmlValidateNmtokensValueInternal: + * @doc: pointer to the document or NULL + * @value: an Nmtokens value + * + * Validate that the given value match Nmtokens production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 otherwise + */ + +static int +xmlValidateNmtokensValueInternal(xmlDocPtr doc, const xmlChar *value) { const xmlChar *cur; int val, len; @@ -3568,23 +3713,40 @@ xmlValidateNmtokenValue(const xmlChar *value) { cur = value; val = xmlStringCurrentChar(NULL, cur, &len); cur += len; - - if (!IS_LETTER(val) && !IS_DIGIT(val) && - (val != '.') && (val != '-') && - (val != '_') && (val != ':') && - (!IS_COMBINING(val)) && - (!IS_EXTENDER(val))) - return(0); - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { + while (IS_BLANK(val)) { val = xmlStringCurrentChar(NULL, cur, &len); cur += len; } + if (!xmlIsDocNameChar(doc, val)) + return(0); + + while (xmlIsDocNameChar(doc, val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + /* Should not test IS_BLANK(val) here -- see erratum E20*/ + while (val == 0x20) { + while (val == 0x20) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + if (val == 0) return(1); + + if (!xmlIsDocNameChar(doc, val)) + return(0); + + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + while (xmlIsDocNameChar(doc, val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + if (val != 0) return(0); return(1); @@ -3597,69 +3759,13 @@ xmlValidateNmtokenValue(const xmlChar *value) { * Validate that the given value match Nmtokens production * * [ VC: Name Token ] - * + * * returns 1 if valid or 0 otherwise */ int xmlValidateNmtokensValue(const xmlChar *value) { - const xmlChar *cur; - int val, len; - - if (value == NULL) return(0); - cur = value; - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - - while (IS_BLANK(val)) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - - if (!IS_LETTER(val) && !IS_DIGIT(val) && - (val != '.') && (val != '-') && - (val != '_') && (val != ':') && - (!IS_COMBINING(val)) && - (!IS_EXTENDER(val))) - return(0); - - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - - /* Should not test IS_BLANK(val) here -- see erratum E20*/ - while (val == 0x20) { - while (val == 0x20) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - if (val == 0) return(1); - - if (!IS_LETTER(val) && !IS_DIGIT(val) && - (val != '.') && (val != '-') && - (val != '_') && (val != ':') && - (!IS_COMBINING(val)) && - (!IS_EXTENDER(val))) - return(0); - - while ((IS_LETTER(val)) || (IS_DIGIT(val)) || - (val == '.') || (val == '-') || - (val == '_') || (val == ':') || - (IS_COMBINING(val)) || - (IS_EXTENDER(val))) { - val = xmlStringCurrentChar(NULL, cur, &len); - cur += len; - } - } - - if (val != 0) return(0); - - return(1); + return(xmlValidateNmtokensValueInternal(NULL, value)); } /** @@ -3710,28 +3816,34 @@ xmlValidateNotationDecl(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDocPtr doc ATT * returns 1 if valid or 0 otherwise */ -int -xmlValidateAttributeValue(xmlAttributeType type, const xmlChar *value) { +static int +xmlValidateAttributeValueInternal(xmlDocPtr doc, xmlAttributeType type, + const xmlChar *value) { switch (type) { case XML_ATTRIBUTE_ENTITIES: case XML_ATTRIBUTE_IDREFS: - return(xmlValidateNamesValue(value)); + return(xmlValidateNamesValueInternal(doc, value)); case XML_ATTRIBUTE_ENTITY: case XML_ATTRIBUTE_IDREF: case XML_ATTRIBUTE_ID: case XML_ATTRIBUTE_NOTATION: - return(xmlValidateNameValue(value)); + return(xmlValidateNameValueInternal(doc, value)); case XML_ATTRIBUTE_NMTOKENS: case XML_ATTRIBUTE_ENUMERATION: - return(xmlValidateNmtokensValue(value)); + return(xmlValidateNmtokensValueInternal(doc, value)); case XML_ATTRIBUTE_NMTOKEN: - return(xmlValidateNmtokenValue(value)); + return(xmlValidateNmtokenValueInternal(doc, value)); case XML_ATTRIBUTE_CDATA: break; } return(1); } +int +xmlValidateAttributeValue(xmlAttributeType type, const xmlChar *value) { + return(xmlValidateAttributeValueInternal(NULL, type, value)); +} + /** * xmlValidateAttributeValue2: * @ctxt: the validation context @@ -4047,11 +4159,12 @@ xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, int val; CHECK_DTD; if(attr == NULL) return(1); - + /* Attribute Default Legal */ /* Enumeration */ if (attr->defaultValue != NULL) { - val = xmlValidateAttributeValue(attr->atype, attr->defaultValue); + val = xmlValidateAttributeValueInternal(doc, attr->atype, + attr->defaultValue); if (val == 0) { xmlErrValidNode(ctxt, (xmlNodePtr) attr, XML_DTD_ATTRIBUTE_DEFAULT, "Syntax of default value for attribute %s of %s is not valid\n", @@ -4332,7 +4445,7 @@ xmlValidateOneAttribute(xmlValidCtxtPtr ctxt, xmlDocPtr doc, } attr->atype = attrDecl->atype; - val = xmlValidateAttributeValue(attrDecl->atype, value); + val = xmlValidateAttributeValueInternal(doc, attrDecl->atype, value); if (val == 0) { xmlErrValidNode(ctxt, elem, XML_DTD_ATTRIBUTE_VALUE, "Syntax of value for attribute %s of %s is not valid\n", @@ -4517,7 +4630,7 @@ xmlNodePtr elem, const xmlChar *prefix, xmlNsPtr ns, const xmlChar *value) { return(0); } - val = xmlValidateAttributeValue(attrDecl->atype, value); + val = xmlValidateAttributeValueInternal(doc, attrDecl->atype, value); if (val == 0) { if (ns->prefix != NULL) { xmlErrValidNode(ctxt, elem, XML_DTD_INVALID_DEFAULT,