1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-05 16:58:17 +03:00

testing and bug fixing related to XSLT:

- xpath.c result/XPath/tests/chaptersprefol: bugfixes on order and
  on predicate
- HTMLparser.[ch] HTMLtree.c result/HTML/doc3.htm.err
  result/HTML/doc3.htm.sax result/HTML/wired.html: sometimes one
  really wants to have tags closed on output even if we accept
  unclosed ones on input
Daniel
This commit is contained in:
Daniel Veillard 2001-02-13 17:05:35 +00:00
parent 5dd2f0a6cd
commit f41fbbf6a9
10 changed files with 169 additions and 115 deletions

View File

@ -1,3 +1,12 @@
Tue Feb 13 18:01:48 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* xpath.c result/XPath/tests/chaptersprefol: bugfixes on order and
on predicate
* HTMLparser.[ch] HTMLtree.c result/HTML/doc3.htm.err
result/HTML/doc3.htm.sax result/HTML/wired.html: sometimes one
really wants to have tags closed on output even if we accept
unclosed ones on input
Mon Feb 12 18:33:20 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* xpath.c: ouch don't free NULL, rare case fixed

View File

@ -377,100 +377,100 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
* DTD: 1 means that this element is valid only in the Loose DTD
* 2 means that this element is valid only in the Frameset DTD
*
* Name,Start Tag,End Tag, Empty, Depr., DTD, Description
* Name,Start Tag,End Tag,Save End, Empty, Depr., DTD, Description
*/
htmlElemDesc html40ElementTable[] = {
{ "a", 0, 0, 0, 0, 0, "anchor " },
{ "abbr", 0, 0, 0, 0, 0, "abbreviated form" },
{ "acronym", 0, 0, 0, 0, 0, "" },
{ "address", 0, 0, 0, 0, 0, "information on author " },
{ "applet", 0, 0, 0, 1, 1, "java applet " },
{ "area", 0, 2, 1, 0, 0, "client-side image map area " },
{ "b", 0, 0, 0, 0, 0, "bold text style" },
{ "base", 0, 2, 1, 0, 0, "document base uri " },
{ "basefont", 0, 2, 1, 1, 1, "base font size " },
{ "bdo", 0, 0, 0, 0, 0, "i18n bidi over-ride " },
{ "big", 0, 0, 0, 0, 0, "large text style" },
{ "blockquote", 0, 0, 0, 0, 0, "long quotation " },
{ "body", 1, 1, 0, 0, 0, "document body " },
{ "br", 0, 2, 1, 0, 0, "forced line break " },
{ "button", 0, 0, 0, 0, 0, "push button " },
{ "caption", 0, 0, 0, 0, 0, "table caption " },
{ "center", 0, 0, 0, 1, 1, "shorthand for div align=center " },
{ "cite", 0, 0, 0, 0, 0, "citation" },
{ "code", 0, 0, 0, 0, 0, "computer code fragment" },
{ "col", 0, 2, 1, 0, 0, "table column " },
{ "colgroup", 0, 1, 0, 0, 0, "table column group " },
{ "dd", 0, 1, 0, 0, 0, "definition description " },
{ "del", 0, 0, 0, 0, 0, "deleted text " },
{ "dfn", 0, 0, 0, 0, 0, "instance definition" },
{ "dir", 0, 0, 0, 1, 1, "directory list" },
{ "div", 0, 0, 0, 0, 0, "generic language/style container"},
{ "dl", 0, 0, 0, 0, 0, "definition list " },
{ "dt", 0, 1, 0, 0, 0, "definition term " },
{ "em", 0, 0, 0, 0, 0, "emphasis" },
{ "fieldset", 0, 0, 0, 0, 0, "form control group " },
{ "font", 0, 0, 0, 1, 1, "local change to font " },
{ "form", 0, 0, 0, 0, 0, "interactive form " },
{ "frame", 0, 2, 1, 0, 2, "subwindow " },
{ "frameset", 0, 0, 0, 0, 2, "window subdivision" },
{ "h1", 0, 0, 0, 0, 0, "heading " },
{ "h2", 0, 0, 0, 0, 0, "heading " },
{ "h3", 0, 0, 0, 0, 0, "heading " },
{ "h4", 0, 0, 0, 0, 0, "heading " },
{ "h5", 0, 0, 0, 0, 0, "heading " },
{ "h6", 0, 0, 0, 0, 0, "heading " },
{ "head", 1, 1, 0, 0, 0, "document head " },
{ "hr", 0, 2, 1, 0, 0, "horizontal rule " },
{ "html", 1, 1, 0, 0, 0, "document root element " },
{ "i", 0, 0, 0, 0, 0, "italic text style" },
{ "iframe", 0, 0, 0, 0, 1, "inline subwindow " },
{ "img", 0, 2, 1, 0, 0, "embedded image " },
{ "input", 0, 2, 1, 0, 0, "form control " },
{ "ins", 0, 0, 0, 0, 0, "inserted text" },
{ "isindex", 0, 2, 1, 1, 1, "single line prompt " },
{ "kbd", 0, 0, 0, 0, 0, "text to be entered by the user" },
{ "label", 0, 0, 0, 0, 0, "form field label text " },
{ "legend", 0, 0, 0, 0, 0, "fieldset legend " },
{ "li", 0, 1, 0, 0, 0, "list item " },
{ "link", 0, 2, 1, 0, 0, "a media-independent link " },
{ "map", 0, 0, 0, 0, 0, "client-side image map " },
{ "menu", 0, 0, 0, 1, 1, "menu list " },
{ "meta", 0, 2, 1, 0, 0, "generic metainformation " },
{ "noframes", 0, 0, 0, 0, 2, "alternate content container for non frame-based rendering " },
{ "noscript", 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " },
{ "object", 0, 0, 0, 0, 0, "generic embedded object " },
{ "ol", 0, 0, 0, 0, 0, "ordered list " },
{ "optgroup", 0, 0, 0, 0, 0, "option group " },
{ "option", 0, 1, 0, 0, 0, "selectable choice " },
{ "p", 0, 1, 0, 0, 0, "paragraph " },
{ "param", 0, 2, 1, 0, 0, "named property value " },
{ "pre", 0, 0, 0, 0, 0, "preformatted text " },
{ "q", 0, 0, 0, 0, 0, "short inline quotation " },
{ "s", 0, 0, 0, 1, 1, "strike-through text style" },
{ "samp", 0, 0, 0, 0, 0, "sample program output, scripts, etc." },
{ "script", 0, 0, 0, 0, 0, "script statements " },
{ "select", 0, 0, 0, 0, 0, "option selector " },
{ "small", 0, 0, 0, 0, 0, "small text style" },
{ "span", 0, 0, 0, 0, 0, "generic language/style container " },
{ "strike", 0, 0, 0, 1, 1, "strike-through text" },
{ "strong", 0, 0, 0, 0, 0, "strong emphasis" },
{ "style", 0, 0, 0, 0, 0, "style info " },
{ "sub", 0, 0, 0, 0, 0, "subscript" },
{ "sup", 0, 0, 0, 0, 0, "superscript " },
{ "table", 0, 0, 0, 0, 0, "&#160;" },
{ "tbody", 1, 1, 0, 0, 0, "table body " },
{ "td", 0, 1, 0, 0, 0, "table data cell" },
{ "textarea", 0, 0, 0, 0, 0, "multi-line text field " },
{ "tfoot", 0, 1, 0, 0, 0, "table footer " },
{ "th", 0, 1, 0, 0, 0, "table header cell" },
{ "thead", 0, 1, 0, 0, 0, "table header " },
{ "title", 0, 0, 0, 0, 0, "document title " },
{ "tr", 0, 1, 0, 0, 0, "table row " },
{ "tt", 0, 0, 0, 0, 0, "teletype or monospaced text style" },
{ "u", 0, 0, 0, 1, 1, "underlined text style" },
{ "ul", 0, 0, 0, 0, 0, "unordered list " },
{ "var", 0, 0, 0, 0, 0, "instance of a variable or program argument" },
{ "a", 0, 0, 0, 0, 0, 0, "anchor " },
{ "abbr", 0, 0, 0, 0, 0, 0, "abbreviated form" },
{ "acronym", 0, 0, 0, 0, 0, 0, "" },
{ "address", 0, 0, 0, 0, 0, 0, "information on author " },
{ "applet", 0, 0, 0, 0, 1, 1, "java applet " },
{ "area", 0, 2, 2, 1, 0, 0, "client-side image map area " },
{ "b", 0, 0, 0, 0, 0, 0, "bold text style" },
{ "base", 0, 2, 2, 1, 0, 0, "document base uri " },
{ "basefont", 0, 2, 2, 1, 1, 1, "base font size " },
{ "bdo", 0, 0, 0, 0, 0, 0, "i18n bidi over-ride " },
{ "big", 0, 0, 0, 0, 0, 0, "large text style" },
{ "blockquote", 0, 0, 0, 0, 0, 0, "long quotation " },
{ "body", 1, 1, 0, 0, 0, 0, "document body " },
{ "br", 0, 2, 2, 1, 0, 0, "forced line break " },
{ "button", 0, 0, 0, 0, 0, 0, "push button " },
{ "caption", 0, 0, 0, 0, 0, 0, "table caption " },
{ "center", 0, 0, 0, 0, 1, 1, "shorthand for div align=center " },
{ "cite", 0, 0, 0, 0, 0, 0, "citation" },
{ "code", 0, 0, 0, 0, 0, 0, "computer code fragment" },
{ "col", 0, 2, 2, 1, 0, 0, "table column " },
{ "colgroup", 0, 1, 0, 0, 0, 0, "table column group " },
{ "dd", 0, 1, 0, 0, 0, 0, "definition description " },
{ "del", 0, 0, 0, 0, 0, 0, "deleted text " },
{ "dfn", 0, 0, 0, 0, 0, 0, "instance definition" },
{ "dir", 0, 0, 0, 0, 1, 1, "directory list" },
{ "div", 0, 0, 0, 0, 0, 0, "generic language/style container"},
{ "dl", 0, 0, 0, 0, 0, 0, "definition list " },
{ "dt", 0, 1, 0, 0, 0, 0, "definition term " },
{ "em", 0, 0, 0, 0, 0, 0, "emphasis" },
{ "fieldset", 0, 0, 0, 0, 0, 0, "form control group " },
{ "font", 0, 0, 0, 0, 1, 1, "local change to font " },
{ "form", 0, 0, 0, 0, 0, 0, "interactive form " },
{ "frame", 0, 2, 2, 1, 0, 2, "subwindow " },
{ "frameset", 0, 0, 0, 0, 0, 2, "window subdivision" },
{ "h1", 0, 0, 0, 0, 0, 0, "heading " },
{ "h2", 0, 0, 0, 0, 0, 0, "heading " },
{ "h3", 0, 0, 0, 0, 0, 0, "heading " },
{ "h4", 0, 0, 0, 0, 0, 0, "heading " },
{ "h5", 0, 0, 0, 0, 0, 0, "heading " },
{ "h6", 0, 0, 0, 0, 0, 0, "heading " },
{ "head", 1, 1, 0, 0, 0, 0, "document head " },
{ "hr", 0, 2, 2, 1, 0, 0, "horizontal rule " },
{ "html", 1, 1, 0, 0, 0, 0, "document root element " },
{ "i", 0, 0, 0, 0, 0, 0, "italic text style" },
{ "iframe", 0, 0, 0, 0, 0, 1, "inline subwindow " },
{ "img", 0, 2, 2, 1, 0, 0, "embedded image " },
{ "input", 0, 2, 2, 1, 0, 0, "form control " },
{ "ins", 0, 0, 0, 0, 0, 0, "inserted text" },
{ "isindex", 0, 2, 2, 1, 1, 1, "single line prompt " },
{ "kbd", 0, 0, 0, 0, 0, 0, "text to be entered by the user" },
{ "label", 0, 0, 0, 0, 0, 0, "form field label text " },
{ "legend", 0, 0, 0, 0, 0, 0, "fieldset legend " },
{ "li", 0, 1, 1, 0, 0, 0, "list item " },
{ "link", 0, 2, 2, 1, 0, 0, "a media-independent link " },
{ "map", 0, 0, 0, 0, 0, 0, "client-side image map " },
{ "menu", 0, 0, 0, 0, 1, 1, "menu list " },
{ "meta", 0, 2, 2, 1, 0, 0, "generic metainformation " },
{ "noframes", 0, 0, 0, 0, 0, 2, "alternate content container for non frame-based rendering " },
{ "noscript", 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " },
{ "object", 0, 0, 0, 0, 0, 0, "generic embedded object " },
{ "ol", 0, 0, 0, 0, 0, 0, "ordered list " },
{ "optgroup", 0, 0, 0, 0, 0, 0, "option group " },
{ "option", 0, 1, 0, 0, 0, 0, "selectable choice " },
{ "p", 0, 1, 1, 0, 0, 0, "paragraph " },
{ "param", 0, 2, 2, 1, 0, 0, "named property value " },
{ "pre", 0, 0, 0, 0, 0, 0, "preformatted text " },
{ "q", 0, 0, 0, 0, 0, 0, "short inline quotation " },
{ "s", 0, 0, 0, 0, 1, 1, "strike-through text style" },
{ "samp", 0, 0, 0, 0, 0, 0, "sample program output, scripts, etc." },
{ "script", 0, 0, 0, 0, 0, 0, "script statements " },
{ "select", 0, 0, 0, 0, 0, 0, "option selector " },
{ "small", 0, 0, 0, 0, 0, 0, "small text style" },
{ "span", 0, 0, 0, 0, 0, 0, "generic language/style container " },
{ "strike", 0, 0, 0, 0, 1, 1, "strike-through text" },
{ "strong", 0, 0, 0, 0, 0, 0, "strong emphasis" },
{ "style", 0, 0, 0, 0, 0, 0, "style info " },
{ "sub", 0, 0, 0, 0, 0, 0, "subscript" },
{ "sup", 0, 0, 0, 0, 0, 0, "superscript " },
{ "table", 0, 0, 0, 0, 0, 0, "&#160;" },
{ "tbody", 1, 0, 0, 0, 0, 0, "table body " },
{ "td", 0, 0, 0, 0, 0, 0, "table data cell" },
{ "textarea", 0, 0, 0, 0, 0, 0, "multi-line text field " },
{ "tfoot", 0, 1, 0, 0, 0, 0, "table footer " },
{ "th", 0, 1, 0, 0, 0, 0, "table header cell" },
{ "thead", 0, 1, 0, 0, 0, 0, "table header " },
{ "title", 0, 0, 0, 0, 0, 0, "document title " },
{ "tr", 0, 1, 0, 0, 0, 0, "table row " },
{ "tt", 0, 0, 0, 0, 0, 0, "teletype or monospaced text style" },
{ "u", 0, 0, 0, 0, 1, 1, "underlined text style" },
{ "ul", 0, 0, 0, 0, 0, 0, "unordered list " },
{ "var", 0, 0, 0, 0, 0, 0, "instance of a variable or program argument" },
};
/*

View File

@ -34,11 +34,12 @@ typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
const char *name; /* The tag name */
int startTag; /* Whether the start tag can be implied */
int endTag; /* Whether the end tag can be implied */
int empty; /* Is this an empty element ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
char startTag; /* Whether the start tag can be implied */
char endTag; /* Whether the end tag can be implied */
char saveEndTag; /* Whether the end tag should be saved */
char empty; /* Is this an empty element ? */
char depr; /* Is this a deprecated element ? */
char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
const char *desc; /* the description */
};

View File

@ -871,7 +871,7 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const
return;
}
if ((cur->content == NULL) && (cur->children == NULL)) {
if ((info != NULL) && (info->endTag != 0) &&
if ((info != NULL) && (info->saveEndTag != 0) &&
(strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
xmlOutputBufferWriteString(buf, ">");
} else {

View File

@ -34,11 +34,12 @@ typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
const char *name; /* The tag name */
int startTag; /* Whether the start tag can be implied */
int endTag; /* Whether the end tag can be implied */
int empty; /* Is this an empty element ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
char startTag; /* Whether the start tag can be implied */
char endTag; /* Whether the end tag can be implied */
char saveEndTag; /* Whether the end tag should be saved */
char empty; /* Is this an empty element ? */
char depr; /* Is this a deprecated element ? */
char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
const char *desc; /* the description */
};

View File

@ -70,6 +70,12 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
./test/HTML/doc3.htm:828: error: Unexpected end tag : p
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^
./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and td
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^
./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and tbody
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^
./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and table
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^

View File

@ -2789,8 +2789,10 @@ SAX.endElement(font)
SAX.endElement(a)
SAX.characters( , 1)
SAX.error: Unexpected end tag : p
SAX.error: Opening and ending tag mismatch: center and td
SAX.endElement(td)
SAX.endElement(tr)
SAX.error: Opening and ending tag mismatch: center and tbody
SAX.endElement(tbody)
SAX.error: Opening and ending tag mismatch: center and table
SAX.endElement(table)

View File

@ -73,7 +73,7 @@
<!-- WIRED NEWS header --><!-- CMD_HOST = scoop.hotwired.com --><a name="#"></a>
<table border="0" width="600" cellspacing="0" cellpadding="0">
<tr>
<td>
<td></td>
<td colspan="2"><img src="http://static.wired.com/news/images/spacer.gif" height="5" width="447" alt=""></td>
</tr>
<tr>
@ -625,7 +625,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
</p>
</td>
<td valign="TOP" align="LEFT"><img src="http://static.wired.com/news/images/spacer.gif" height="1" width="5" alt=""></td>
<td valign="TOP" align="LEFT">
<td valign="TOP" align="LEFT"></td>
</tr>
</table>
<br>

View File

@ -23,25 +23,25 @@ Set contains 0 nodes:
Expression: /child::EXAMPLE/child::chapter[3]/preceding::*
Object is a Node Set :
Set contains 10 nodes:
1 ELEMENT p
1 ELEMENT head
2 ELEMENT title
3 ELEMENT chapter
ATTRIBUTE id
TEXT
content=chapter2
4 ELEMENT p
5 ELEMENT image
content=chapter1
4 ELEMENT title
5 ELEMENT p
6 ELEMENT image
ATTRIBUTE href
TEXT
content=linus.gif
6 ELEMENT p
7 ELEMENT title
7 ELEMENT p
8 ELEMENT chapter
ATTRIBUTE id
TEXT
content=chapter1
content=chapter2
9 ELEMENT title
10 ELEMENT head
10 ELEMENT p
========================
Expression: /child::EXAMPLE/child::chapter[3]/following::*
@ -64,10 +64,10 @@ Set contains 6 nodes:
Expression: /child::EXAMPLE/child::chapter[1]/image/preceding::*
Object is a Node Set :
Set contains 4 nodes:
1 ELEMENT p
1 ELEMENT head
2 ELEMENT title
3 ELEMENT title
4 ELEMENT head
4 ELEMENT p
========================
Expression: /child::EXAMPLE/child::chapter[1]/image/following::*

35
xpath.c
View File

@ -584,6 +584,31 @@ xmlXPathCmpNodes(xmlNodePtr node1, xmlNodePtr node2) {
return(-1); /* assume there is no sibling list corruption */
}
/**
 * xmlXPathNodeSetSort:
 * @set:  the node set
 *
 * Put the nodes of @set into document order, in place.
 *
 * Every pair of entries is compared with xmlXPathCmpNodes() and the two
 * entries are exchanged whenever that comparison returns -1.  A NULL
 * @set is accepted and left alone.
 */
void
xmlXPathNodeSetSort(xmlNodeSetPtr set) {
    int lead;

    if (set == NULL)
        return;

    lead = 0;
    while (lead < set->nodeNr - 1) {
        int probe = lead + 1;

        /* Check the entry at 'lead' against each entry that follows it. */
        while (probe < set->nodeNr) {
            if (xmlXPathCmpNodes(set->nodeTab[lead],
                                 set->nodeTab[probe]) == -1) {
                xmlNodePtr held = set->nodeTab[lead];

                set->nodeTab[lead] = set->nodeTab[probe];
                set->nodeTab[probe] = held;
            }
            probe++;
        }
        lead++;
    }
}
#define XML_NODESET_DEFAULT 10
/**
* xmlXPathNodeSetCreate:
@ -5228,12 +5253,14 @@ xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) {
void
xmlXPathEvalUnionExpr(xmlXPathParserContextPtr ctxt) {
int sort = 0;
xmlXPathEvalPathExpr(ctxt);
CHECK_ERROR;
SKIP_BLANKS;
while (CUR == '|') {
xmlXPathObjectPtr obj1,obj2, tmp;
sort = 1;
CHECK_TYPE(XPATH_NODESET);
obj1 = valuePop(ctxt);
tmp = xmlXPathNewNodeSet(ctxt->context->node);
@ -5255,6 +5282,8 @@ xmlXPathEvalUnionExpr(xmlXPathParserContextPtr ctxt) {
xmlXPathFreeObject(obj2);
SKIP_BLANKS;
}
if (sort) {
}
}
/**
@ -5515,6 +5544,9 @@ xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt) {
xmlXPathFreeObject(arg2);
SKIP_BLANKS;
}
if ((ctxt->value != NULL) && (ctxt->value->type == XPATH_NODESET) &&
(ctxt->value->nodesetval != NULL))
xmlXPathNodeSetSort(ctxt->value->nodesetval);
}
/**
@ -5581,6 +5613,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) {
xmlXPathObjectPtr obj, tmp;
xmlNodeSetPtr newset = NULL;
xmlNodeSetPtr oldset;
xmlNodePtr oldnode;
int i;
SKIP_BLANKS;
@ -5598,6 +5631,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) {
CHECK_TYPE(XPATH_NODESET);
obj = valuePop(ctxt);
oldset = obj->nodesetval;
oldnode = ctxt->context->node;
ctxt->context->node = NULL;
if ((oldset == NULL) || (oldset->nodeNr == 0)) {
@ -5675,6 +5709,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) {
xmlGenericErrorContextNodeSet(xmlGenericErrorContext,
ctxt->value->nodesetval);
#endif
ctxt->context->node = oldnode;
}
/**