mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-24 06:50:08 +03:00
html: Parse named character references according to HTML5
This commit is contained in:
parent
d5cd0f07f8
commit
5951179239
236
HTMLparser.c
236
HTMLparser.c
@ -2620,6 +2620,111 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
||||
return(ret);
|
||||
}
|
||||
|
||||
#include "html5ent.inc"
|
||||
|
||||
#define ENT_F_SEMICOLON 0x80u
|
||||
#define ENT_F_SUBTABLE 0x40u
|
||||
#define ENT_F_ALL 0xC0u
|
||||
|
||||
static const xmlChar *
|
||||
htmlFindEntityPrefix(const xmlChar *string, size_t slen, int isAttr,
|
||||
int *nlen, int *rlen) {
|
||||
const xmlChar *match = NULL;
|
||||
unsigned left, right;
|
||||
int first = string[0];
|
||||
size_t matchLen = 0;
|
||||
size_t soff = 1;
|
||||
|
||||
if (slen < 2)
|
||||
return(NULL);
|
||||
if (((first < 'A') || (first > 'Z')) &&
|
||||
((first < 'a') || (first > 'z')))
|
||||
return(NULL);
|
||||
|
||||
/*
|
||||
* Look up range by first character
|
||||
*/
|
||||
first &= 63;
|
||||
left = htmlEntAlpha[first*3] | htmlEntAlpha[first*3+1] << 8;
|
||||
right = left + htmlEntAlpha[first*3+2];
|
||||
|
||||
/*
|
||||
* Binary search
|
||||
*/
|
||||
while (left < right) {
|
||||
const xmlChar *bytes;
|
||||
unsigned mid;
|
||||
size_t len;
|
||||
int cmp;
|
||||
|
||||
mid = left + (right - left) / 2;
|
||||
bytes = htmlEntStrings + htmlEntValues[mid];
|
||||
len = bytes[0] & ~ENT_F_ALL;
|
||||
|
||||
cmp = string[soff] - bytes[1];
|
||||
|
||||
if (cmp == 0) {
|
||||
if (slen < len) {
|
||||
cmp = strncmp((const char *) string + soff + 1,
|
||||
(const char *) bytes + 2,
|
||||
slen - 1);
|
||||
/* Prefix can never match */
|
||||
if (cmp == 0)
|
||||
break;
|
||||
} else {
|
||||
cmp = strncmp((const char *) string + soff + 1,
|
||||
(const char *) bytes + 2,
|
||||
len - 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (cmp < 0) {
|
||||
right = mid;
|
||||
} else if (cmp > 0) {
|
||||
left = mid + 1;
|
||||
} else {
|
||||
int term = soff + len < slen ? string[soff + len] : 0;
|
||||
int isAlnum, isTerm;
|
||||
|
||||
isAlnum = (((term >= 'A') && (term <= 'Z')) ||
|
||||
((term >= 'a') && (term <= 'z')) ||
|
||||
((term >= '0') && (term <= '9')));
|
||||
isTerm = ((term == ';') ||
|
||||
((bytes[0] & ENT_F_SEMICOLON) &&
|
||||
((!isAttr) ||
|
||||
((!isAlnum) && (term != '=')))));
|
||||
|
||||
if (isTerm) {
|
||||
match = bytes + len + 1;
|
||||
matchLen = soff + len;
|
||||
if (term == ';')
|
||||
matchLen += 1;
|
||||
}
|
||||
|
||||
if (bytes[0] & ENT_F_SUBTABLE) {
|
||||
if (isTerm)
|
||||
match += 2;
|
||||
|
||||
if ((isAlnum) && (soff + len < slen)) {
|
||||
left = mid + bytes[len + 1];
|
||||
right = left + bytes[len + 2];
|
||||
soff += len;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (match == NULL)
|
||||
return(NULL);
|
||||
|
||||
*nlen = matchLen;
|
||||
*rlen = match[0];
|
||||
return(match + 1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* htmlParseHTMLAttribute:
|
||||
@ -2640,9 +2745,6 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
|
||||
XML_MAX_HUGE_LENGTH :
|
||||
XML_MAX_TEXT_LENGTH;
|
||||
xmlChar *out = NULL;
|
||||
const xmlChar *name = NULL;
|
||||
const xmlChar *cur = NULL;
|
||||
const htmlEntityDesc * ent;
|
||||
|
||||
/*
|
||||
* allocate a translation buffer.
|
||||
@ -2662,6 +2764,16 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
|
||||
(CUR != 0) && (CUR != stop)) {
|
||||
if ((stop == 0) && (CUR == '>')) break;
|
||||
if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
|
||||
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
|
||||
GROW;
|
||||
|
||||
if (CUR == '&') {
|
||||
if (NXT(1) == '#') {
|
||||
unsigned int c;
|
||||
@ -2680,70 +2792,28 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
|
||||
for ( ; bits >= 0; bits-= 6) {
|
||||
*out++ = ((c >> bits) & 0x3F) | 0x80;
|
||||
}
|
||||
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
} else {
|
||||
ent = htmlParseEntityRef(ctxt, &name);
|
||||
if (name == NULL) {
|
||||
*out++ = '&';
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
const xmlChar *repl;
|
||||
int nameLen, replLen;
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
} else if (ent == NULL) {
|
||||
*out++ = '&';
|
||||
cur = name;
|
||||
while (*cur != 0) {
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
SKIP(1);
|
||||
repl = htmlFindEntityPrefix(CUR_PTR,
|
||||
ctxt->input->end - CUR_PTR,
|
||||
/* isAttr */ 1,
|
||||
&nameLen, &replLen);
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
*out++ = *cur++;
|
||||
}
|
||||
if (repl == NULL) {
|
||||
*out++ = '&';
|
||||
} else {
|
||||
unsigned int c;
|
||||
int bits;
|
||||
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
c = ent->value;
|
||||
if (c < 0x80)
|
||||
{ *out++ = c; bits= -6; }
|
||||
else if (c < 0x800)
|
||||
{ *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
|
||||
else if (c < 0x10000)
|
||||
{ *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
|
||||
else
|
||||
{ *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
|
||||
|
||||
for ( ; bits >= 0; bits-= 6) {
|
||||
*out++ = ((c >> bits) & 0x3F) | 0x80;
|
||||
}
|
||||
memcpy(out, repl, replLen);
|
||||
out += replLen;
|
||||
SKIP(nameLen);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unsigned int c;
|
||||
int bits, l;
|
||||
|
||||
if (out - buffer > buffer_size - 100) {
|
||||
int indx = out - buffer;
|
||||
|
||||
growBuffer(buffer);
|
||||
out = &buffer[indx];
|
||||
}
|
||||
c = CUR_CHAR(l);
|
||||
if (c < 0x80)
|
||||
{ *out++ = c; bits= -6; }
|
||||
@ -4086,9 +4156,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
||||
*/
|
||||
static void
|
||||
htmlParseReference(htmlParserCtxtPtr ctxt) {
|
||||
const htmlEntityDesc * ent;
|
||||
xmlChar out[6];
|
||||
const xmlChar *name;
|
||||
if (CUR != '&') return;
|
||||
|
||||
if (NXT(1) == '#') {
|
||||
@ -4113,43 +4181,25 @@ htmlParseReference(htmlParserCtxtPtr ctxt) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
|
||||
ctxt->sax->characters(ctxt->userData, out, i);
|
||||
} else {
|
||||
ent = htmlParseEntityRef(ctxt, &name);
|
||||
if (name == NULL) {
|
||||
htmlCheckParagraph(ctxt);
|
||||
const xmlChar *repl;
|
||||
int nameLen, replLen;
|
||||
|
||||
htmlCheckParagraph(ctxt);
|
||||
|
||||
SKIP(1);
|
||||
repl = htmlFindEntityPrefix(CUR_PTR,
|
||||
ctxt->input->end - CUR_PTR,
|
||||
/* isAttr */ 0,
|
||||
&nameLen, &replLen);
|
||||
|
||||
if (repl == NULL) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
|
||||
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
|
||||
return;
|
||||
}
|
||||
if ((ent == NULL) || !(ent->value > 0)) {
|
||||
htmlCheckParagraph(ctxt);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
|
||||
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
|
||||
ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
|
||||
/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
|
||||
}
|
||||
} else {
|
||||
unsigned int c;
|
||||
int bits, i = 0;
|
||||
|
||||
c = ent->value;
|
||||
if (c < 0x80)
|
||||
{ out[i++]= c; bits= -6; }
|
||||
else if (c < 0x800)
|
||||
{ out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
|
||||
else if (c < 0x10000)
|
||||
{ out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
|
||||
else
|
||||
{ out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
|
||||
|
||||
for ( ; bits >= 0; bits-= 6) {
|
||||
out[i++]= ((c >> bits) & 0x3F) | 0x80;
|
||||
}
|
||||
out[i] = 0;
|
||||
|
||||
htmlCheckParagraph(ctxt);
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
|
||||
ctxt->sax->characters(ctxt->userData, out, i);
|
||||
}
|
||||
ctxt->sax->characters(ctxt->userData, repl, replLen);
|
||||
SKIP(nameLen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
1607
html5ent.inc
Normal file
1607
html5ent.inc
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,2 +1,2 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html><body><p>&Ù</p></body></html>
|
||||
<html><body><p>&jÙ</p></body></html>
|
||||
|
@ -1,3 +0,0 @@
|
||||
./test/HTML/758518-entity.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
Ù
|
||||
^
|
@ -1,11 +1,10 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(html)
|
||||
SAX.startElement(body)
|
||||
SAX.startElement(p)
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters(Ù, 2)
|
||||
SAX.characters(jÙ, 3)
|
||||
SAX.endElement(p)
|
||||
SAX.endElement(body)
|
||||
SAX.endElement(html)
|
||||
|
@ -1,3 +1,3 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html><body><p>&ê
|
||||
<html><body><p>&:ê
|
||||
</p></body></html>
|
||||
|
@ -1,3 +0,0 @@
|
||||
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ê
|
||||
^
|
@ -1,13 +1,11 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(html)
|
||||
SAX.startElement(body)
|
||||
SAX.startElement(p)
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters(ê, 2)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.characters(:ê
|
||||
, 4)
|
||||
SAX.endElement(p)
|
||||
SAX.endElement(body)
|
||||
SAX.endElement(html)
|
||||
|
8
result/HTML/attr-ents.html
Normal file
8
result/HTML/attr-ents.html
Normal file
@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html>
|
||||
<body>
|
||||
<a href="index.cgi?a&lt=1&gt=2">link</a>
|
||||
<a href="index.cgi?a&lta&gta">link</a>
|
||||
<a href="index.cgi?a<>">link</a>
|
||||
</body>
|
||||
</html>
|
30
result/HTML/attr-ents.html.sax
Normal file
30
result/HTML/attr-ents.html.sax
Normal file
@ -0,0 +1,30 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.startElement(html)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.startElement(body)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.startElement(a, href='index.cgi?a&lt=1&gt=2')
|
||||
SAX.characters(link, 4)
|
||||
SAX.endElement(a)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.startElement(a, href='index.cgi?a&lta&gta')
|
||||
SAX.characters(link, 4)
|
||||
SAX.endElement(a)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.startElement(a, href='index.cgi?a<>')
|
||||
SAX.characters(link, 4)
|
||||
SAX.endElement(a)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endElement(body)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endElement(html)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endDocument()
|
@ -1,15 +1,6 @@
|
||||
./test/HTML/doc3.htm:10: HTML parser error : Misplaced DOCTYPE declaration
|
||||
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN
|
||||
^
|
||||
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&id
|
||||
^
|
||||
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media
|
||||
^
|
||||
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id
|
||||
^
|
||||
./test/HTML/doc3.htm:148: HTML parser error : Unexpected end tag : p
|
||||
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P>
|
||||
^
|
||||
@ -19,12 +10,6 @@ _top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media
|
||||
./test/HTML/doc3.htm:236: HTML parser error : Unexpected end tag : a
|
||||
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
|
||||
^
|
||||
./test/HTML/doc3.htm:747: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
er=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid
|
||||
^
|
||||
./test/HTML/doc3.htm:747: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asid
|
||||
^
|
||||
./test/HTML/doc3.htm:747: HTML parser error : Unexpected end tag : li
|
||||
light.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI>
|
||||
^
|
||||
|
@ -85,10 +85,7 @@ SAX.comment( © 2000 GameSquad.net All Rights Reserved. )
|
||||
SAX.startElement(iframe, border='0', frameborder='no', height='60', marginheight='0', marginwidth='0', scrolling='no', src='doc3_files/adcycle.htm', width='468')
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(a, href='http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&id=1', target='_top')
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(img, src='http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id=1', width='468', height='60', border='0', alt='GSN ROS Ad')
|
||||
SAX.endElement(img)
|
||||
SAX.endElement(a)
|
||||
@ -2567,8 +2564,6 @@ SAX.endElement(font)
|
||||
SAX.startElement(center)
|
||||
SAX.startElement(iframe, frameborder='0', height='60', marginheight='0', marginwidth='0', noresize, scrolling='no', src='doc3_files/ad_iframe.htm', width='468')
|
||||
SAX.startElement(a, href='http://ads.adflight.com/go_static.asp?asid=7708', target='_top')
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(img, width='468', height='60', border='0', alt='Advertisement', src='http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asid=7708')
|
||||
SAX.endElement(img)
|
||||
SAX.endElement(a)
|
||||
|
@ -1,12 +0,0 @@
|
||||
./test/HTML/entities.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<p tst="a&b" tst2="a&b" tst3="a & b">
|
||||
^
|
||||
./test/HTML/entities.html:1: HTML parser error : htmlParseEntityRef: no name
|
||||
<p tst="a&b" tst2="a&b" tst3="a & b">
|
||||
^
|
||||
./test/HTML/entities.html:3: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
a&b
|
||||
^
|
||||
./test/HTML/entities.html:4: HTML parser error : htmlParseEntityRef: no name
|
||||
a & b
|
||||
^
|
@ -2,20 +2,15 @@ SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.startElement(html)
|
||||
SAX.startElement(body)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: no name
|
||||
SAX.startElement(p, tst='a&b', tst2='a&b', tst3='a & b')
|
||||
SAX.characters(
|
||||
a, 2)
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters(b
|
||||
a, 3)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters(b, 1)
|
||||
SAX.characters(
|
||||
a , 3)
|
||||
SAX.error: htmlParseEntityRef: no name
|
||||
SAX.characters(b
|
||||
a , 4)
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters( b
|
||||
, 3)
|
||||
|
@ -1,3 +0,0 @@
|
||||
./test/HTML/fp40.htm:153: HTML parser error : htmlParseEntityRef: no name
|
||||
technical articles from Microsoft's extensive Knowledge Base, FAQs, & troublesho
|
||||
^
|
@ -422,7 +422,6 @@ SAX.characters(
|
||||
, 2)
|
||||
SAX.startElement(p)
|
||||
SAX.characters(For further technical informat, 254)
|
||||
SAX.error: htmlParseEntityRef: no name
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters( troubleshooters to find
|
||||
fast, 302)
|
||||
|
4259
result/HTML/html5-entities.html
Normal file
4259
result/HTML/html5-entities.html
Normal file
File diff suppressed because it is too large
Load Diff
12776
result/HTML/html5-entities.html.sax
Normal file
12776
result/HTML/html5-entities.html.sax
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,15 +1,3 @@
|
||||
./test/HTML/utf8bug.html:45: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<img src="showimage.aspx?path=Files_Upload\192.png&width=%>" border="0" />
|
||||
^
|
||||
./test/HTML/utf8bug.html:118: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<a href="showimage.aspx?path=Files_Upload\302.JPG&Width=" rel="lightbox" tit
|
||||
^
|
||||
./test/HTML/utf8bug.html:119: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<img src="showimage.aspx?path=Files_Upload\302.JPG&Width=220" align="left" b
|
||||
^
|
||||
./test/HTML/utf8bug.html:121: HTML parser error : Tag s1 invalid
|
||||
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
|
||||
^
|
||||
./test/HTML/utf8bug.html:177: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ین پاسخ را برای نویسنده مقاله رجانیوز copy&paste
|
||||
^
|
||||
|
@ -146,7 +146,6 @@ SAX.startElement(a, href='RSS2.asp')
|
||||
SAX.characters(
|
||||
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(img, src='showimage.aspx?path=Files_Upload\192.png&width=%>', border='0')
|
||||
SAX.endElement(img)
|
||||
SAX.characters(
|
||||
@ -400,11 +399,9 @@ SAX.startElement(div, class='Image')
|
||||
SAX.characters(
|
||||
|
||||
, 10)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(a, href='showimage.aspx?path=Files_Upload\302.JPG&Width=', rel='lightbox', title='شهير بلاگ')
|
||||
SAX.characters(
|
||||
, 5)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(img, src='showimage.aspx?path=Files_Upload\302.JPG&Width=220', align='left', border='1')
|
||||
SAX.endElement(img)
|
||||
SAX.characters(
|
||||
@ -665,10 +662,8 @@ SAX.endElement(font)
|
||||
SAX.startElement(br)
|
||||
SAX.endElement(br)
|
||||
SAX.characters(سلام , 834)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters(paste, 5)
|
||||
SAX.characters( , 1)
|
||||
SAX.characters(paste , 6)
|
||||
SAX.startElement(br)
|
||||
SAX.endElement(br)
|
||||
SAX.characters(
|
||||
|
@ -1,201 +1,15 @@
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<FORM METHOD=GET ACTION="http://nsads.hotwired.com/event.ng/Type=click&ProfileID
|
||||
^
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
D=GET ACTION="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID
|
||||
^
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
N="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID
|
||||
^
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
s.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID
|
||||
^
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID
|
||||
^
|
||||
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
pe=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID=2684&TagValues
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
" align="RIGHT"><a href="http://nsads.hotwired.com/event.ng/Type=click&ProfileID
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
GHT"><a href="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
f="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
s.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID
|
||||
^
|
||||
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
pe=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID=3228&TagValues
|
||||
^
|
||||
./test/HTML/wired.html:70: HTML parser error : Tag nobr invalid
|
||||
<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static
|
||||
^
|
||||
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
|
||||
^
|
||||
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ction=FilterSearch&Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection
|
||||
^
|
||||
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
|
||||
^
|
||||
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
tion=FilterSearch&Filter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection
|
||||
^
|
||||
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
lter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
r.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
="http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&Action
|
||||
^
|
||||
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ired.com/search97/s97.vts?collection=webmonkey_guides&Action=FilterSearch&filter
|
||||
^
|
||||
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ction=webmonkey_guides&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ilterSearch&filter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
on value="http://search.hotwired.com/search97/s97.vts?collection=hotwired&Action
|
||||
^
|
||||
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
rch.hotwired.com/search97/s97.vts?collection=hotwired&Action=FilterSearch&filter
|
||||
^
|
||||
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ts?collection=hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ilterSearch&filter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
|
||||
^
|
||||
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ction=FilterSearch&Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection
|
||||
^
|
||||
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
|
||||
^
|
||||
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
tion=FilterSearch&Filter=docs_filter.hts&ResultTemplate=animation.hts&Collection
|
||||
^
|
||||
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
lter=docs_filter.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
r.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
option value="http://search.hotwired.com/search97/s97.vts?collection=suck&Action
|
||||
^
|
||||
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
/search.hotwired.com/search97/s97.vts?collection=suck&Action=FilterSearch&filter
|
||||
^
|
||||
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
97.vts?collection=suck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
uck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
erSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
lue="http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&Action
|
||||
^
|
||||
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
otwired.com/search97/s97.vts?collection=uber_hotwired&Action=FilterSearch&filter
|
||||
^
|
||||
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
llection=uber_hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
|
||||
^
|
||||
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
n=FilterSearch&filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode
|
||||
^
|
||||
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode=Internet&Query
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs
|
||||
^
|
||||
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&MT
|
||||
^
|
||||
./test/HTML/wired.html:170: HTML parser error : Unexpected end tag : form
|
||||
</tr> </form>
|
||||
^
|
||||
./test/HTML/wired.html:248: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_search
|
||||
^
|
||||
./test/HTML/wired.html:265: HTML parser error : Unexpected end tag : form
|
||||
</tr> </form>
|
||||
^
|
||||
./test/HTML/wired.html:346: HTML parser error : Opening and ending tag mismatch: td and font
|
||||
</td>
|
||||
^
|
||||
./test/HTML/wired.html:374: HTML parser error : htmlParseEntityRef: no name
|
||||
a, sans-serif"><b><a href="/news/commentarySection/0,1292,31926,00.html">Rants &
|
||||
^
|
||||
./test/HTML/wired.html:374: HTML parser error : Opening and ending tag mismatch: td and font
|
||||
Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br> </td
|
||||
^
|
||||
@ -205,15 +19,6 @@ Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br> </td
|
||||
./test/HTML/wired.html:402: HTML parser error : Opening and ending tag mismatch: a and font
|
||||
w.vignette.com/" style="text-decoration:none"><font color="#000000">Vignette</a>
|
||||
^
|
||||
./test/HTML/wired.html:407: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
|
||||
^
|
||||
./test/HTML/wired.html:407: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
|
||||
^
|
||||
./test/HTML/wired.html:408: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
wired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Spri
|
||||
^
|
||||
./test/HTML/wired.html:408: HTML parser error : Opening and ending tag mismatch: a and font
|
||||
com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
|
||||
^
|
||||
@ -250,6 +55,3 @@ com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
|
||||
./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
|
||||
</td>
|
||||
^
|
||||
./test/HTML/wired.html:432: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||
href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1">Lycos</a
|
||||
^
|
||||
|
@ -19,12 +19,6 @@ SAX.characters(
|
||||
, 5)
|
||||
SAX.startElement(td, valign='top', align='LEFT')
|
||||
SAX.startElement(table, border='0', cellpadding='0', cellspacing='0', width='468', height='60', bgcolor='#FFFFFF')
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(form, method='GET', action='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID=2684&TagValues=8.25.156.159.166.171.172.174.179.180.181.182.183.196.197.199.208.389.412.436.2041.6750.78456.79630.81880&Redirect=http://www.springstreet.com/aa/citysearch.htm', id='form1', name='form1')
|
||||
SAX.characters(
|
||||
, 2)
|
||||
@ -298,12 +292,6 @@ SAX.endElement(td)
|
||||
SAX.characters(
|
||||
, 5)
|
||||
SAX.startElement(td, valign='top', align='RIGHT')
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(a, href='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID=3228&TagValues=8.25.159.171.172.174.179.180.181.182.183.196.197.199.208.241.389.412.436.2035.6749.6750.70367.78456.79630.81880&Redirect=http:%2F%2Fwww.hp.com%2Fgo%2Foriginal%20', target='_top')
|
||||
SAX.startElement(img, src='http://static.wired.com/advertising/blipverts/hp_colorinkjet/hp_970c_120x60_6.gif', border='1', height='60', width='120', alt='True to the Original')
|
||||
SAX.endElement(img)
|
||||
@ -437,94 +425,46 @@ SAX.startElement(select, name='url')
|
||||
SAX.characters(
|
||||
|
||||
, 4)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode=Internet&Query=', selected)
|
||||
SAX.characters(Wired News, 10)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode=Internet&Query=')
|
||||
SAX.characters(Webmonkey, 9)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode=Internet&Query=')
|
||||
SAX.characters(Webmonkey Guides, 16)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode=Internet&Query=')
|
||||
SAX.characters(HotWired Archives, 17)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode=Internet&Query=')
|
||||
SAX.characters(Wired Magazine, 14)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode=Internet&Query=')
|
||||
SAX.characters(Animation Express, 17)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=suck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode=Internet&Query=')
|
||||
SAX.characters(Suck.com, 8)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode=Internet&Query=')
|
||||
SAX.characters(All of HotWired, 15)
|
||||
SAX.endElement(option)
|
||||
SAX.characters(
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(option, value='http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&MT=')
|
||||
SAX.characters(The Web -> HotBot, 17)
|
||||
SAX.endElement(option)
|
||||
@ -1090,7 +1030,6 @@ SAX.endElement(input)
|
||||
SAX.characters(
|
||||
|
||||
, 2)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(img, src='http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_search=Y', border='0', align='top')
|
||||
SAX.endElement(img)
|
||||
SAX.characters(
|
||||
@ -1612,7 +1551,6 @@ SAX.startElement(font, size='2', face='Arial,Helvetica, sans-serif')
|
||||
SAX.startElement(b)
|
||||
SAX.startElement(a, href='/news/commentarySection/0,1292,31926,00.html')
|
||||
SAX.characters(Rants , 6)
|
||||
SAX.error: htmlParseEntityRef: no name
|
||||
SAX.characters(&, 1)
|
||||
SAX.characters( Raves, 6)
|
||||
SAX.endElement(a)
|
||||
@ -1948,9 +1886,6 @@ SAX.startElement(br)
|
||||
SAX.endElement(br)
|
||||
SAX.startElement(i)
|
||||
SAX.characters(Sponsored by , 13)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(a, href='http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s
|
||||
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
|
||||
wired.com&BANNER=Sprint', style='text-decoration:none')
|
||||
@ -2093,7 +2028,6 @@ SAX.endElement(br)
|
||||
SAX.endElement(p)
|
||||
SAX.startElement(li)
|
||||
SAX.characters(More from , 10)
|
||||
SAX.error: htmlParseEntityRef: expecting ';'
|
||||
SAX.startElement(a, href='http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1')
|
||||
SAX.characters(Lycos, 5)
|
||||
SAX.endElement(a)
|
||||
|
7
test/HTML/attr-ents.html
Normal file
7
test/HTML/attr-ents.html
Normal file
@ -0,0 +1,7 @@
|
||||
<html>
|
||||
<body>
|
||||
<a href="index.cgi?a<=1>=2">link</a>
|
||||
<a href="index.cgi?a<a>a">link</a>
|
||||
<a href="index.cgi?a<>">link</a>
|
||||
</body>
|
||||
</html>
|
4258
test/HTML/html5-entities.html
Normal file
4258
test/HTML/html5-entities.html
Normal file
File diff suppressed because it is too large
Load Diff
169
tools/genHtmlEnt.py
Executable file
169
tools/genHtmlEnt.py
Executable file
@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
# The basic idea is to find named character references using binary
|
||||
# search. Since entity strings may not have a terminator, this doesn't
|
||||
# work if one entity string is a prefix of another. In this case,
|
||||
# we branch to a subtable after matching the prefix.
|
||||
#
|
||||
# We create separate initial tables based on the first character
|
||||
# of the entity name.
|
||||
#
|
||||
# The following tables are generated:
|
||||
#
|
||||
# htmlEntAlpha: start and length of initial tables, indexing into
|
||||
# htmlEntValues
|
||||
# htmlEntValues: concatenation of all table values, which index into
|
||||
# htmlEntStrings
|
||||
# htmlEntStrings: variable sized records containing entity name,
|
||||
# replacement and optionally the position of a
|
||||
# subtable
|
||||
|
||||
# Load the named character reference list from 'entities.json' in the
# current working directory into `ents`.
try:
    with open('entities.json') as json_data:
        ents = json.load(json_data)
except FileNotFoundError:
    # Tell the user how to fetch the input file, then exit with status 1.
    print('entities.json not found, try curl -LJO',
          'https://html.spec.whatwg.org/entities.json')
    sys.exit(1)
|
||||
|
||||
def to_cchars(s):
    """Convert a string into a list of C array elements, one per encoded byte.

    The string is encoded with str.encode() (UTF-8 by default). Printable
    ASCII bytes (0x20-0x7E) other than the single quote and the backslash
    are returned as C character literals such as "'a'"; every other byte
    is returned as its integer value.
    """
    r = []

    for c in s.encode():
        if 0x20 <= c <= 0x7E and c != ord("'") and c != ord('\\'):
            # Safe to emit verbatim inside a C character literal
            r.append(f"'{chr(c)}'")
        else:
            # Quote, backslash, control and non-ASCII bytes stay numeric
            r.append(c)

    return r
|
||||
|
||||
@dataclass
class PrefixStackEntry:
    """Entry of the prefix stack used while walking the sorted entity names."""
    # Name prefix shared by every entity stored in the table below
    prefix: str
    # Index into `tables` of the table collecting names with this prefix
    table_id: int
|
||||
|
||||
@dataclass
class AlphaFixup:
    """Deferred htmlEntAlpha entry, resolved once all table ranges are known."""
    # Index into `tables` of the initial table for this first character
    table_id: int
    # First character of the entity name, as ord(char) % 64
    char: int
|
||||
|
||||
@dataclass
class StringFixup:
    """Deferred subtable position inside `strings`, resolved after concatenation."""
    # Index into `tables` of the subtable being referenced
    table_id: int
    # Index into `strings` of the two placeholder elements to overwrite
    string_index: int
    # Index into `tables` of the table holding the referencing entry
    super_table_id: int
    # Position of the referencing entry within that table
    super_offset: int
|
||||
|
||||
# Remove entity strings without trailing semicolon
keys = (key for key in ents.keys() if key.endswith(';'))

# Sort entity strings by the key with its first and last character removed
keys = sorted(keys, key=lambda k: k[1:-1])

# Flat list of record elements that becomes htmlEntStrings
strings = []
# Each table is a list of indices into `strings`
tables = []
prefix_stack = []
alpha_fixups = []
string_fixups = []
# Start with 64 empty tables; tables created later get ids from 64 upwards
for i in range(64):
    tables.append([])
|
||||
|
||||
# Walk the sorted keys once. Each entity appends one record to `strings`
# and registers the record's index in the table on top of the prefix stack.
for i, key in enumerate(keys):
    name = key[1:-1]

    next_name = None
    if i + 1 < len(keys):
        next_name = keys[i+1][1:-1]

    # Drop prefixes the current name does not start with
    while prefix_stack and not name.startswith(prefix_stack[-1].prefix):
        prefix_stack.pop()

    # First character is initial prefix
    if not prefix_stack:
        table_id = len(tables)
        tables.append([])

        prefix_stack.append(PrefixStackEntry(name[0], table_id))
        alpha_fixups.append(AlphaFixup(table_id, ord(name[0]) % 64))

    string_index = len(strings)
    table = tables[prefix_stack[-1].table_id]
    table_index = len(table)
    table.append(string_index)

    # Only the part of the name after the current prefix is stored
    name_offset = len(prefix_stack[-1].prefix)
    name_chars = to_cchars(name[name_offset:])
    repl_chars = to_cchars(ents[key]['characters'])
    # 0x80 marks keys that are also present in `ents` without the ';'
    semicolon_flag = 0
    if key[:-1] in ents:
        semicolon_flag = 0x80

    if next_name and next_name.startswith(name):
        # Create subtable; 0x40 marks a record that carries a subtable

        strings += [
            len(name_chars) | semicolon_flag | 0x40, *name_chars,
            0, 0, # subtable position, to be fixed up
            len(repl_chars), *repl_chars,
        ]

        table_id = len(tables)
        tables.append([])

        # The two placeholders sit right after the length byte and the name
        fixup_index = string_index + 1 + len(name_chars)
        string_fixups.append(StringFixup(
            table_id, fixup_index, prefix_stack[-1].table_id, table_index,
        ))

        prefix_stack.append(PrefixStackEntry(name, table_id))
    else:
        strings += [
            len(name_chars) | semicolon_flag, *name_chars,
            len(repl_chars), *repl_chars,
        ]
|
||||
|
||||
# Concat tables and record ranges: after the loop, table t occupies
# values[ranges[t]:ranges[t+1]]
ranges = [ 0 ]
values = []
for table in tables:
    values += table
    ranges.append(len(values))
|
||||
|
||||
# Create alpha table: three elements per character slot, holding the
# start index into `values` (low byte, then high byte) and the table length
alpha = [ 0 ] * (59 * 3)
for fixup in alpha_fixups:
    table_id, c = fixup.table_id, fixup.char
    start = ranges[table_id]
    end = ranges[table_id+1]
    alpha[c*3:c*3+3] = [ start & 0xFF, start >> 8, end - start ]
|
||||
|
||||
# Fix up subtable positions: overwrite the two placeholder elements with
# the subtable start, relative to the referencing entry's index in
# `values`, and the subtable length
for fixup in string_fixups:
    table_id, i = fixup.table_id, fixup.string_index
    start = ranges[table_id]
    end = ranges[table_id+1]
    super_index = ranges[fixup.super_table_id] + fixup.super_offset
    strings[i:i+2] = [ start - super_index, end - start ]
|
||||
|
||||
# Print tables
|
||||
|
||||
def gen_table(ctype, cname, values, fmt, elems_per_line):
    """Return the C source of a `static const` array definition.

    ctype          -- C element type, e.g. 'unsigned char'
    cname          -- name of the generated array
    values         -- elements, each formatted with `fmt % value`
    fmt            -- printf-style format applied to each element
    elems_per_line -- number of elements emitted before a line break

    The array size in the declaration is len(values). Elements are
    separated by commas; the first element of each row is preceded by a
    newline and a space, every other element by a single space.
    """
    count = len(values)
    parts = []

    for i in range(count):
        if i != 0:
            parts.append(',')
        if i % elems_per_line == 0:
            parts.append('\n ')
        else:
            parts.append(' ')
        parts.append(fmt % values[i])

    r = ''.join(parts)

    return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n'
|
||||
|
||||
# Emit the three generated C tables on standard output
print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))
|
Loading…
x
Reference in New Issue
Block a user