1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 03:55:04 +03:00

html: Parse named character references according to HTML5

This commit is contained in:
Nick Wellnhofer 2024-09-03 15:52:44 +02:00
parent d5cd0f07f8
commit 5951179239
25 changed files with 23265 additions and 432 deletions

View File

@ -2620,6 +2620,111 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
return(ret);
}
#include "html5ent.inc"
/*
 * Flag bits stored in the first byte of an entity table entry; the
 * remaining low bits of that byte hold the length of the name fragment.
 */
/* When set, htmlFindEntityPrefix may accept the name without a ';' */
#define ENT_F_SEMICOLON 0x80u
/* When set, the name is followed by two bytes (offset, count) of a subtable */
#define ENT_F_SUBTABLE 0x40u
/* Mask of all flag bits, cleared to obtain the fragment length */
#define ENT_F_ALL 0xC0u
/**
 * htmlFindEntityPrefix:
 * @string:  input starting at the first character after the '&'
 * @slen:  number of bytes available in @string
 * @isAttr:  non-zero if the reference occurs inside an attribute value
 * @nlen:  receives the number of input bytes covered by the match,
 *         including the trailing ';' if there is one
 * @rlen:  receives the length in bytes of the replacement text
 *
 * Find the longest named character reference that is a prefix of
 * @string. The first letter selects a range of the entity table, which
 * is then binary searched. An entry flagged ENT_F_SUBTABLE has longer
 * names sharing its prefix; the search descends into that subtable while
 * remembering the shorter match found so far.
 *
 * A name counts as terminated if it is followed by ';', or if the entry
 * is flagged ENT_F_SEMICOLON and, inside attribute values, the next
 * character is neither alphanumeric nor '='.
 *
 * No byte at or beyond @string + @slen is read.
 *
 * Returns a pointer to the replacement bytes, or NULL if nothing
 * matched, in which case @nlen and @rlen are left untouched.
 */
static const xmlChar *
htmlFindEntityPrefix(const xmlChar *string, size_t slen, int isAttr,
                     int *nlen, int *rlen) {
    const xmlChar *match = NULL;
    unsigned left, right;
    int first;
    size_t matchLen = 0;
    size_t soff = 1;

    /* Validate the length before looking at any input byte. */
    if (slen < 2)
        return(NULL);

    first = string[0];
    if (((first < 'A') || (first > 'Z')) &&
        ((first < 'a') || (first > 'z')))
        return(NULL);

    /*
     * Look up range by first character. Each htmlEntAlpha record is
     * three bytes: a 16-bit little-endian start index and a count.
     */
    first &= 63;
    left = htmlEntAlpha[first*3] | htmlEntAlpha[first*3+1] << 8;
    right = left + htmlEntAlpha[first*3+2];

    /*
     * Binary search. Invariant: soff < slen, so string[soff] is valid.
     */
    while (left < right) {
        const xmlChar *bytes;
        unsigned mid;
        size_t len;
        size_t avail;
        int cmp;

        mid = left + (right - left) / 2;
        bytes = htmlEntStrings + htmlEntValues[mid];
        len = bytes[0] & ~ENT_F_ALL;
        /* Bytes of input left from the current offset, at least one */
        avail = slen - soff;

        cmp = string[soff] - bytes[1];

        if (cmp == 0) {
            if (avail < len) {
                /*
                 * The input ends before the name does. Only compare
                 * what is really there.
                 */
                cmp = strncmp((const char *) string + soff + 1,
                              (const char *) bytes + 2,
                              avail - 1);
                /* Prefix can never match */
                if (cmp == 0)
                    break;
            } else {
                cmp = strncmp((const char *) string + soff + 1,
                              (const char *) bytes + 2,
                              len - 1);
            }
        }

        if (cmp < 0) {
            right = mid;
        } else if (cmp > 0) {
            left = mid + 1;
        } else {
            /* Character following the name, or 0 at end of input */
            int term = soff + len < slen ? string[soff + len] : 0;
            int isAlnum, isTerm;

            isAlnum = (((term >= 'A') && (term <= 'Z')) ||
                       ((term >= 'a') && (term <= 'z')) ||
                       ((term >= '0') && (term <= '9')));
            isTerm = ((term == ';') ||
                      ((bytes[0] & ENT_F_SEMICOLON) &&
                       ((!isAttr) ||
                        ((!isAlnum) && (term != '=')))));

            if (isTerm) {
                match = bytes + len + 1;
                matchLen = soff + len;
                if (term == ';')
                    matchLen += 1;
            }

            if (bytes[0] & ENT_F_SUBTABLE) {
                /* Skip the two subtable bytes preceding the value */
                if (isTerm)
                    match += 2;

                /* A longer name may follow: continue in the subtable */
                if ((isAlnum) && (soff + len < slen)) {
                    left = mid + bytes[len + 1];
                    right = left + bytes[len + 2];
                    soff += len;
                    continue;
                }
            }

            break;
        }
    }

    if (match == NULL)
        return(NULL);

    /* The value is a length byte followed by the replacement bytes */
    *nlen = matchLen;
    *rlen = match[0];
    return(match + 1);
}
/**
* htmlParseHTMLAttribute:
@ -2640,9 +2745,6 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
XML_MAX_HUGE_LENGTH :
XML_MAX_TEXT_LENGTH;
xmlChar *out = NULL;
const xmlChar *name = NULL;
const xmlChar *cur = NULL;
const htmlEntityDesc * ent;
/*
* allocate a translation buffer.
@ -2662,6 +2764,16 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
(CUR != 0) && (CUR != stop)) {
if ((stop == 0) && (CUR == '>')) break;
if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
growBuffer(buffer);
out = &buffer[indx];
}
GROW;
if (CUR == '&') {
if (NXT(1) == '#') {
unsigned int c;
@ -2680,70 +2792,28 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
for ( ; bits >= 0; bits-= 6) {
*out++ = ((c >> bits) & 0x3F) | 0x80;
}
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
growBuffer(buffer);
out = &buffer[indx];
}
} else {
ent = htmlParseEntityRef(ctxt, &name);
if (name == NULL) {
*out++ = '&';
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
const xmlChar *repl;
int nameLen, replLen;
growBuffer(buffer);
out = &buffer[indx];
}
} else if (ent == NULL) {
*out++ = '&';
cur = name;
while (*cur != 0) {
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
SKIP(1);
repl = htmlFindEntityPrefix(CUR_PTR,
ctxt->input->end - CUR_PTR,
/* isAttr */ 1,
&nameLen, &replLen);
growBuffer(buffer);
out = &buffer[indx];
}
*out++ = *cur++;
}
if (repl == NULL) {
*out++ = '&';
} else {
unsigned int c;
int bits;
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
growBuffer(buffer);
out = &buffer[indx];
}
c = ent->value;
if (c < 0x80)
{ *out++ = c; bits= -6; }
else if (c < 0x800)
{ *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
else if (c < 0x10000)
{ *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
else
{ *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
for ( ; bits >= 0; bits-= 6) {
*out++ = ((c >> bits) & 0x3F) | 0x80;
}
memcpy(out, repl, replLen);
out += replLen;
SKIP(nameLen);
}
}
} else {
unsigned int c;
int bits, l;
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
growBuffer(buffer);
out = &buffer[indx];
}
c = CUR_CHAR(l);
if (c < 0x80)
{ *out++ = c; bits= -6; }
@ -4086,9 +4156,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
*/
static void
htmlParseReference(htmlParserCtxtPtr ctxt) {
const htmlEntityDesc * ent;
xmlChar out[6];
const xmlChar *name;
if (CUR != '&') return;
if (NXT(1) == '#') {
@ -4113,43 +4181,25 @@ htmlParseReference(htmlParserCtxtPtr ctxt) {
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, out, i);
} else {
ent = htmlParseEntityRef(ctxt, &name);
if (name == NULL) {
htmlCheckParagraph(ctxt);
const xmlChar *repl;
int nameLen, replLen;
htmlCheckParagraph(ctxt);
SKIP(1);
repl = htmlFindEntityPrefix(CUR_PTR,
ctxt->input->end - CUR_PTR,
/* isAttr */ 0,
&nameLen, &replLen);
if (repl == NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
return;
}
if ((ent == NULL) || !(ent->value > 0)) {
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
}
} else {
unsigned int c;
int bits, i = 0;
c = ent->value;
if (c < 0x80)
{ out[i++]= c; bits= -6; }
else if (c < 0x800)
{ out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
else if (c < 0x10000)
{ out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
else
{ out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
for ( ; bits >= 0; bits-= 6) {
out[i++]= ((c >> bits) & 0x3F) | 0x80;
}
out[i] = 0;
htmlCheckParagraph(ctxt);
} else {
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData, out, i);
}
ctxt->sax->characters(ctxt->userData, repl, replLen);
SKIP(nameLen);
}
}
}

1607
html5ent.inc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,2 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body><p>&amp;&Ugrave;</p></body></html>
<html><body><p>&amp;j&Ugrave;</p></body></html>

View File

@ -1,3 +0,0 @@
./test/HTML/758518-entity.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
Ù
^

View File

@ -1,11 +1,10 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&amp;, 1)
SAX.characters(&Ugrave;, 2)
SAX.characters(j&Ugrave;, 3)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)

View File

@ -1,3 +1,3 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body><p>&amp;&ecirc;
<html><body><p>&amp;:&ecirc;
</p></body></html>

View File

@ -1,3 +0,0 @@
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
ê
^

View File

@ -1,13 +1,11 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&amp;, 1)
SAX.characters(&ecirc;, 2)
SAX.characters(
, 1)
SAX.characters(:&ecirc;
, 4)
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)

View File

@ -0,0 +1,8 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<body>
<a href="index.cgi?a&amp;lt=1&amp;gt=2">link</a>
<a href="index.cgi?a&amp;lta&amp;gta">link</a>
<a href="index.cgi?a&lt;&gt;">link</a>
</body>
</html>

View File

@ -0,0 +1,30 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.characters(
, 1)
SAX.startElement(body)
SAX.characters(
, 1)
SAX.startElement(a, href='index.cgi?a&amp;lt=1&amp;gt=2')
SAX.characters(link, 4)
SAX.endElement(a)
SAX.characters(
, 1)
SAX.startElement(a, href='index.cgi?a&amp;lta&amp;gta')
SAX.characters(link, 4)
SAX.endElement(a)
SAX.characters(
, 1)
SAX.startElement(a, href='index.cgi?a&lt;&gt;')
SAX.characters(link, 4)
SAX.endElement(a)
SAX.characters(
, 1)
SAX.endElement(body)
SAX.characters(
, 1)
SAX.endElement(html)
SAX.characters(
, 1)
SAX.endDocument()

View File

@ -1,15 +1,6 @@
./test/HTML/doc3.htm:10: HTML parser error : Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN
^
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&id
^
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media
^
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id
^
./test/HTML/doc3.htm:148: HTML parser error : Unexpected end tag : p
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P>
^
@ -19,12 +10,6 @@ _top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media
./test/HTML/doc3.htm:236: HTML parser error : Unexpected end tag : a
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
^
./test/HTML/doc3.htm:747: HTML parser error : htmlParseEntityRef: expecting ';'
er=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid
^
./test/HTML/doc3.htm:747: HTML parser error : htmlParseEntityRef: expecting ';'
Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asid
^
./test/HTML/doc3.htm:747: HTML parser error : Unexpected end tag : li
light.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI>
^

View File

@ -85,10 +85,7 @@ SAX.comment( © 2000 GameSquad.net All Rights Reserved. )
SAX.startElement(iframe, border='0', frameborder='no', height='60', marginheight='0', marginwidth='0', scrolling='no', src='doc3_files/adcycle.htm', width='468')
SAX.characters(
, 1)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&amp;id=1', target='_top')
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(img, src='http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&amp;media=1&amp;id=1', width='468', height='60', border='0', alt='GSN ROS Ad')
SAX.endElement(img)
SAX.endElement(a)
@ -2567,8 +2564,6 @@ SAX.endElement(font)
SAX.startElement(center)
SAX.startElement(iframe, frameborder='0', height='60', marginheight='0', marginwidth='0', noresize, scrolling='no', src='doc3_files/ad_iframe.htm', width='468')
SAX.startElement(a, href='http://ads.adflight.com/go_static.asp?asid=7708', target='_top')
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(img, width='468', height='60', border='0', alt='Advertisement', src='http://ads.adflight.com/ad_static.asp?pid=2097&amp;sid=1881&amp;asid=7708')
SAX.endElement(img)
SAX.endElement(a)

View File

@ -1,12 +0,0 @@
./test/HTML/entities.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
<p tst="a&amp;b" tst2="a&b" tst3="a & b">
^
./test/HTML/entities.html:1: HTML parser error : htmlParseEntityRef: no name
<p tst="a&amp;b" tst2="a&b" tst3="a & b">
^
./test/HTML/entities.html:3: HTML parser error : htmlParseEntityRef: expecting ';'
a&b
^
./test/HTML/entities.html:4: HTML parser error : htmlParseEntityRef: no name
a & b
^

View File

@ -2,20 +2,15 @@ SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.startElement(body)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: no name
SAX.startElement(p, tst='a&amp;b', tst2='a&amp;b', tst3='a &amp; b')
SAX.characters(
a, 2)
SAX.characters(&amp;, 1)
SAX.characters(b
a, 3)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.characters(&amp;, 1)
SAX.characters(b, 1)
SAX.characters(
a , 3)
SAX.error: htmlParseEntityRef: no name
SAX.characters(b
a , 4)
SAX.characters(&amp;, 1)
SAX.characters( b
, 3)

View File

@ -1,3 +0,0 @@
./test/HTML/fp40.htm:153: HTML parser error : htmlParseEntityRef: no name
technical articles from Microsoft's extensive Knowledge Base, FAQs, & troublesho
^

View File

@ -422,7 +422,6 @@ SAX.characters(
, 2)
SAX.startElement(p)
SAX.characters(For further technical informat, 254)
SAX.error: htmlParseEntityRef: no name
SAX.characters(&amp;, 1)
SAX.characters( troubleshooters to find
fast, 302)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,3 @@
./test/HTML/utf8bug.html:45: HTML parser error : htmlParseEntityRef: expecting ';'
<img src="showimage.aspx?path=Files_Upload\192.png&width=%>" border="0" />
^
./test/HTML/utf8bug.html:118: HTML parser error : htmlParseEntityRef: expecting ';'
<a href="showimage.aspx?path=Files_Upload\302.JPG&Width=" rel="lightbox" tit
^
./test/HTML/utf8bug.html:119: HTML parser error : htmlParseEntityRef: expecting ';'
<img src="showimage.aspx?path=Files_Upload\302.JPG&Width=220" align="left" b
^
./test/HTML/utf8bug.html:121: HTML parser error : Tag s1 invalid
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
^
./test/HTML/utf8bug.html:177: HTML parser error : htmlParseEntityRef: expecting ';'
ین پاسخ را برای نویسنده مقاله رجانیوز copy&paste
^

View File

@ -146,7 +146,6 @@ SAX.startElement(a, href='RSS2.asp')
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(img, src='showimage.aspx?path=Files_Upload\192.png&amp;width=%&gt;', border='0')
SAX.endElement(img)
SAX.characters(
@ -400,11 +399,9 @@ SAX.startElement(div, class='Image')
SAX.characters(
, 10)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='showimage.aspx?path=Files_Upload\302.JPG&amp;Width=', rel='lightbox', title='&#1588;&#1607;&#1610;&#1585; &#1576;&#1604;&#1575;&#1711;')
SAX.characters(
, 5)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(img, src='showimage.aspx?path=Files_Upload\302.JPG&amp;Width=220', align='left', border='1')
SAX.endElement(img)
SAX.characters(
@ -665,10 +662,8 @@ SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(&#1587;&#1604;&#1575;&#1605; , 834)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.characters(&amp;, 1)
SAX.characters(paste, 5)
SAX.characters( , 1)
SAX.characters(paste , 6)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(

View File

@ -1,201 +1,15 @@
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
<FORM METHOD=GET ACTION="http://nsads.hotwired.com/event.ng/Type=click&ProfileID
^
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
D=GET ACTION="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID
^
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
N="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID
^
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
s.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID
^
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID
^
./test/HTML/wired.html:6: HTML parser error : htmlParseEntityRef: expecting ';'
pe=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID=2684&TagValues
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
" align="RIGHT"><a href="http://nsads.hotwired.com/event.ng/Type=click&ProfileID
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
GHT"><a href="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
f="http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
s.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID
^
./test/HTML/wired.html:52: HTML parser error : htmlParseEntityRef: expecting ';'
pe=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID=3228&TagValues
^
./test/HTML/wired.html:70: HTML parser error : Tag nobr invalid
<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static
^
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
^
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
ction=FilterSearch&Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection
^
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode
^
./test/HTML/wired.html:89: HTML parser error : htmlParseEntityRef: expecting ';'
ter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode=Internet&Query
^
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
^
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
tion=FilterSearch&Filter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection
^
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
lter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode
^
./test/HTML/wired.html:90: HTML parser error : htmlParseEntityRef: expecting ';'
r.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode=Internet&Query
^
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
="http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&Action
^
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
ired.com/search97/s97.vts?collection=webmonkey_guides&Action=FilterSearch&filter
^
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
ction=webmonkey_guides&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
ilterSearch&filter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode
^
./test/HTML/wired.html:91: HTML parser error : htmlParseEntityRef: expecting ';'
ter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode=Internet&Query
^
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
on value="http://search.hotwired.com/search97/s97.vts?collection=hotwired&Action
^
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
rch.hotwired.com/search97/s97.vts?collection=hotwired&Action=FilterSearch&filter
^
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
ts?collection=hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
ilterSearch&filter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode
^
./test/HTML/wired.html:92: HTML parser error : htmlParseEntityRef: expecting ';'
ter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode=Internet&Query
^
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
^
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
ction=FilterSearch&Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection
^
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode
^
./test/HTML/wired.html:93: HTML parser error : htmlParseEntityRef: expecting ';'
ter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode=Internet&Query
^
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
on value="http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter
^
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
d.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
tion=FilterSearch&Filter=docs_filter.hts&ResultTemplate=animation.hts&Collection
^
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
lter=docs_filter.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode
^
./test/HTML/wired.html:94: HTML parser error : htmlParseEntityRef: expecting ';'
r.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode=Internet&Query
^
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
option value="http://search.hotwired.com/search97/s97.vts?collection=suck&Action
^
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
/search.hotwired.com/search97/s97.vts?collection=suck&Action=FilterSearch&filter
^
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
97.vts?collection=suck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
uck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode
^
./test/HTML/wired.html:95: HTML parser error : htmlParseEntityRef: expecting ';'
erSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode=Internet&Query
^
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
lue="http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&Action
^
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
otwired.com/search97/s97.vts?collection=uber_hotwired&Action=FilterSearch&filter
^
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
llection=uber_hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate
^
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
n=FilterSearch&filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode
^
./test/HTML/wired.html:96: HTML parser error : htmlParseEntityRef: expecting ';'
filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode=Internet&Query
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
<option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&O
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs
^
./test/HTML/wired.html:97: HTML parser error : htmlParseEntityRef: expecting ';'
lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&MT
^
./test/HTML/wired.html:170: HTML parser error : Unexpected end tag : form
</tr> </form>
^
./test/HTML/wired.html:248: HTML parser error : htmlParseEntityRef: expecting ';'
MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_search
^
./test/HTML/wired.html:265: HTML parser error : Unexpected end tag : form
</tr> </form>
^
./test/HTML/wired.html:346: HTML parser error : Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:374: HTML parser error : htmlParseEntityRef: no name
a, sans-serif"><b><a href="/news/commentarySection/0,1292,31926,00.html">Rants &
^
./test/HTML/wired.html:374: HTML parser error : Opening and ending tag mismatch: td and font
Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br> </td
^
@ -205,15 +19,6 @@ Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br> </td
./test/HTML/wired.html:402: HTML parser error : Opening and ending tag mismatch: a and font
w.vignette.com/" style="text-decoration:none"><font color="#000000">Vignette</a>
^
./test/HTML/wired.html:407: HTML parser error : htmlParseEntityRef: expecting ';'
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
^
./test/HTML/wired.html:407: HTML parser error : htmlParseEntityRef: expecting ';'
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
^
./test/HTML/wired.html:408: HTML parser error : htmlParseEntityRef: expecting ';'
wired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Spri
^
./test/HTML/wired.html:408: HTML parser error : Opening and ending tag mismatch: a and font
com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
^
@ -250,6 +55,3 @@ com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:432: HTML parser error : htmlParseEntityRef: expecting ';'
href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1">Lycos</a
^

View File

@ -19,12 +19,6 @@ SAX.characters(
, 5)
SAX.startElement(td, valign='top', align='LEFT')
SAX.startElement(table, border='0', cellpadding='0', cellspacing='0', width='468', height='60', bgcolor='#FFFFFF')
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(form, method='GET', action='http://nsads.hotwired.com/event.ng/Type=click&amp;ProfileID=9688&amp;RunID=14074&amp;AdID=22584&amp;GroupID=1&amp;FamilyID=2684&amp;TagValues=8.25.156.159.166.171.172.174.179.180.181.182.183.196.197.199.208.389.412.436.2041.6750.78456.79630.81880&amp;Redirect=http://www.springstreet.com/aa/citysearch.htm', id='form1', name='form1')
SAX.characters(
, 2)
@ -298,12 +292,6 @@ SAX.endElement(td)
SAX.characters(
, 5)
SAX.startElement(td, valign='top', align='RIGHT')
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='http://nsads.hotwired.com/event.ng/Type=click&amp;ProfileID=5597&amp;RunID=17167&amp;AdID=22588&amp;GroupID=1&amp;FamilyID=3228&amp;TagValues=8.25.159.171.172.174.179.180.181.182.183.196.197.199.208.241.389.412.436.2035.6749.6750.70367.78456.79630.81880&amp;Redirect=http:%2F%2Fwww.hp.com%2Fgo%2Foriginal%20', target='_top')
SAX.startElement(img, src='http://static.wired.com/advertising/blipverts/hp_colorinkjet/hp_970c_120x60_6.gif', border='1', height='60', width='120', alt='True to the Original')
SAX.endElement(img)
@ -437,94 +425,46 @@ SAX.startElement(select, name='url')
SAX.characters(
, 4)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&amp;Filter=docs_filter.hts&amp;ResultTemplate=vignette.hts&amp;Collection=vignette&amp;QueryMode=Internet&amp;Query=', selected)
SAX.characters(Wired News, 10)
SAX.endElement(option)
SAX.characters(
, 3)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&amp;Filter=docs_filter.hts&amp;ResultTemplate=webmonkey.hts&amp;Collection=webmonkey&amp;QueryMode=Internet&amp;Query=')
SAX.characters(Webmonkey, 9)
SAX.endElement(option)
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&amp;Action=FilterSearch&amp;filter=docs_filter.hts&amp;ResultTemplate=webmonkey_guides.hts&amp;QueryMode=Internet&amp;Query=')
SAX.characters(Webmonkey Guides, 16)
SAX.endElement(option)
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=hotwired&amp;Action=FilterSearch&amp;filter=docs_filter.hts&amp;ResultTemplate=hotwired_archive.hts&amp;QueryMode=Internet&amp;Query=')
SAX.characters(HotWired Archives, 17)
SAX.endElement(option)
SAX.characters(
, 3)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&amp;Filter=docs_filter.hts&amp;ResultTemplate=magazine.hts&amp;Collection=magazine&amp;QueryMode=Internet&amp;Query=')
SAX.characters(Wired Magazine, 14)
SAX.endElement(option)
SAX.characters(
, 3)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&amp;Filter=docs_filter.hts&amp;ResultTemplate=animation.hts&amp;Collection=animation&amp;QueryMode=Internet&amp;Query=')
SAX.characters(Animation Express, 17)
SAX.endElement(option)
SAX.characters(
, 3)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=suck&amp;Action=FilterSearch&amp;filter=docs_filter.hts&amp;ResultTemplate=suck.hts&amp;QueryMode=Internet&amp;Query=')
SAX.characters(Suck.com, 8)
SAX.endElement(option)
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&amp;Action=FilterSearch&amp;filter=docs_filter.hts&amp;ResultTemplate=uber_hotwired.hts&amp;QueryMode=Internet&amp;Query=')
SAX.characters(All of HotWired, 15)
SAX.endElement(option)
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(option, value='http://www.hotbot.com/?SM=MC&amp;DV=0&amp;LG=any&amp;RD=RG&amp;DC=10&amp;DE=2&amp;_v=2&amp;OPs=MDRTP&amp;MT=')
SAX.characters(The Web -&gt; HotBot, 17)
SAX.endElement(option)
@ -1090,7 +1030,6 @@ SAX.endElement(input)
SAX.characters(
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(img, src='http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&amp;is_search=Y', border='0', align='top')
SAX.endElement(img)
SAX.characters(
@ -1612,7 +1551,6 @@ SAX.startElement(font, size='2', face='Arial,Helvetica, sans-serif')
SAX.startElement(b)
SAX.startElement(a, href='/news/commentarySection/0,1292,31926,00.html')
SAX.characters(Rants , 6)
SAX.error: htmlParseEntityRef: no name
SAX.characters(&amp;, 1)
SAX.characters( Raves, 6)
SAX.endElement(a)
@ -1948,9 +1886,6 @@ SAX.startElement(br)
SAX.endElement(br)
SAX.startElement(i)
SAX.characters(Sponsored by , 13)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s
ervlet/appservlet?from=/wired/sprint/&amp;template=/security/security.html&amp;SITE=
wired.com&amp;BANNER=Sprint', style='text-decoration:none')
@ -2093,7 +2028,6 @@ SAX.endElement(br)
SAX.endElement(p)
SAX.startElement(li)
SAX.characters(More from , 10)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&amp;lpv=1')
SAX.characters(Lycos, 5)
SAX.endElement(a)

7
test/HTML/attr-ents.html Normal file
View File

@ -0,0 +1,7 @@
<html>
<body>
<a href="index.cgi?a&lt=1&gt=2">link</a>
<a href="index.cgi?a&lta&gta">link</a>
<a href="index.cgi?a&lt&gt">link</a>
</body>
</html>

File diff suppressed because it is too large Load Diff

169
tools/genHtmlEnt.py Executable file
View File

@ -0,0 +1,169 @@
#!/usr/bin/env python3
import json
import sys
from dataclasses import dataclass
# The basic idea is to find named character references using binary
# search. Since entity strings may not have a terminator, this doesn't
# work if one entity string is a prefix of another. In this case,
# we branch to a subtable after matching the prefix.
#
# We create separate initial tables based on the first character
# of the entity name.
#
# The following tables are generated:
#
# htmlEntAlpha: start and length of the initial table for each first
# character, indexing into htmlEntValues
# htmlEntValues: concatenation of all table values, which index into
# htmlEntStrings
# htmlEntStrings: variable sized records containing entity name,
# replacement and optionally the position of a
# subtable
# Load the named character reference table from entities.json in the
# current working directory; print a download hint and exit if it is
# missing.
try:
    # JSON text is UTF-8 by specification, so don't depend on the
    # platform's default text encoding.
    with open('entities.json', encoding='utf-8') as json_data:
        ents = json.load(json_data)
except FileNotFoundError:
    print('entities.json not found, try curl -LJO',
          'https://html.spec.whatwg.org/entities.json')
    sys.exit(1)
def to_cchars(s):
    """Convert a string to a list of C initializer elements, one per byte.

    The string is encoded with str.encode() (UTF-8 by default). Printable
    ASCII bytes other than the single quote and the backslash become C
    character literals such as "'a'"; every other byte is kept as its
    integer value.
    """
    r = []

    for c in s.encode():
        if 0x20 <= c <= 0x7E and c not in (ord("'"), ord('\\')):
            r.append(f"'{chr(c)}'")
        else:
            # Quote, backslash and non-printable bytes would need escaping
            # inside a character literal, so emit them numerically.
            r.append(c)

    return r
@dataclass
class PrefixStackEntry:
    """One level of the stack of entity-name prefixes currently open."""
    # Leading part of the entity name covered by this level.
    prefix: str
    # Index into `tables` of the table collecting names with this prefix.
    table_id: int
@dataclass
class AlphaFixup:
    """Deferred alpha-table entry for one initial (first-character) table."""
    # Index into `tables` of the initial table.
    table_id: int
    # Slot in the alpha table: code of the first character modulo 64.
    char: int
@dataclass
class StringFixup:
    """Deferred patch of the two subtable placeholder bytes in `strings`."""
    # Index into `tables` of the subtable.
    table_id: int
    # Position in `strings` of the first of the two placeholder bytes.
    string_index: int
    # Index into `tables` of the table holding the entry that owns the
    # subtable.
    super_table_id: int
    # Index of that entry within its table.
    super_offset: int
# Keep only the entity strings that end in a semicolon, sorted by bare
# name (first and last character, presumably '&' and ';', stripped).
# Sorting this way puts a name that is a prefix of other names directly
# in front of them, which the record-building loop relies on.
keys = sorted(
    (key for key in ents if key.endswith(';')),
    key=lambda k: k[1:-1],
)

# Flat contents of htmlEntStrings (ints and C character literals)
strings = []
# List of tables; each table is a list of indices into `strings`.
# NOTE(review): these 64 leading empty tables are never filled by this
# script (ids handed out later start at 64) - confirm whether they are
# still needed.
tables = [[] for _ in range(64)]
# Stack of PrefixStackEntry for the prefixes of the current name
prefix_stack = []
# Pending AlphaFixup / StringFixup records, resolved once all table
# sizes are known
alpha_fixups = []
string_fixups = []
# Flag bits stored in the first byte of every record, above the 6-bit
# name length. They mirror ENT_F_SEMICOLON / ENT_F_SUBTABLE in the HTML
# parser's C source.
ENT_F_SEMICOLON = 0x80
ENT_F_SUBTABLE = 0x40

# Walk the sorted names and emit one variable-sized record per entity
# into `strings`, registering its position in the table that belongs to
# the longest currently open prefix.
for i, key in enumerate(keys):
    name = key[1:-1]

    next_name = None
    if i + 1 < len(keys):
        next_name = keys[i+1][1:-1]

    # Leave subtables whose prefix no longer matches
    while prefix_stack and not name.startswith(prefix_stack[-1].prefix):
        prefix_stack.pop()

    # First character is initial prefix
    if not prefix_stack:
        table_id = len(tables)
        tables.append([])

        prefix_stack.append(PrefixStackEntry(name[0], table_id))
        alpha_fixups.append(AlphaFixup(table_id, ord(name[0]) % 64))

    string_index = len(strings)
    table = tables[prefix_stack[-1].table_id]
    table_index = len(table)
    table.append(string_index)

    # Only the part of the name after the current prefix is stored
    name_offset = len(prefix_stack[-1].prefix)
    name_chars = to_cchars(name[name_offset:])
    repl_chars = to_cchars(ents[key]['characters'])

    # The name length shares its byte with the two flag bits and the
    # replacement length is a single byte; refuse to emit a corrupt
    # record.
    if len(name_chars) > 0x3F or len(repl_chars) > 0xFF:
        raise ValueError(f'entity {key!r} does not fit the record format')

    # Set when the same name is also listed without the trailing ';'
    semicolon_flag = 0
    if key[:-1] in ents:
        semicolon_flag = ENT_F_SEMICOLON

    if next_name and next_name.startswith(name):
        # Create subtable

        strings += [
            len(name_chars) | semicolon_flag | ENT_F_SUBTABLE, *name_chars,
            0, 0, # subtable position, to be fixed up
            len(repl_chars), *repl_chars,
        ]

        table_id = len(tables)
        tables.append([])

        # The two placeholder bytes directly follow the name
        fixup_index = string_index + 1 + len(name_chars)
        string_fixups.append(StringFixup(
            table_id, fixup_index, prefix_stack[-1].table_id, table_index,
        ))

        prefix_stack.append(PrefixStackEntry(name, table_id))
    else:
        strings += [
            len(name_chars) | semicolon_flag, *name_chars,
            len(repl_chars), *repl_chars,
        ]
def check_range(what, value, limit):
    """Raise ValueError unless 0 <= value <= limit.

    The generated tables have fixed-width C element types, so a value
    outside the range would be emitted as a corrupt table entry.
    """
    if not 0 <= value <= limit:
        raise ValueError(f'{what} {value} out of range 0..{limit}')

# Concat tables and record ranges: table t occupies
# values[ranges[t]:ranges[t+1]]
ranges = [ 0 ]
values = []
for table in tables:
    values += table
    ranges.append(len(values))

# htmlEntValues is emitted as unsigned short
if values:
    check_range('string index', max(values), 0xFFFF)

# Create alpha table: three bytes per slot (start low byte, start high
# byte, length). Slots are indexed by first character modulo 64, which
# maps 'A'..'Z' to 1..26 and 'a'..'z' to 33..58, hence 59 slots.
ALPHA_SLOTS = 59
alpha = [ 0 ] * (ALPHA_SLOTS * 3)
for fixup in alpha_fixups:
    table_id, c = fixup.table_id, fixup.char
    start = ranges[table_id]
    end = ranges[table_id+1]
    # A slice assignment past the end would silently grow the list
    check_range('alpha slot', c, ALPHA_SLOTS - 1)
    check_range('initial table start', start, 0xFFFF)
    check_range('initial table length', end - start, 0xFF)
    alpha[c*3:c*3+3] = [ start & 0xFF, start >> 8, end - start ]

# Fix up subtable positions: the placeholder bytes become the distance
# from the owning entry to the start of its subtable (both measured in
# `values`) and the subtable length.
for fixup in string_fixups:
    table_id, i = fixup.table_id, fixup.string_index
    start = ranges[table_id]
    end = ranges[table_id+1]
    super_index = ranges[fixup.super_table_id] + fixup.super_offset
    check_range('subtable offset', start - super_index, 0xFF)
    check_range('subtable length', end - start, 0xFF)
    strings[i:i+2] = [ start - super_index, end - start ]
# Print tables
def gen_table(ctype, cname, values, fmt, elems_per_line):
    """Return the C source of a static const array definition.

    ctype and cname give the element type and the array name, values the
    elements. Each element is rendered with the printf-style format fmt;
    elements are separated by commas and a new line is started every
    elems_per_line elements.
    """
    count = len(values)
    parts = []

    for i, value in enumerate(values):
        # First element of a row starts a new line, others follow a space
        sep = '\n ' if i % elems_per_line == 0 else ' '
        parts.append(sep + fmt % value)

    body = ','.join(parts)

    return f'static const {ctype} {cname}[{count}] = {{{body}\n}};\n'
# Write the three lookup tables to stdout as C array definitions, in the
# order htmlEntAlpha, htmlEntValues, htmlEntStrings.
for ctype, cname, data, fmt, per_line in (
    ('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15),
    ('unsigned short', 'htmlEntValues', values, '%5d', 10),
    ('unsigned char', 'htmlEntStrings', strings, '%3s', 15),
):
    print(gen_table(ctype, cname, data, fmt, per_line))