mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-01-26 10:03:34 +03:00
parser: Support custom charset conversion implementations
Implement xmlCtxtSetCharEncConvImpl. I agree that the name is terrible.
This commit is contained in:
parent
c59c24494d
commit
221df37529
@ -452,6 +452,9 @@ struct _xmlParserCtxt {
|
||||
|
||||
xmlResourceLoader resourceLoader XML_DEPRECATED_MEMBER;
|
||||
void *resourceCtxt XML_DEPRECATED_MEMBER;
|
||||
|
||||
xmlCharEncConvImpl convImpl XML_DEPRECATED_MEMBER;
|
||||
void *convCtxt XML_DEPRECATED_MEMBER;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1438,6 +1441,10 @@ XMLPUBFUN void
|
||||
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt,
|
||||
xmlResourceLoader loader,
|
||||
void *vctxt);
|
||||
XMLPUBFUN void
|
||||
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncConvImpl impl,
|
||||
void *vctxt);
|
||||
XMLPUBFUN void
|
||||
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,
|
||||
unsigned maxAmpl);
|
||||
|
@ -376,7 +376,8 @@ XMLPUBFUN xmlParserInputPtr
|
||||
xmlInputCreateIO(const char *url, xmlInputReadCallback ioRead,
|
||||
xmlInputCloseCallback ioClose, void *ioCtxt, int flags);
|
||||
XMLPUBFUN int
|
||||
xmlInputSetEncoding(xmlParserInputPtr input, const char *encoding);
|
||||
xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
|
||||
/**
|
||||
* Namespaces.
|
||||
|
@ -1078,9 +1078,34 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* xmlCtxtSetCharEncConvImpl:
|
||||
* @ctxt: parser context
|
||||
* @impl: callback
|
||||
* @vctxt: user data
|
||||
*
|
||||
* Installs a custom implementation to convert between character
|
||||
* encodings.
|
||||
*
|
||||
* This bypasses legacy feature like global encoding handlers or
|
||||
* encoding aliases.
|
||||
*
|
||||
* Available since 2.14.0.
|
||||
*/
|
||||
void
|
||||
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
|
||||
void *vctxt) {
|
||||
if (ctxt == NULL)
|
||||
return;
|
||||
|
||||
ctxt->convImpl = impl;
|
||||
ctxt->convCtxt = vctxt;
|
||||
}
|
||||
|
||||
static int
|
||||
xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
|
||||
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
|
||||
xmlChar out[200];
|
||||
xmlParserInputPtr input = ctxt->input;
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
int inlen, outlen, res, i;
|
||||
|
||||
@ -1088,9 +1113,10 @@ xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
|
||||
|
||||
/*
|
||||
* To detect the EBCDIC code page, we convert the first 200 bytes
|
||||
* to EBCDIC-US and try to find the encoding declaration.
|
||||
* to IBM037 (EBCDIC-US) and try to find the encoding declaration.
|
||||
*/
|
||||
res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
|
||||
res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
|
||||
ctxt->convImpl, ctxt->convCtxt, &handler);
|
||||
if (res != 0)
|
||||
return(res);
|
||||
outlen = sizeof(out) - 1;
|
||||
@ -1133,8 +1159,9 @@ xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
|
||||
break;
|
||||
out[i] = 0;
|
||||
xmlCharEncCloseFunc(handler);
|
||||
res = xmlOpenCharEncodingHandler((char *) out + start,
|
||||
/* output */ 0, &handler);
|
||||
res = xmlCreateCharEncodingHandler((char *) out + start,
|
||||
/* output */ 0, ctxt->convImpl, ctxt->convCtxt,
|
||||
&handler);
|
||||
if (res != 0)
|
||||
return(res);
|
||||
*hout = handler;
|
||||
@ -1147,7 +1174,8 @@ done:
|
||||
* Encoding handlers are stateful, so we have to recreate them.
|
||||
*/
|
||||
xmlCharEncCloseFunc(handler);
|
||||
res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
|
||||
res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
|
||||
ctxt->convImpl, ctxt->convCtxt, &handler);
|
||||
if (res != 0)
|
||||
return(res);
|
||||
*hout = handler;
|
||||
@ -1184,7 +1212,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
res = 0;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
res = xmlDetectEBCDIC(ctxt->input, &handler);
|
||||
res = xmlDetectEBCDIC(ctxt, &handler);
|
||||
break;
|
||||
default:
|
||||
res = xmlLookupCharEncodingHandler(enc, &handler);
|
||||
@ -1224,7 +1252,8 @@ xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
if (encoding == NULL)
|
||||
return(-1);
|
||||
|
||||
res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
|
||||
res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
|
||||
ctxt->convImpl, ctxt->convCtxt, &handler);
|
||||
if (res != 0) {
|
||||
xmlFatalErr(ctxt, res, encoding);
|
||||
return(-1);
|
||||
@ -1267,7 +1296,7 @@ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
|
||||
*
|
||||
* Returns an xmlParserErrors code.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
||||
xmlCharEncodingHandlerPtr handler) {
|
||||
int nbchars;
|
||||
@ -1341,33 +1370,6 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
||||
return(XML_ERR_OK);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlInputSetEncoding:
|
||||
* @input: the input stream
|
||||
* @encoding: the encoding name
|
||||
*
|
||||
* Use specified encoding to decode input data. This overrides the
|
||||
* encoding found in the XML declaration.
|
||||
*
|
||||
* Available since 2.14.0.
|
||||
*
|
||||
* Returns an xmlParserErrors code.
|
||||
*/
|
||||
int
|
||||
xmlInputSetEncoding(xmlParserInputPtr input, const char *encoding) {
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
int res;
|
||||
|
||||
if (encoding == NULL)
|
||||
return(XML_ERR_ARGUMENT);
|
||||
|
||||
res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
|
||||
if (res != 0)
|
||||
return(res);
|
||||
|
||||
return(xmlInputSetEncodingHandler(input, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchInputEncoding:
|
||||
* @ctxt: the parser context, only for error reporting
|
||||
@ -2260,8 +2262,15 @@ xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
|
||||
if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
|
||||
(xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
|
||||
encoding = xmlNanoHTTPEncoding(input->buf->context);
|
||||
if (encoding != NULL)
|
||||
xmlInputSetEncoding(input, encoding);
|
||||
if (encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
int res;
|
||||
|
||||
res = xmlOpenCharEncodingHandler(encoding, /* output */ 0,
|
||||
&handler);
|
||||
if (res == 0)
|
||||
xmlInputSetEncodingHandler(input, handler);
|
||||
}
|
||||
}
|
||||
|
||||
redir = xmlNanoHTTPRedir(input->buf->context);
|
||||
|
32
testapi.c
32
testapi.c
@ -15876,38 +15876,6 @@ test_xmlInputCreateUrl(void) {
|
||||
static int
|
||||
test_xmlInputSetEncoding(void) {
|
||||
int test_ret = 0;
|
||||
|
||||
int mem_base;
|
||||
int ret_val;
|
||||
xmlParserInputPtr input; /* the input stream */
|
||||
int n_input;
|
||||
const char * encoding; /* the encoding name */
|
||||
int n_encoding;
|
||||
|
||||
for (n_input = 0;n_input < gen_nb_xmlParserInputPtr;n_input++) {
|
||||
for (n_encoding = 0;n_encoding < gen_nb_const_char_ptr;n_encoding++) {
|
||||
mem_base = xmlMemBlocks();
|
||||
input = gen_xmlParserInputPtr(n_input, 0);
|
||||
encoding = gen_const_char_ptr(n_encoding, 1);
|
||||
|
||||
ret_val = xmlInputSetEncoding(input, encoding);
|
||||
desret_int(ret_val);
|
||||
call_tests++;
|
||||
des_xmlParserInputPtr(n_input, input, 0);
|
||||
des_const_char_ptr(n_encoding, encoding, 1);
|
||||
xmlResetLastError();
|
||||
if (mem_base != xmlMemBlocks()) {
|
||||
printf("Leak of %d blocks found in xmlInputSetEncoding",
|
||||
xmlMemBlocks() - mem_base);
|
||||
test_ret++;
|
||||
printf(" %d", n_input);
|
||||
printf(" %d", n_encoding);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
function_tests++;
|
||||
|
||||
return(test_ret);
|
||||
}
|
||||
|
||||
|
84
testparser.c
84
testparser.c
@ -518,6 +518,89 @@ testBuildRelativeUri(void) {
|
||||
return err;
|
||||
}
|
||||
|
||||
static int charEncConvImplError;
|
||||
|
||||
static int
|
||||
rot13Convert(void *vctxt, unsigned char *out, int *outlen,
|
||||
const unsigned char *in, int *inlen) {
|
||||
int *ctxt = vctxt;
|
||||
int inSize = *inlen;
|
||||
int outSize = *outlen;
|
||||
int rot, i;
|
||||
|
||||
rot = *ctxt;
|
||||
|
||||
for (i = 0; i < inSize && i < outSize; i++) {
|
||||
int c = in[i];
|
||||
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
c = 'A' + (c - 'A' + rot) % 26;
|
||||
else if (c >= 'a' && c <= 'z')
|
||||
c = 'a' + (c - 'a' + rot) % 26;
|
||||
|
||||
out[i] = c;
|
||||
}
|
||||
|
||||
*inlen = i;
|
||||
*outlen = i;
|
||||
|
||||
return XML_ENC_ERR_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * rot13ConvCtxtDtor:
 * @vctxt:  converter context to release
 *
 * Destructor installed by rot13ConvImpl as the converter's ctxtDtor.
 * Releases the context with xmlFree.
 */
static void
rot13ConvCtxtDtor(void *vctxt) {
    xmlFree(vctxt);
}
|
||||
|
||||
static int
|
||||
rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name,
|
||||
xmlCharEncConverter *conv) {
|
||||
int *inputCtxt;
|
||||
|
||||
if (strcmp(name, "rot13") != 0) {
|
||||
fprintf(stderr, "rot13ConvImpl received wrong name\n");
|
||||
charEncConvImplError = 1;
|
||||
|
||||
return XML_ERR_UNSUPPORTED_ENCODING;
|
||||
}
|
||||
|
||||
conv->convert = rot13Convert;
|
||||
conv->ctxtDtor = rot13ConvCtxtDtor;
|
||||
|
||||
inputCtxt = xmlMalloc(sizeof(*inputCtxt));
|
||||
*inputCtxt = 13;
|
||||
conv->inputCtxt = inputCtxt;
|
||||
|
||||
return XML_ERR_OK;
|
||||
}
|
||||
|
||||
static int
|
||||
testCharEncConvImpl(void) {
|
||||
xmlParserCtxtPtr ctxt;
|
||||
xmlDocPtr doc;
|
||||
xmlNodePtr root;
|
||||
int err = 0;
|
||||
|
||||
ctxt = xmlNewParserCtxt();
|
||||
xmlCtxtSetCharEncConvImpl(ctxt, rot13ConvImpl, NULL);
|
||||
charEncConvImplError = 0;
|
||||
doc = xmlCtxtReadDoc(ctxt, BAD_CAST "<?kzy irefvba='1.0'?><qbp/>", NULL,
|
||||
"rot13", 0);
|
||||
if (charEncConvImplError)
|
||||
err = 1;
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
|
||||
root = xmlDocGetRootElement(doc);
|
||||
if (root == NULL || strcmp((char *) root->name, "doc") != 0) {
|
||||
fprintf(stderr, "testCharEncConvImpl failed\n");
|
||||
err = 1;
|
||||
}
|
||||
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int
|
||||
main(void) {
|
||||
int err = 0;
|
||||
@ -546,6 +629,7 @@ main(void) {
|
||||
err |= testWriterClose();
|
||||
#endif
|
||||
err |= testBuildRelativeUri();
|
||||
err |= testCharEncConvImpl();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user