1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-02 09:47:23 +03:00

r2159: converted samba4 over to UTF-16.

I had previously thought this was unnecessary, as Windows doesn't use
standards-compliant UTF-16 and, for filesystem operations, treats bytes
as UCS-2. However, Bjoern Jacke has pointed out to me that this means we
don't correctly store extended UTF-16 characters (those encoded as
surrogate pairs) as UTF-8 on disk. This can be seen with, for example,
the Gothic characters, whose codepoints are above 64k.

This commit also adds a LOCAL-ICONV torture test that tests the first
1 million codepoints against the system iconv library, and tests 5
million random UTF-16LE buffers for identical error handling to the
system iconv library.

the lib/iconv.c changes need backporting to samba3
(This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)
This commit is contained in:
Andrew Tridgell 2004-09-01 04:39:06 +00:00 committed by Gerald (Jerry) Carter
parent 48f3df41bb
commit 31c1c7846f
13 changed files with 542 additions and 98 deletions

View File

@ -20,7 +20,7 @@
*/
/* this defines the charset types used in samba */
typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UCS2BE=5} charset_t;
typedef enum {CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UTF16BE=5} charset_t;
#define NUM_CHARSETS 6

View File

@ -47,12 +47,12 @@ static const char *charset_name(charset_t ch)
{
const char *ret = NULL;
if (ch == CH_UCS2) ret = "UTF-16LE";
if (ch == CH_UTF16) ret = "UTF-16LE";
else if (ch == CH_UNIX) ret = lp_unix_charset();
else if (ch == CH_DOS) ret = lp_dos_charset();
else if (ch == CH_DISPLAY) ret = lp_display_charset();
else if (ch == CH_UTF8) ret = "UTF8";
else if (ch == CH_UCS2BE) ret = "UCS-2BE";
else if (ch == CH_UTF16BE) ret = "UTF-16BE";
if (!ret || !*ret) ret = "ASCII";
return ret;
@ -81,13 +81,13 @@ void init_iconv(void)
/* so that charset_name() works we need to get the UNIX<->UCS2 going
first */
if (!conv_handles[CH_UNIX][CH_UCS2])
conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2),
if (!conv_handles[CH_UNIX][CH_UTF16])
conv_handles[CH_UNIX][CH_UTF16] = smb_iconv_open(charset_name(CH_UTF16),
"ASCII");
if (!conv_handles[CH_UCS2][CH_UNIX])
conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII",
charset_name(CH_UCS2));
if (!conv_handles[CH_UTF16][CH_UNIX])
conv_handles[CH_UTF16][CH_UNIX] = smb_iconv_open("ASCII",
charset_name(CH_UTF16));
for (c1=0;c1<NUM_CHARSETS;c1++) {
for (c2=0;c2<NUM_CHARSETS;c2++) {
@ -293,7 +293,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
size_t size;
smb_ucs2_t *buffer;
size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
(void **) &buffer);
if (size == -1) {
smb_panic("failed to create UCS2 buffer");
@ -303,7 +303,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
return srclen;
}
size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
free(buffer);
return size;
}
@ -313,7 +313,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
size_t size;
smb_ucs2_t *buffer;
size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
(void **) &buffer);
if (size == -1) {
smb_panic("failed to create UCS2 buffer");
@ -322,7 +322,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
free(buffer);
return srclen;
}
size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
free(buffer);
return size;
}
@ -461,7 +461,7 @@ ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest
/* ucs2 is always a multiple of 2 bytes */
dest_len &= ~1;
len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
len += convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
return len;
}
@ -480,7 +480,7 @@ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
size_t src_len = strlen(src)+1;
*dest = NULL;
return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest);
return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (const void **)dest);
}
@ -498,7 +498,7 @@ ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
size_t src_len = strlen(src)+1;
*dest = NULL;
return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
return convert_string_allocate(CH_UNIX, CH_UTF16, src, src_len, (void **)dest);
}
/**
@ -603,7 +603,7 @@ size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_
if (src_len != (size_t)-1)
src_len &= ~1;
ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
if (dest_len)
dest[MIN(ret, dest_len-1)] = 0;
@ -627,7 +627,7 @@ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
{
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
*dest = NULL;
return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest);
return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (const void **)dest);
}
/**
@ -642,7 +642,7 @@ ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
{
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
*dest = NULL;
return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
return convert_string_allocate(CH_UTF16, CH_UNIX, src, src_len, dest);
}
/**

View File

@ -39,8 +39,8 @@
*
* Note that the only function provided by iconv is conversion between
* characters. It doesn't directly support operations like
* uppercasing or comparison. We have to convert to UCS-2 and compare
* there.
* uppercasing or comparison. We have to convert to UTF-16LE and
* compare there.
*
* @sa Samba Developers Guide
**/
@ -55,10 +55,11 @@ static size_t iconv_copy (void *,const char **, size_t *, char **, size_t *);
static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *);
static const struct charset_functions const builtin_functions[] = {
/* windows is really neither UCS-2 not UTF-16 */
/* windows is closest to UTF-16 */
{"UCS-2LE", iconv_copy, iconv_copy},
{"UTF-16LE", iconv_copy, iconv_copy},
{"UCS-2BE", iconv_swab, iconv_swab},
{"UTF-16BE", iconv_swab, iconv_swab},
/* we include the UTF-8 alias to cope with differing locale settings */
{"UTF8", utf8_pull, utf8_push},
@ -217,23 +218,23 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
#endif
/* check for conversion to/from ucs2 */
if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
if (strcasecmp(fromcode, "UTF-16LE") == 0 && to) {
ret->direct = to->push;
return ret;
}
if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
if (strcasecmp(tocode, "UTF-16LE") == 0 && from) {
ret->direct = from->pull;
return ret;
}
#ifdef HAVE_NATIVE_ICONV
if (strcasecmp(fromcode, "UCS-2LE") == 0) {
if (strcasecmp(fromcode, "UTF-16LE") == 0) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_push;
ret->cd_push = NULL;
return ret;
}
if (strcasecmp(tocode, "UCS-2LE") == 0) {
if (strcasecmp(tocode, "UTF-16LE") == 0) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_pull;
ret->cd_pull = NULL;
@ -460,100 +461,231 @@ static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
const uint8_t *c = (const uint8_t *)*inbuf;
uint8_t *uc = (uint8_t *)*outbuf;
int len = 1;
size_t in_left=*inbytesleft, out_left=*outbytesleft;
const uint8_t *c = (const uint8_t *)*inbuf;
uint8_t *uc = (uint8_t *)*outbuf;
while (in_left >= 1 && out_left >= 2) {
if ((c[0] & 0x80) == 0) {
uc[0] = c[0];
uc[1] = 0;
} else if ((c[0] & 0xf0) == 0xe0) {
if (*inbytesleft < 3) {
DEBUG(0,("short utf8 char\n"));
goto badseq;
}
uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
uc[0] = (c[1]<<6) | (c[2]&0x3f);
len = 3;
} else if ((c[0] & 0xe0) == 0xc0) {
if (*inbytesleft < 2) {
DEBUG(0,("short utf8 char\n"));
goto badseq;
c += 1;
in_left -= 1;
out_left -= 2;
uc += 2;
continue;
}
if ((c[0] & 0xe0) == 0xc0) {
if (in_left < 2 ||
(c[1] & 0xc0) != 0x80) {
errno = EILSEQ;
goto error;
}
uc[1] = (c[0]>>2) & 0x7;
uc[0] = (c[0]<<6) | (c[1]&0x3f);
len = 2;
c += 2;
in_left -= 2;
out_left -= 2;
uc += 2;
continue;
}
(*inbuf) += len;
(*inbytesleft) -= len;
(*outbytesleft) -= 2;
(*outbuf) += 2;
if ((c[0] & 0xf0) == 0xe0) {
if (in_left < 3 ||
(c[1] & 0xc0) != 0x80 ||
(c[2] & 0xc0) != 0x80) {
errno = EILSEQ;
goto error;
}
uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
uc[0] = (c[1]<<6) | (c[2]&0x3f);
c += 3;
in_left -= 3;
out_left -= 2;
uc += 2;
continue;
}
if ((c[0] & 0xf8) == 0xf0) {
unsigned int codepoint;
if (in_left < 4 ||
(c[1] & 0xc0) != 0x80 ||
(c[2] & 0xc0) != 0x80 ||
(c[3] & 0xc0) != 0x80) {
errno = EILSEQ;
goto error;
}
codepoint =
(c[3]&0x3f) |
((c[2]&0x3f)<<6) |
((c[1]&0x3f)<<12) |
((c[0]&0x7)<<18);
if (codepoint < 0x10000) {
/* accept UTF-8 characters that are not
minimally packed, but pack the result */
uc[0] = (codepoint & 0xFF);
uc[1] = (codepoint >> 8);
c += 4;
in_left -= 4;
out_left -= 2;
uc += 2;
continue;
}
codepoint -= 0x10000;
if (out_left < 4) {
errno = E2BIG;
goto error;
}
uc[0] = (codepoint>>10) & 0xFF;
uc[1] = (codepoint>>18) | 0xd8;
uc[2] = codepoint & 0xFF;
uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
c += 4;
in_left -= 4;
out_left -= 4;
uc += 4;
continue;
}
/* we don't handle 5 byte sequences */
errno = EINVAL;
goto error;
}
if (*inbytesleft > 0) {
if (in_left > 0) {
errno = E2BIG;
return -1;
goto error;
}
*inbytesleft = in_left;
*outbytesleft = out_left;
*inbuf = c;
*outbuf = uc;
return 0;
badseq:
errno = EINVAL;
error:
*inbytesleft = in_left;
*outbytesleft = out_left;
*inbuf = c;
*outbuf = uc;
return -1;
}
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
uint8_t *c = (uint8_t *)*outbuf;
const uint8_t *uc = (const uint8_t *)*inbuf;
int len=1;
size_t in_left=*inbytesleft, out_left=*outbytesleft;
uint8_t *c = (uint8_t *)*outbuf;
const uint8_t *uc = (const uint8_t *)*inbuf;
if (uc[1] & 0xf8) {
if (*outbytesleft < 3) {
DEBUG(0,("short utf8 write\n"));
goto toobig;
}
c[0] = 0xe0 | (uc[1]>>4);
c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
c[2] = 0x80 | (uc[0]&0x3f);
len = 3;
} else if (uc[1] | (uc[0] & 0x80)) {
if (*outbytesleft < 2) {
DEBUG(0,("short utf8 write\n"));
goto toobig;
}
c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
c[1] = 0x80 | (uc[0]&0x3f);
len = 2;
} else {
while (in_left >= 2 && out_left >= 1) {
unsigned int codepoint;
if (uc[1] == 0 && !(uc[0] & 0x80)) {
/* simplest case */
c[0] = uc[0];
in_left -= 2;
out_left -= 1;
uc += 2;
c += 1;
continue;
}
if ((uc[1]&0xf8) == 0) {
/* next simplest case */
if (out_left < 2) {
errno = E2BIG;
goto error;
}
c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
c[1] = 0x80 | (uc[0] & 0x3f);
in_left -= 2;
out_left -= 2;
uc += 2;
c += 2;
continue;
}
(*inbytesleft) -= 2;
(*outbytesleft) -= len;
(*inbuf) += 2;
(*outbuf) += len;
if ((uc[1] & 0xfc) == 0xdc) {
/* its the second part of a 4 byte sequence. Illegal */
if (in_left < 4) {
errno = EINVAL;
} else {
errno = EILSEQ;
}
goto error;
}
if ((uc[1] & 0xfc) != 0xd8) {
codepoint = uc[0] | (uc[1]<<8);
if (out_left < 3) {
errno = E2BIG;
goto error;
}
c[0] = 0xe0 | (codepoint >> 12);
c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
c[2] = 0x80 | (codepoint & 0x3f);
in_left -= 2;
out_left -= 3;
uc += 2;
c += 3;
continue;
}
/* its the first part of a 4 byte sequence */
if (in_left < 4) {
errno = EINVAL;
goto error;
}
if ((uc[3] & 0xfc) != 0xdc) {
errno = EILSEQ;
goto error;
}
codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
(uc[0]<<10) | ((uc[1] & 0x3)<<18));
if (out_left < 4) {
errno = E2BIG;
goto error;
}
c[0] = 0xf0 | (codepoint >> 18);
c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
c[3] = 0x80 | (codepoint & 0x3f);
in_left -= 4;
out_left -= 4;
uc += 4;
c += 4;
}
if (*inbytesleft == 1) {
if (in_left == 1) {
errno = EINVAL;
return -1;
goto error;
}
if (*inbytesleft > 1) {
if (in_left > 1) {
errno = E2BIG;
return -1;
goto error;
}
*inbytesleft = in_left;
*outbytesleft = out_left;
*inbuf = uc;
*outbuf = c;
return 0;
toobig:
errno = E2BIG;
error:
*inbytesleft = in_left;
*outbytesleft = out_left;
*inbuf = uc;
*outbuf = c;
return -1;
}

View File

@ -96,9 +96,9 @@ static int check_dos_char(smb_ucs2_t c)
char buf[10];
smb_ucs2_t c2 = 0;
int len1, len2;
len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
len1 = convert_string(CH_UTF16, CH_DOS, &c, 2, buf, sizeof(buf));
if (len1 == 0) return 0;
len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
len2 = convert_string(CH_DOS, CH_UTF16, buf, len1, &c2, 2);
if (len2 != 2) return 0;
return (c == c2);
}

View File

@ -526,7 +526,7 @@ static size_t smbcli_req_pull_ucs2(struct smbcli_request *req, TALLOC_CTX *mem_c
return 0;
}
ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
if (ret == -1) {
*dest = NULL;
return 0;
@ -725,7 +725,7 @@ static size_t smbcli_blob_pull_ucs2(TALLOC_CTX* mem_ctx,
src_len2 += 2;
}
ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
if (ret == -1) {
*dest = NULL;
return 0;

View File

@ -421,14 +421,14 @@ NTSTATUS ndr_pull_string(struct ndr_pull *ndr, int ndr_flags, const char **s)
uint32_t len1, ofs, len2;
uint16_t len3;
int ret;
int chset = CH_UCS2;
int chset = CH_UTF16;
if (!(ndr_flags & NDR_SCALARS)) {
return NT_STATUS_OK;
}
if (NDR_BE(ndr)) {
chset = CH_UCS2BE;
chset = CH_UTF16BE;
}
switch (ndr->flags & LIBNDR_STRING_FLAGS) {
@ -600,14 +600,14 @@ NTSTATUS ndr_push_string(struct ndr_push *ndr, int ndr_flags, const char *s)
{
ssize_t s_len, c_len;
int ret;
int chset = CH_UCS2;
int chset = CH_UTF16;
if (!(ndr_flags & NDR_SCALARS)) {
return NT_STATUS_OK;
}
if (NDR_BE(ndr)) {
chset = CH_UCS2BE;
chset = CH_UTF16BE;
}
s_len = s?strlen(s):0;

View File

@ -468,7 +468,7 @@ static size_t req_pull_ucs2(struct smbsrv_request *req, const char **dest, const
src_len2 += 2;
}
ret = convert_string_talloc(req->mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
ret = convert_string_talloc(req->mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
if (ret == -1) {
*dest = NULL;

View File

@ -50,7 +50,7 @@ static NTSTATUS unicode_open(struct smbcli_tree *tree,
}
SSVAL(ucs_name, i*2, 0);
i = convert_string_allocate(CH_UCS2, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
i = convert_string_allocate(CH_UTF16, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
if (i == -1) {
free(ucs_name);
return NT_STATUS_NO_MEMORY;

View File

@ -50,7 +50,7 @@ BOOL torture_utable(int dummy)
SSVAL(&c2, 0, c);
fstrcpy(fname, "\\utable\\x");
p = fname+strlen(fname);
len = convert_string(CH_UCS2, CH_UNIX,
len = convert_string(CH_UTF16, CH_UNIX,
&c2, 2,
p, sizeof(fname)-strlen(fname));
p[len] = 0;
@ -107,7 +107,7 @@ static char *form_name(int c)
p = fname+strlen(fname);
SSVAL(&c2, 0, c);
len = convert_string(CH_UCS2, CH_UNIX,
len = convert_string(CH_UTF16, CH_UNIX,
&c2, 2,
p, sizeof(fname)-strlen(fname));
p[len] = 0;

View File

@ -10,6 +10,8 @@ SMB_SUBSYSTEM_MK(TORTURE_RAP,torture/config.mk)
SMB_SUBSYSTEM_MK(TORTURE_AUTH,torture/config.mk)
SMB_SUBSYSTEM_MK(TORTURE_LOCAL,torture/config.mk)
SMB_SUBSYSTEM_MK(TORTURE_NBENCH,torture/config.mk)
SMB_SUBSYSTEM_MK(TORTURE_LDAP,torture/config.mk)

View File

@ -93,6 +93,16 @@ REQUIRED_SUBSYSTEMS = \
# End SUBSYSTEM TORTURE_AUTH
#################################
#################################
# Start SUBSYSTEM TORTURE_LOCAL
[SUBSYSTEM::TORTURE_LOCAL]
ADD_OBJ_FILES = \
torture/local/iconv.o
REQUIRED_SUBSYSTEMS = \
LIBSMB
# End SUBSYSTEM TORTURE_LOCAL
#################################
#################################
# Start SUBSYSTEM TORTURE_NBENCH
[SUBSYSTEM::TORTURE_NBENCH]
@ -125,6 +135,7 @@ REQUIRED_SUBSYSTEMS = \
TORTURE_RPC \
TORTURE_RAP \
TORTURE_AUTH \
TORTURE_LOCAL \
TORTURE_NBENCH \
TORTURE_LDAP \
CONFIG \

View File

@ -0,0 +1,298 @@
/*
Unix SMB/CIFS implementation.
local testing of iconv routines. This tests the system iconv code against
the built-in iconv code
Copyright (C) Andrew Tridgell 2004
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "includes.h"
/*
  generate a UTF-16LE buffer for a given unicode codepoint, using the
  system iconv library

  codepoint: the codepoint to encode
  buf:       output buffer, must have room for 8 bytes
  size:      set to the number of bytes written to buf

  returns 0 on success, non-zero if the system converter could not be
  opened or the conversion failed
*/
static int gen_codepoint(unsigned int codepoint,
			 char *buf, size_t *size)
{
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in = (char *)in;
	size_t size_in = sizeof(in);
	size_t size_out = 8;
	size_t ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
	}
	if (cd == (iconv_t)-1) {
		/* the system iconv does not offer this conversion */
		*size = 0;
		return -1;
	}

	/* lay the codepoint out as a little-endian UCS-4 value */
	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = 8 - size_out;

	/* (size_t)-1 from iconv becomes -1 here; callers test for non-zero */
	return (int)ret;
}
/*
  work out the unicode codepoint of the first UTF-8 character in the buffer,
  using the system iconv library

  buf:  UTF-8 bytes
  size: number of bytes available in buf

  returns the codepoint, or 0 if the system converter could not be opened
  or produced no output
*/
static unsigned int get_codepoint(char *buf, size_t size)
{
	static iconv_t cd;
	uint8_t out[4] = {0};
	char *ptr_out = (char *)out;
	size_t size_in = size;
	size_t size_out = sizeof(out);

	if (!cd) {
		cd = iconv_open("UCS-4LE", "UTF-8");
	}
	if (cd == (iconv_t)-1) {
		return 0;
	}

	/* the output only has room for one UCS-4 character, so the return
	   value is deliberately ignored: running out of output space after
	   the first character is expected. out[] was zeroed above, so a
	   conversion that writes nothing yields 0 */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	/* widen before shifting so that out[3]<<24 cannot overflow an int */
	return (unsigned int)out[0] |
		((unsigned int)out[1]<<8) |
		((unsigned int)out[2]<<16) |
		((unsigned int)out[3]<<24);
}
/*
  display a buffer with name prefix

  prints name, then each of the size bytes of buf as two hex digits
  followed by a space, then a newline
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	size_t i;

	printf("%s ", name);
	for (i=0;i<size;i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
/*
  given a UTF-16LE buffer, test the system and built-in iconv code
  to make sure they do exactly the same thing in converting the buffer
  to UTF-8, then convert it back again and ensure we get the same buffer
  back

  inbuf: the UTF-16LE bytes to convert
  size:  number of bytes in inbuf

  returns 1 if both implementations agree (return value, errno, bytes
  consumed, bytes produced) and the round trip reproduces the consumed
  input, 0 otherwise. Details of any mismatch are printed to stdout.
*/
static int test_buffer(uint8_t *inbuf, size_t size)
{
	uint8_t buf1[1000], buf2[1000], buf3[1000];
	size_t outsize1, outsize2, outsize3;
	const char *ptr_in;
	char *sys_ptr_in;
	char *ptr_out;
	size_t size_in1, size_in2, size_in3;
	size_t ret1, ret2, ret3, len1, len2;
	size_t i;
	int ok = 1;
	int errno1, errno2;
	static iconv_t cd;
	static smb_iconv_t cd2, cd3;

	if (!cd) {
		cd = iconv_open("UTF-8", "UTF-16LE");
		cd2 = smb_iconv_open("UTF-8", "UTF-16LE");
		cd3 = smb_iconv_open("UTF-16LE", "UTF-8");
	}
	if (cd == (iconv_t)-1) {
		printf("failed to open the system UTF-16LE to UTF-8 converter\n");
		return 0;
	}

	/* UTF-16LE -> UTF-8 with the built-in converter */
	ptr_in = (const char *)inbuf;
	ptr_out = (char *)buf1;
	size_in1 = size;
	outsize1 = sizeof(buf1);
	memset(buf1, 0, sizeof(buf1));
	errno = 0;
	ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
	errno1 = errno;

	/* the same conversion with the system converter. POSIX declares
	   the input argument of iconv() as char **, so use a separate
	   non-const cursor for it */
	sys_ptr_in = (char *)inbuf;
	ptr_out = (char *)buf2;
	size_in2 = size;
	outsize2 = sizeof(buf2);
	memset(buf2, 0, sizeof(buf2));
	errno = 0;
	ret2 = iconv(cd, &sys_ptr_in, &size_in2, &ptr_out, &outsize2);
	errno2 = errno;

	len1 = sizeof(buf1) - outsize1;
	len2 = sizeof(buf2) - outsize2;

	/* codepoints above 1M are not interesting for now */
	if (len2 > len1 &&
	    memcmp(buf1, buf2, len1) == 0 &&
	    get_codepoint((char *)buf2+len1, len2-len1) >= (1<<20)) {
		return ok;
	}
	if (len1 > len2 &&
	    memcmp(buf1, buf2, len2) == 0 &&
	    get_codepoint((char *)buf1+len2, len1-len2) >= (1<<20)) {
		return ok;
	}

	/* the iconv return values are size_t, with (size_t)-1 meaning
	   failure; print them as signed so a failure shows up as -1 */
	if (ret1 != ret2) {
		printf("ret1=%ld ret2=%ld\n", (long)ret1, (long)ret2);
		ok = 0;
	}

	if (errno1 != errno2) {
		printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
		show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
		show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
		ok = 0;
	}

	if (outsize1 != outsize2) {
		printf("\noutsize mismatch outsize1=%zu outsize2=%zu\n",
		       outsize1, outsize2);
		ok = 0;
	}

	if (size_in1 != size_in2) {
		printf("\nsize_in mismatch size_in1=%zu size_in2=%zu\n",
		       size_in1, size_in2);
		ok = 0;
	}

	if (!ok ||
	    len1 != len2 ||
	    memcmp(buf1, buf2, len1) != 0) {
		printf("\nsize=%zu ret1=%ld ret2=%ld\n",
		       size, (long)ret1, (long)ret2);
		show_buf(" IN1:", inbuf, size-size_in1);
		show_buf(" IN2:", inbuf, size-size_in2);
		show_buf("OUT1:", buf1, len1);
		show_buf("OUT2:", buf2, len2);
		if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
			printf("next codepoint is %u\n",
			       get_codepoint((char *)buf2+len1, len2-len1));
		}
		if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
			printf("next codepoint is %u\n",
			       get_codepoint((char *)buf1+len2, len1-len2));
		}
		ok = 0;
	}

	if (!ok) return ok;

	/* only the input bytes that were actually consumed can be
	   expected to come back from the round trip */
	size = size - size_in1;

	/* UTF-8 -> UTF-16LE with the built-in converter */
	ptr_in = (const char *)buf1;
	ptr_out = (char *)buf3;
	size_in3 = len1;
	outsize3 = sizeof(buf3);
	memset(buf3, 0, sizeof(buf3));
	ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);

	if (ret3 != 0) {
		printf("pull failed - %s\n", strerror(errno));
		ok = 0;
	}

	if (outsize3 != sizeof(buf3) - size) {
		printf("wrong outsize3 - %zu should be %zu\n",
		       outsize3, sizeof(buf3) - size);
		ok = 0;
	}

	if (memcmp(buf3, inbuf, size) != 0) {
		printf("pull bytes mismatch:\n");
		for (i=0;i<size;i++) {
			printf("%02x ", inbuf[i]);
		}
		printf("\n");
		for (i=0;i<size;i++) {
			printf("%02x ", buf3[i]);
		}
		printf("\n");
		ok = 0;
	}

	return ok;
}
/*
  LOCAL-ICONV torture test: compare the built-in UTF-16LE/UTF-8 code
  against the system iconv library, first for every codepoint below 1M
  that the system iconv can encode, then for a series of random buffers

  dummy: unused

  returns True if every buffer passed test_buffer(), False otherwise.
  Testing stops at the first failure.
*/
BOOL torture_local_iconv(int dummy)
{
	enum {
		NUM_CODEPOINTS = 1<<20,
		NUM_RANDOM_BUFFERS = 500000,
		MAX_RANDOM_SIZE = 100
	};
	size_t size;
	unsigned char inbuf[1000];
	int ok = 1;
	unsigned int codepoint, i, c;
	unsigned int seed = (unsigned int)time(NULL);

	/* report the seed so that a failing random buffer can be reproduced */
	printf("Using random seed %u\n", seed);
	srandom(seed);

	printf("Testing first 1M codepoints\n");
	for (codepoint=0;ok && codepoint<NUM_CODEPOINTS;codepoint++) {
		/* skip codepoints that the system iconv refuses to encode */
		if (gen_codepoint(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 100 == 0) {
			printf("codepoint=%u \r", codepoint);
		}

		ok = test_buffer((uint8_t *)inbuf, size);
	}

	/* print the real iteration count so the banner cannot drift away
	   from the loop bound */
	printf("Testing %u random UTF-16LE sequences\n",
	       (unsigned int)NUM_RANDOM_BUFFERS);
	for (i=0;ok && i<NUM_RANDOM_BUFFERS;i++) {
		if (i % 100 == 0) {
			printf("i=%u \r", i);
		}

		size = random() % MAX_RANDOM_SIZE;
		for (c=0;c<size;c++) {
			/* mostly 7-bit bytes, with some arbitrary bytes mixed in */
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = (unsigned char)random();
			}
			/* sometimes force in the surrogate marker bits
			   (0xd8 / 0xdc) so that surrogate handling is hit */
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		ok = test_buffer((uint8_t *)inbuf, size);
	}

	return ok == 1;
}

View File

@ -4212,8 +4212,9 @@ static struct {
{"RPC-MULTIBIND", torture_multi_bind, 0},
{"RPC-DRSUAPI", torture_rpc_drsuapi, 0},
/* crypto testers */
{"CRYPT-NTLMSSP", torture_ntlmssp_self_check, 0},
/* local (no server) testers */
{"LOCAL-NTLMSSP", torture_ntlmssp_self_check, 0},
{"LOCAL-ICONV", torture_local_iconv, 0},
/* ldap testers */
{"LDAP-BASIC", torture_ldap_basic, 0},