mirror of
https://github.com/samba-team/samba.git
synced 2025-02-02 09:47:23 +03:00
r2159: converted samba4 over to UTF-16.
I had previously thought this was unnecessary, as Windows doesn't use standards-compliant UTF-16 and treats bytes as UCS-2 for filesystem operations, but Bjoern Jacke has pointed out to me that this means we don't correctly store extended UTF-16 characters as UTF-8 on disk. This can be seen with (for example) the Gothic characters, whose codepoints are above 64k. This commit also adds a LOCAL-ICONV torture test that tests the first 1 million codepoints against the system iconv library, and tests 5 million random UTF-16LE buffers for identical error handling to the system iconv library. The lib/iconv.c changes need backporting to samba3. (This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)
This commit is contained in:
parent
48f3df41bb
commit
31c1c7846f
@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/* this defines the charset types used in samba */
|
||||
typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UCS2BE=5} charset_t;
|
||||
typedef enum {CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UTF16BE=5} charset_t;
|
||||
|
||||
#define NUM_CHARSETS 6
|
||||
|
||||
|
@ -47,12 +47,12 @@ static const char *charset_name(charset_t ch)
|
||||
{
|
||||
const char *ret = NULL;
|
||||
|
||||
if (ch == CH_UCS2) ret = "UTF-16LE";
|
||||
if (ch == CH_UTF16) ret = "UTF-16LE";
|
||||
else if (ch == CH_UNIX) ret = lp_unix_charset();
|
||||
else if (ch == CH_DOS) ret = lp_dos_charset();
|
||||
else if (ch == CH_DISPLAY) ret = lp_display_charset();
|
||||
else if (ch == CH_UTF8) ret = "UTF8";
|
||||
else if (ch == CH_UCS2BE) ret = "UCS-2BE";
|
||||
else if (ch == CH_UTF16BE) ret = "UTF-16BE";
|
||||
|
||||
if (!ret || !*ret) ret = "ASCII";
|
||||
return ret;
|
||||
@ -81,13 +81,13 @@ void init_iconv(void)
|
||||
|
||||
/* so that charset_name() works we need to get the UNIX<->UCS2 going
|
||||
first */
|
||||
if (!conv_handles[CH_UNIX][CH_UCS2])
|
||||
conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2),
|
||||
if (!conv_handles[CH_UNIX][CH_UTF16])
|
||||
conv_handles[CH_UNIX][CH_UTF16] = smb_iconv_open(charset_name(CH_UTF16),
|
||||
"ASCII");
|
||||
|
||||
if (!conv_handles[CH_UCS2][CH_UNIX])
|
||||
conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII",
|
||||
charset_name(CH_UCS2));
|
||||
if (!conv_handles[CH_UTF16][CH_UNIX])
|
||||
conv_handles[CH_UTF16][CH_UNIX] = smb_iconv_open("ASCII",
|
||||
charset_name(CH_UTF16));
|
||||
|
||||
for (c1=0;c1<NUM_CHARSETS;c1++) {
|
||||
for (c2=0;c2<NUM_CHARSETS;c2++) {
|
||||
@ -293,7 +293,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
|
||||
size_t size;
|
||||
smb_ucs2_t *buffer;
|
||||
|
||||
size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
|
||||
size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
|
||||
(void **) &buffer);
|
||||
if (size == -1) {
|
||||
smb_panic("failed to create UCS2 buffer");
|
||||
@ -303,7 +303,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
|
||||
return srclen;
|
||||
}
|
||||
|
||||
size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
|
||||
size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
|
||||
free(buffer);
|
||||
return size;
|
||||
}
|
||||
@ -313,7 +313,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
|
||||
size_t size;
|
||||
smb_ucs2_t *buffer;
|
||||
|
||||
size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
|
||||
size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
|
||||
(void **) &buffer);
|
||||
if (size == -1) {
|
||||
smb_panic("failed to create UCS2 buffer");
|
||||
@ -322,7 +322,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
|
||||
free(buffer);
|
||||
return srclen;
|
||||
}
|
||||
size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
|
||||
size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
|
||||
free(buffer);
|
||||
return size;
|
||||
}
|
||||
@ -461,7 +461,7 @@ ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest
|
||||
/* ucs2 is always a multiple of 2 bytes */
|
||||
dest_len &= ~1;
|
||||
|
||||
len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
|
||||
len += convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
|
||||
return len;
|
||||
}
|
||||
|
||||
@ -480,7 +480,7 @@ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
|
||||
size_t src_len = strlen(src)+1;
|
||||
|
||||
*dest = NULL;
|
||||
return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest);
|
||||
return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (const void **)dest);
|
||||
}
|
||||
|
||||
|
||||
@ -498,7 +498,7 @@ ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
|
||||
size_t src_len = strlen(src)+1;
|
||||
|
||||
*dest = NULL;
|
||||
return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
|
||||
return convert_string_allocate(CH_UNIX, CH_UTF16, src, src_len, (void **)dest);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -603,7 +603,7 @@ size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_
|
||||
if (src_len != (size_t)-1)
|
||||
src_len &= ~1;
|
||||
|
||||
ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
|
||||
ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
|
||||
if (dest_len)
|
||||
dest[MIN(ret, dest_len-1)] = 0;
|
||||
|
||||
@ -627,7 +627,7 @@ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
|
||||
{
|
||||
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
|
||||
*dest = NULL;
|
||||
return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest);
|
||||
return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (const void **)dest);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -642,7 +642,7 @@ ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
|
||||
{
|
||||
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
|
||||
*dest = NULL;
|
||||
return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);
|
||||
return convert_string_allocate(CH_UTF16, CH_UNIX, src, src_len, dest);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -39,8 +39,8 @@
|
||||
*
|
||||
* Note that the only function provided by iconv is conversion between
|
||||
* characters. It doesn't directly support operations like
|
||||
* uppercasing or comparison. We have to convert to UCS-2 and compare
|
||||
* there.
|
||||
* uppercasing or comparison. We have to convert to UTF-16LE and
|
||||
* compare there.
|
||||
*
|
||||
* @sa Samba Developers Guide
|
||||
**/
|
||||
@ -55,10 +55,11 @@ static size_t iconv_copy (void *,const char **, size_t *, char **, size_t *);
|
||||
static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *);
|
||||
|
||||
static const struct charset_functions const builtin_functions[] = {
|
||||
/* windows is really neither UCS-2 not UTF-16 */
|
||||
/* windows is closest to UTF-16 */
|
||||
{"UCS-2LE", iconv_copy, iconv_copy},
|
||||
{"UTF-16LE", iconv_copy, iconv_copy},
|
||||
{"UCS-2BE", iconv_swab, iconv_swab},
|
||||
{"UTF-16BE", iconv_swab, iconv_swab},
|
||||
|
||||
/* we include the UTF-8 alias to cope with differing locale settings */
|
||||
{"UTF8", utf8_pull, utf8_push},
|
||||
@ -217,23 +218,23 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
|
||||
#endif
|
||||
|
||||
/* check for conversion to/from ucs2 */
|
||||
if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
|
||||
if (strcasecmp(fromcode, "UTF-16LE") == 0 && to) {
|
||||
ret->direct = to->push;
|
||||
return ret;
|
||||
}
|
||||
if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
|
||||
if (strcasecmp(tocode, "UTF-16LE") == 0 && from) {
|
||||
ret->direct = from->pull;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef HAVE_NATIVE_ICONV
|
||||
if (strcasecmp(fromcode, "UCS-2LE") == 0) {
|
||||
if (strcasecmp(fromcode, "UTF-16LE") == 0) {
|
||||
ret->direct = sys_iconv;
|
||||
ret->cd_direct = ret->cd_push;
|
||||
ret->cd_push = NULL;
|
||||
return ret;
|
||||
}
|
||||
if (strcasecmp(tocode, "UCS-2LE") == 0) {
|
||||
if (strcasecmp(tocode, "UTF-16LE") == 0) {
|
||||
ret->direct = sys_iconv;
|
||||
ret->cd_direct = ret->cd_pull;
|
||||
ret->cd_pull = NULL;
|
||||
@ -460,100 +461,231 @@ static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
|
||||
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
|
||||
char **outbuf, size_t *outbytesleft)
|
||||
{
|
||||
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
|
||||
const uint8_t *c = (const uint8_t *)*inbuf;
|
||||
uint8_t *uc = (uint8_t *)*outbuf;
|
||||
int len = 1;
|
||||
size_t in_left=*inbytesleft, out_left=*outbytesleft;
|
||||
const uint8_t *c = (const uint8_t *)*inbuf;
|
||||
uint8_t *uc = (uint8_t *)*outbuf;
|
||||
|
||||
while (in_left >= 1 && out_left >= 2) {
|
||||
if ((c[0] & 0x80) == 0) {
|
||||
uc[0] = c[0];
|
||||
uc[1] = 0;
|
||||
} else if ((c[0] & 0xf0) == 0xe0) {
|
||||
if (*inbytesleft < 3) {
|
||||
DEBUG(0,("short utf8 char\n"));
|
||||
goto badseq;
|
||||
}
|
||||
uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
|
||||
uc[0] = (c[1]<<6) | (c[2]&0x3f);
|
||||
len = 3;
|
||||
} else if ((c[0] & 0xe0) == 0xc0) {
|
||||
if (*inbytesleft < 2) {
|
||||
DEBUG(0,("short utf8 char\n"));
|
||||
goto badseq;
|
||||
c += 1;
|
||||
in_left -= 1;
|
||||
out_left -= 2;
|
||||
uc += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((c[0] & 0xe0) == 0xc0) {
|
||||
if (in_left < 2 ||
|
||||
(c[1] & 0xc0) != 0x80) {
|
||||
errno = EILSEQ;
|
||||
goto error;
|
||||
}
|
||||
uc[1] = (c[0]>>2) & 0x7;
|
||||
uc[0] = (c[0]<<6) | (c[1]&0x3f);
|
||||
len = 2;
|
||||
c += 2;
|
||||
in_left -= 2;
|
||||
out_left -= 2;
|
||||
uc += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
(*inbuf) += len;
|
||||
(*inbytesleft) -= len;
|
||||
(*outbytesleft) -= 2;
|
||||
(*outbuf) += 2;
|
||||
if ((c[0] & 0xf0) == 0xe0) {
|
||||
if (in_left < 3 ||
|
||||
(c[1] & 0xc0) != 0x80 ||
|
||||
(c[2] & 0xc0) != 0x80) {
|
||||
errno = EILSEQ;
|
||||
goto error;
|
||||
}
|
||||
uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
|
||||
uc[0] = (c[1]<<6) | (c[2]&0x3f);
|
||||
c += 3;
|
||||
in_left -= 3;
|
||||
out_left -= 2;
|
||||
uc += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((c[0] & 0xf8) == 0xf0) {
|
||||
unsigned int codepoint;
|
||||
if (in_left < 4 ||
|
||||
(c[1] & 0xc0) != 0x80 ||
|
||||
(c[2] & 0xc0) != 0x80 ||
|
||||
(c[3] & 0xc0) != 0x80) {
|
||||
errno = EILSEQ;
|
||||
goto error;
|
||||
}
|
||||
codepoint =
|
||||
(c[3]&0x3f) |
|
||||
((c[2]&0x3f)<<6) |
|
||||
((c[1]&0x3f)<<12) |
|
||||
((c[0]&0x7)<<18);
|
||||
if (codepoint < 0x10000) {
|
||||
/* accept UTF-8 characters that are not
|
||||
minimally packed, but pack the result */
|
||||
uc[0] = (codepoint & 0xFF);
|
||||
uc[1] = (codepoint >> 8);
|
||||
c += 4;
|
||||
in_left -= 4;
|
||||
out_left -= 2;
|
||||
uc += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
codepoint -= 0x10000;
|
||||
|
||||
if (out_left < 4) {
|
||||
errno = E2BIG;
|
||||
goto error;
|
||||
}
|
||||
|
||||
uc[0] = (codepoint>>10) & 0xFF;
|
||||
uc[1] = (codepoint>>18) | 0xd8;
|
||||
uc[2] = codepoint & 0xFF;
|
||||
uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
|
||||
c += 4;
|
||||
in_left -= 4;
|
||||
out_left -= 4;
|
||||
uc += 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we don't handle 5 byte sequences */
|
||||
errno = EINVAL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (*inbytesleft > 0) {
|
||||
if (in_left > 0) {
|
||||
errno = E2BIG;
|
||||
return -1;
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
*inbytesleft = in_left;
|
||||
*outbytesleft = out_left;
|
||||
*inbuf = c;
|
||||
*outbuf = uc;
|
||||
return 0;
|
||||
|
||||
badseq:
|
||||
errno = EINVAL;
|
||||
error:
|
||||
*inbytesleft = in_left;
|
||||
*outbytesleft = out_left;
|
||||
*inbuf = c;
|
||||
*outbuf = uc;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
|
||||
char **outbuf, size_t *outbytesleft)
|
||||
char **outbuf, size_t *outbytesleft)
|
||||
{
|
||||
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
|
||||
uint8_t *c = (uint8_t *)*outbuf;
|
||||
const uint8_t *uc = (const uint8_t *)*inbuf;
|
||||
int len=1;
|
||||
size_t in_left=*inbytesleft, out_left=*outbytesleft;
|
||||
uint8_t *c = (uint8_t *)*outbuf;
|
||||
const uint8_t *uc = (const uint8_t *)*inbuf;
|
||||
|
||||
if (uc[1] & 0xf8) {
|
||||
if (*outbytesleft < 3) {
|
||||
DEBUG(0,("short utf8 write\n"));
|
||||
goto toobig;
|
||||
}
|
||||
c[0] = 0xe0 | (uc[1]>>4);
|
||||
c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
|
||||
c[2] = 0x80 | (uc[0]&0x3f);
|
||||
len = 3;
|
||||
} else if (uc[1] | (uc[0] & 0x80)) {
|
||||
if (*outbytesleft < 2) {
|
||||
DEBUG(0,("short utf8 write\n"));
|
||||
goto toobig;
|
||||
}
|
||||
c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
|
||||
c[1] = 0x80 | (uc[0]&0x3f);
|
||||
len = 2;
|
||||
} else {
|
||||
while (in_left >= 2 && out_left >= 1) {
|
||||
unsigned int codepoint;
|
||||
|
||||
if (uc[1] == 0 && !(uc[0] & 0x80)) {
|
||||
/* simplest case */
|
||||
c[0] = uc[0];
|
||||
in_left -= 2;
|
||||
out_left -= 1;
|
||||
uc += 2;
|
||||
c += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((uc[1]&0xf8) == 0) {
|
||||
/* next simplest case */
|
||||
if (out_left < 2) {
|
||||
errno = E2BIG;
|
||||
goto error;
|
||||
}
|
||||
c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
|
||||
c[1] = 0x80 | (uc[0] & 0x3f);
|
||||
in_left -= 2;
|
||||
out_left -= 2;
|
||||
uc += 2;
|
||||
c += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
(*inbytesleft) -= 2;
|
||||
(*outbytesleft) -= len;
|
||||
(*inbuf) += 2;
|
||||
(*outbuf) += len;
|
||||
if ((uc[1] & 0xfc) == 0xdc) {
|
||||
/* its the second part of a 4 byte sequence. Illegal */
|
||||
if (in_left < 4) {
|
||||
errno = EINVAL;
|
||||
} else {
|
||||
errno = EILSEQ;
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
|
||||
if ((uc[1] & 0xfc) != 0xd8) {
|
||||
codepoint = uc[0] | (uc[1]<<8);
|
||||
if (out_left < 3) {
|
||||
errno = E2BIG;
|
||||
goto error;
|
||||
}
|
||||
c[0] = 0xe0 | (codepoint >> 12);
|
||||
c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
|
||||
c[2] = 0x80 | (codepoint & 0x3f);
|
||||
|
||||
in_left -= 2;
|
||||
out_left -= 3;
|
||||
uc += 2;
|
||||
c += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* its the first part of a 4 byte sequence */
|
||||
if (in_left < 4) {
|
||||
errno = EINVAL;
|
||||
goto error;
|
||||
}
|
||||
if ((uc[3] & 0xfc) != 0xdc) {
|
||||
errno = EILSEQ;
|
||||
goto error;
|
||||
}
|
||||
codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
|
||||
(uc[0]<<10) | ((uc[1] & 0x3)<<18));
|
||||
|
||||
if (out_left < 4) {
|
||||
errno = E2BIG;
|
||||
goto error;
|
||||
}
|
||||
c[0] = 0xf0 | (codepoint >> 18);
|
||||
c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
|
||||
c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
|
||||
c[3] = 0x80 | (codepoint & 0x3f);
|
||||
|
||||
in_left -= 4;
|
||||
out_left -= 4;
|
||||
uc += 4;
|
||||
c += 4;
|
||||
}
|
||||
|
||||
if (*inbytesleft == 1) {
|
||||
if (in_left == 1) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (*inbytesleft > 1) {
|
||||
if (in_left > 1) {
|
||||
errno = E2BIG;
|
||||
return -1;
|
||||
goto error;
|
||||
}
|
||||
|
||||
*inbytesleft = in_left;
|
||||
*outbytesleft = out_left;
|
||||
*inbuf = uc;
|
||||
*outbuf = c;
|
||||
|
||||
return 0;
|
||||
|
||||
toobig:
|
||||
errno = E2BIG;
|
||||
error:
|
||||
*inbytesleft = in_left;
|
||||
*outbytesleft = out_left;
|
||||
*inbuf = uc;
|
||||
*outbuf = c;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -96,9 +96,9 @@ static int check_dos_char(smb_ucs2_t c)
|
||||
char buf[10];
|
||||
smb_ucs2_t c2 = 0;
|
||||
int len1, len2;
|
||||
len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
|
||||
len1 = convert_string(CH_UTF16, CH_DOS, &c, 2, buf, sizeof(buf));
|
||||
if (len1 == 0) return 0;
|
||||
len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
|
||||
len2 = convert_string(CH_DOS, CH_UTF16, buf, len1, &c2, 2);
|
||||
if (len2 != 2) return 0;
|
||||
return (c == c2);
|
||||
}
|
||||
|
@ -526,7 +526,7 @@ static size_t smbcli_req_pull_ucs2(struct smbcli_request *req, TALLOC_CTX *mem_c
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
if (ret == -1) {
|
||||
*dest = NULL;
|
||||
return 0;
|
||||
@ -725,7 +725,7 @@ static size_t smbcli_blob_pull_ucs2(TALLOC_CTX* mem_ctx,
|
||||
src_len2 += 2;
|
||||
}
|
||||
|
||||
ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
if (ret == -1) {
|
||||
*dest = NULL;
|
||||
return 0;
|
||||
|
@ -421,14 +421,14 @@ NTSTATUS ndr_pull_string(struct ndr_pull *ndr, int ndr_flags, const char **s)
|
||||
uint32_t len1, ofs, len2;
|
||||
uint16_t len3;
|
||||
int ret;
|
||||
int chset = CH_UCS2;
|
||||
int chset = CH_UTF16;
|
||||
|
||||
if (!(ndr_flags & NDR_SCALARS)) {
|
||||
return NT_STATUS_OK;
|
||||
}
|
||||
|
||||
if (NDR_BE(ndr)) {
|
||||
chset = CH_UCS2BE;
|
||||
chset = CH_UTF16BE;
|
||||
}
|
||||
|
||||
switch (ndr->flags & LIBNDR_STRING_FLAGS) {
|
||||
@ -600,14 +600,14 @@ NTSTATUS ndr_push_string(struct ndr_push *ndr, int ndr_flags, const char *s)
|
||||
{
|
||||
ssize_t s_len, c_len;
|
||||
int ret;
|
||||
int chset = CH_UCS2;
|
||||
int chset = CH_UTF16;
|
||||
|
||||
if (!(ndr_flags & NDR_SCALARS)) {
|
||||
return NT_STATUS_OK;
|
||||
}
|
||||
|
||||
if (NDR_BE(ndr)) {
|
||||
chset = CH_UCS2BE;
|
||||
chset = CH_UTF16BE;
|
||||
}
|
||||
|
||||
s_len = s?strlen(s):0;
|
||||
|
@ -468,7 +468,7 @@ static size_t req_pull_ucs2(struct smbsrv_request *req, const char **dest, const
|
||||
src_len2 += 2;
|
||||
}
|
||||
|
||||
ret = convert_string_talloc(req->mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
ret = convert_string_talloc(req->mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
|
||||
|
||||
if (ret == -1) {
|
||||
*dest = NULL;
|
||||
|
@ -50,7 +50,7 @@ static NTSTATUS unicode_open(struct smbcli_tree *tree,
|
||||
}
|
||||
SSVAL(ucs_name, i*2, 0);
|
||||
|
||||
i = convert_string_allocate(CH_UCS2, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
|
||||
i = convert_string_allocate(CH_UTF16, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
|
||||
if (i == -1) {
|
||||
free(ucs_name);
|
||||
return NT_STATUS_NO_MEMORY;
|
||||
|
@ -50,7 +50,7 @@ BOOL torture_utable(int dummy)
|
||||
SSVAL(&c2, 0, c);
|
||||
fstrcpy(fname, "\\utable\\x");
|
||||
p = fname+strlen(fname);
|
||||
len = convert_string(CH_UCS2, CH_UNIX,
|
||||
len = convert_string(CH_UTF16, CH_UNIX,
|
||||
&c2, 2,
|
||||
p, sizeof(fname)-strlen(fname));
|
||||
p[len] = 0;
|
||||
@ -107,7 +107,7 @@ static char *form_name(int c)
|
||||
p = fname+strlen(fname);
|
||||
SSVAL(&c2, 0, c);
|
||||
|
||||
len = convert_string(CH_UCS2, CH_UNIX,
|
||||
len = convert_string(CH_UTF16, CH_UNIX,
|
||||
&c2, 2,
|
||||
p, sizeof(fname)-strlen(fname));
|
||||
p[len] = 0;
|
||||
|
@ -10,6 +10,8 @@ SMB_SUBSYSTEM_MK(TORTURE_RAP,torture/config.mk)
|
||||
|
||||
SMB_SUBSYSTEM_MK(TORTURE_AUTH,torture/config.mk)
|
||||
|
||||
SMB_SUBSYSTEM_MK(TORTURE_LOCAL,torture/config.mk)
|
||||
|
||||
SMB_SUBSYSTEM_MK(TORTURE_NBENCH,torture/config.mk)
|
||||
|
||||
SMB_SUBSYSTEM_MK(TORTURE_LDAP,torture/config.mk)
|
||||
|
@ -93,6 +93,16 @@ REQUIRED_SUBSYSTEMS = \
|
||||
# End SUBSYSTEM TORTURE_AUTH
|
||||
#################################
|
||||
|
||||
#################################
|
||||
# Start SUBSYSTEM TORTURE_LOCAL
|
||||
[SUBSYSTEM::TORTURE_LOCAL]
|
||||
ADD_OBJ_FILES = \
|
||||
torture/local/iconv.o
|
||||
REQUIRED_SUBSYSTEMS = \
|
||||
LIBSMB
|
||||
# End SUBSYSTEM TORTURE_LOCAL
|
||||
#################################
|
||||
|
||||
#################################
|
||||
# Start SUBSYSTEM TORTURE_NBENCH
|
||||
[SUBSYSTEM::TORTURE_NBENCH]
|
||||
@ -125,6 +135,7 @@ REQUIRED_SUBSYSTEMS = \
|
||||
TORTURE_RPC \
|
||||
TORTURE_RAP \
|
||||
TORTURE_AUTH \
|
||||
TORTURE_LOCAL \
|
||||
TORTURE_NBENCH \
|
||||
TORTURE_LDAP \
|
||||
CONFIG \
|
||||
|
298
source4/torture/local/iconv.c
Normal file
298
source4/torture/local/iconv.c
Normal file
@ -0,0 +1,298 @@
|
||||
/*
|
||||
Unix SMB/CIFS implementation.
|
||||
|
||||
local testing of iconv routines. This tests the system iconv code against
|
||||
the built-in iconv code
|
||||
|
||||
Copyright (C) Andrew Tridgell 2004
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include "includes.h"
|
||||
|
||||
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  The codepoint is serialised as 4 little-endian bytes (UCS-4LE) and
  handed to the system iconv library for conversion to UTF-16LE.

  buf  - output buffer; the conversion is told it has 8 bytes available
  size - set to the number of bytes written to buf

  returns 0 on success and non-zero if the system iconv could not be
  opened or refused the codepoint (in which case *size covers only what
  was written before the failure)
*/
static int gen_codepoint(unsigned int codepoint,
			 char *buf, size_t *size)
{
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	/* the descriptor is opened once and cached across calls */
	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
	}
	/* iconv_open() reports failure as (iconv_t)-1, which is not NULL,
	   so it has to be tested for explicitly */
	if (cd == (iconv_t)-1) {
		*size = 0;
		return -1;
	}

	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = 8;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = 8 - size_out;

	/* map the size_t error value onto a plain -1 instead of relying on
	   an implicit narrowing conversion */
	if (ret == (size_t)-1) {
		return -1;
	}
	return (int)ret;
}
|
||||
|
||||
|
||||
/*
  work out the unicode codepoint of the first UTF-8 character in the buffer

  The buffer is converted to UCS-4LE by the system iconv library with only
  4 bytes of output space, so at most one character is produced.

  buf  - UTF-8 input
  size - number of input bytes available in buf

  returns the codepoint, or 0 if nothing could be converted (including
  the case where the system iconv could not be opened)
*/
static unsigned int get_codepoint(char *buf, size_t size)
{
	static iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	/* the descriptor is opened once and cached across calls */
	if (!cd) {
		cd = iconv_open("UCS-4LE", "UTF-8");
	}
	/* iconv_open() reports failure as (iconv_t)-1, not NULL */
	if (cd == (iconv_t)-1) {
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/* the return value is deliberately ignored: running out of output
	   space after the first character is expected, and on any other
	   failure out[] simply stays zeroed */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	/* widen each byte before shifting so that out[3]<<24 can never
	   shift into the sign bit of a promoted int */
	return (unsigned int)out[0] |
		((unsigned int)out[1]<<8) |
		((unsigned int)out[2]<<16) |
		((unsigned int)out[3]<<24);
}
|
||||
|
||||
/*
  print a labelled hex dump of a buffer on a single line

  name - label printed before the bytes
  buf  - bytes to dump
  size - number of bytes in buf
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	const uint8_t *cur = buf;
	const uint8_t *end = buf + size;

	printf("%s ", name);
	while (cur < end) {
		printf("%02x ", *cur);
		cur++;
	}
	printf("\n");
}
|
||||
|
||||
/*
|
||||
given a UTF-16LE buffer, test the system and built-in iconv code
|
||||
to make sure they do exactly the same thing in converting the buffer
|
||||
to UTF-8, then convert it back again and ensure we get the same buffer
|
||||
back
|
||||
*/
|
||||
static int test_buffer(uint8_t *inbuf, size_t size)
|
||||
{
|
||||
uint8_t buf1[1000], buf2[1000], buf3[1000];
|
||||
size_t outsize1, outsize2, outsize3;
|
||||
const char *ptr_in;
|
||||
char *ptr_out;
|
||||
size_t size_in1, size_in2, size_in3;
|
||||
size_t ret1, ret2, ret3, len1, len2;
|
||||
int ok = 1;
|
||||
int errno1, errno2;
|
||||
static iconv_t cd;
|
||||
static smb_iconv_t cd2, cd3;
|
||||
|
||||
if (!cd) {
|
||||
cd = iconv_open("UTF-8", "UTF-16LE");
|
||||
cd2 = smb_iconv_open("UTF-8", "UTF-16LE");
|
||||
cd3 = smb_iconv_open("UTF-16LE", "UTF-8");
|
||||
}
|
||||
|
||||
#if 0
|
||||
int i;
|
||||
for (i=0;i<50;i++) {
|
||||
ptr_in = inbuf;
|
||||
ptr_out = buf1;
|
||||
size_in1 = size;
|
||||
outsize1 = sizeof(buf1);
|
||||
|
||||
memset(ptr_out, 0, outsize1);
|
||||
errno = 0;
|
||||
ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
|
||||
errno1 = errno;
|
||||
}
|
||||
#endif
|
||||
|
||||
ptr_in = inbuf;
|
||||
ptr_out = buf1;
|
||||
size_in1 = size;
|
||||
outsize1 = sizeof(buf1);
|
||||
|
||||
memset(ptr_out, 0, outsize1);
|
||||
errno = 0;
|
||||
ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
|
||||
errno1 = errno;
|
||||
|
||||
ptr_in = inbuf;
|
||||
ptr_out = buf2;
|
||||
size_in2 = size;
|
||||
outsize2 = sizeof(buf2);
|
||||
|
||||
memset(ptr_out, 0, outsize2);
|
||||
errno = 0;
|
||||
ret2 = iconv(cd, &ptr_in, &size_in2, &ptr_out, &outsize2);
|
||||
errno2 = errno;
|
||||
|
||||
len1 = sizeof(buf1) - outsize1;
|
||||
len2 = sizeof(buf2) - outsize2;
|
||||
|
||||
/* codepoints above 1M are not interesting for now */
|
||||
if (len2 > len1 &&
|
||||
memcmp(buf1, buf2, len1) == 0 &&
|
||||
get_codepoint(buf2+len1, len2-len1) >= (1<<20)) {
|
||||
return ok;
|
||||
}
|
||||
if (len1 > len2 &&
|
||||
memcmp(buf1, buf2, len2) == 0 &&
|
||||
get_codepoint(buf1+len2, len1-len2) >= (1<<20)) {
|
||||
return ok;
|
||||
}
|
||||
|
||||
if (ret1 != ret2) {
|
||||
printf("ret1=%d ret2=%d\n", ret1, ret2);
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (errno1 != errno2) {
|
||||
printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
|
||||
show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
|
||||
show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (outsize1 != outsize2) {
|
||||
printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
|
||||
outsize1, outsize2);
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (size_in1 != size_in2) {
|
||||
printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
|
||||
size_in1, size_in2);
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (!ok ||
|
||||
len1 != len2 ||
|
||||
memcmp(buf1, buf2, len1) != 0) {
|
||||
printf("\nsize=%d ret1=%d ret2=%d\n", size, ret1, ret2);
|
||||
show_buf(" IN1:", inbuf, size-size_in1);
|
||||
show_buf(" IN2:", inbuf, size-size_in2);
|
||||
show_buf("OUT1:", buf1, len1);
|
||||
show_buf("OUT2:", buf2, len2);
|
||||
if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
|
||||
printf("next codepoint is %u\n", get_codepoint(buf2+len1, len2-len1));
|
||||
}
|
||||
if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
|
||||
printf("next codepoint is %u\n", get_codepoint(buf1+len2,len1-len2));
|
||||
}
|
||||
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (!ok) return ok;
|
||||
|
||||
size = size - size_in1;
|
||||
ptr_in = buf1;
|
||||
ptr_out = buf3;
|
||||
size_in3 = len1;
|
||||
outsize3 = sizeof(buf3);
|
||||
|
||||
memset(ptr_out, 0, outsize3);
|
||||
ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
|
||||
|
||||
if (ret3 != 0) {
|
||||
printf("pull failed - %s\n", strerror(errno));
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (outsize3 != sizeof(buf3) - size) {
|
||||
printf("wrong outsize3 - %d should be %d\n",
|
||||
outsize3, sizeof(buf3) - size);
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
if (memcmp(buf3, inbuf, size) != 0) {
|
||||
int i;
|
||||
printf("pull bytes mismatch:\n");
|
||||
for (i=0;i<size;i++) {
|
||||
printf("%02x ", inbuf[i]);
|
||||
}
|
||||
printf("\n");
|
||||
for (i=0;i<size;i++) {
|
||||
printf("%02x ", buf3[i]);
|
||||
}
|
||||
printf("\n");
|
||||
ok = 0;
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
/*
  LOCAL-ICONV torture test: compare the built-in UTF-16LE <-> UTF-8
  conversion against the system iconv library

  Two passes are made, each stopping at the first failure:
   - every codepoint below 1M that the system iconv can encode as UTF-16LE
   - randomly generated byte buffers, biased towards ASCII and towards
     surrogate-like bytes, to compare error handling

  dummy - unused

  returns True only if every tested buffer passed test_buffer()
*/
BOOL torture_local_iconv(int dummy)
{
	/* NOTE(review): the banner used to claim "5M" while the loop ran
	   500000 times; the loop count is kept and the banner is now
	   derived from it - confirm which figure was intended */
	const unsigned int num_random = 500000;
	size_t size;
	unsigned char inbuf[1000];
	int ok = 1;
	unsigned int codepoint, i, c;

	srandom(time(NULL));

	printf("Testing first 1M codepoints\n");
	for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
		/* skip anything the system iconv cannot represent */
		if (gen_codepoint(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 100 == 0) {
			printf("codepoint=%u   \r", codepoint);
		}

		ok = test_buffer(inbuf, size);
	}


	printf("Testing %u random UTF-16LE sequences\n", num_random);
	for (i=0;ok && i<num_random;i++) {
		if (i % 100 == 0) {
			printf("i=%u              \r", i);
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			/* mostly 7-bit bytes, with the occasional arbitrary one */
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			/* sometimes force the surrogate marker bits so that
			   surrogate handling gets exercised */
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		ok = test_buffer(inbuf, size);
	}

	return ok == 1;
}
|
||||
|
||||
|
@ -4212,8 +4212,9 @@ static struct {
|
||||
{"RPC-MULTIBIND", torture_multi_bind, 0},
|
||||
{"RPC-DRSUAPI", torture_rpc_drsuapi, 0},
|
||||
|
||||
/* crypto testers */
|
||||
{"CRYPT-NTLMSSP", torture_ntlmssp_self_check, 0},
|
||||
/* local (no server) testers */
|
||||
{"LOCAL-NTLMSSP", torture_ntlmssp_self_check, 0},
|
||||
{"LOCAL-ICONV", torture_local_iconv, 0},
|
||||
|
||||
/* ldap testers */
|
||||
{"LDAP-BASIC", torture_ldap_basic, 0},
|
||||
|
Loading…
x
Reference in New Issue
Block a user