From 31c1c7846f6b6e5848bc39a28a65118bfa98e35d Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Wed, 1 Sep 2004 04:39:06 +0000
Subject: [PATCH] r2159: converted samba4 over to UTF-16.

I had previously thought this was unnecessary, as Windows doesn't use
standards-compliant UTF-16, and for filesystem operations treats bytes
as UCS-2, but Bjoern Jacke has pointed out to me that this means we
don't correctly store extended UTF-16 characters as UTF-8 on
disk. This can be seen with (for example) the Gothic characters, which
have codepoints above 64k.

This commit also adds a LOCAL-ICONV torture test that checks the first
1 million codepoints against the system iconv library, and checks 5
million random UTF-16LE buffers to ensure our error handling is
identical to that of the system iconv library.

The lib/iconv.c changes need backporting to samba3.
(This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)
---
 source4/include/charset.h       |   2 +-
 source4/lib/charcnv.c           |  34 ++--
 source4/lib/iconv.c             | 264 +++++++++++++++++++++-------
 source4/lib/util_unistr.c       |   4 +-
 source4/libcli/raw/rawrequest.c |   4 +-
 source4/librpc/ndr/ndr_basic.c  |   8 +-
 source4/smb_server/request.c    |   2 +-
 source4/torture/basic/charset.c |   2 +-
 source4/torture/basic/utable.c  |   4 +-
 source4/torture/config.m4       |   2 +
 source4/torture/config.mk       |  11 ++
 source4/torture/local/iconv.c   | 298 ++++++++++++++++++++++++++++++++
 source4/torture/torture.c       |   5 +-
 13 files changed, 542 insertions(+), 98 deletions(-)
 create mode 100644 source4/torture/local/iconv.c

diff --git a/source4/include/charset.h b/source4/include/charset.h
index daf10e23c13..f1482ac08c8 100644
--- a/source4/include/charset.h
+++ b/source4/include/charset.h
@@ -20,7 +20,7 @@
 */
 
 /* this defines the charset types used in samba */
-typedef enum {CH_UCS2=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UCS2BE=5} charset_t;
+typedef enum {CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UTF16BE=5} charset_t;
 
 #define NUM_CHARSETS 6
 
diff --git a/source4/lib/charcnv.c b/source4/lib/charcnv.c
index 8204a3bcdd8..2109e957d92 100644
--- a/source4/lib/charcnv.c
+++ b/source4/lib/charcnv.c
@@ -47,12 +47,12 @@ static const char *charset_name(charset_t ch)
 {
 	const char *ret = NULL;
 
-	if (ch == CH_UCS2) ret = "UTF-16LE";
+	if (ch == CH_UTF16) ret = "UTF-16LE";
 	else if (ch == CH_UNIX) ret = lp_unix_charset();
 	else if (ch == CH_DOS) ret = lp_dos_charset();
 	else if (ch == CH_DISPLAY) ret = lp_display_charset();
 	else if (ch == CH_UTF8) ret = "UTF8";
-	else if (ch == CH_UCS2BE) ret = "UCS-2BE";
+	else if (ch == CH_UTF16BE) ret = "UTF-16BE";
 
 	if (!ret || !*ret) ret = "ASCII";
 	return ret;
@@ -81,13 +81,13 @@ void init_iconv(void)
 
 	/* so that charset_name() works we need to get the UNIX<->UCS2 going
 	   first */
-	if (!conv_handles[CH_UNIX][CH_UCS2])
-		conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), 
+	if (!conv_handles[CH_UNIX][CH_UTF16])
+		conv_handles[CH_UNIX][CH_UTF16] = smb_iconv_open(charset_name(CH_UTF16), 
 								"ASCII");
 
-	if (!conv_handles[CH_UCS2][CH_UNIX])
-		conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", 
-								charset_name(CH_UCS2));
+	if (!conv_handles[CH_UTF16][CH_UNIX])
+		conv_handles[CH_UTF16][CH_UNIX] = smb_iconv_open("ASCII", 
+								charset_name(CH_UTF16));
 
 	for (c1=0;c1<NUM_CHARSETS;c1++) {
 		for (c2=0;c2<NUM_CHARSETS;c2++) {
@@ -293,7 +293,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 	size_t size;
 	smb_ucs2_t *buffer;
 	
-	size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
+	size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
 				       (void **) &buffer);
 	if (size == -1) {
 		smb_panic("failed to create UCS2 buffer");
@@ -303,7 +303,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 		return srclen;
 	}
 	
-	size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
+	size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
 	free(buffer);
 	return size;
 }
@@ -313,7 +313,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 	size_t size;
 	smb_ucs2_t *buffer;
 	
-	size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen,
+	size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen,
 				       (void **) &buffer);
 	if (size == -1) {
 		smb_panic("failed to create UCS2 buffer");
@@ -322,7 +322,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 		free(buffer);
 		return srclen;
 	}
-	size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
+	size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen);
 	free(buffer);
 	return size;
 }
@@ -461,7 +461,7 @@ ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest
 	/* ucs2 is always a multiple of 2 bytes */
 	dest_len &= ~1;
 
-	len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
+	len += convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len);
 	return len;
 }
 
@@ -480,7 +480,7 @@ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
 	size_t src_len = strlen(src)+1;
 
 	*dest = NULL;
-	return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest);
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (const void **)dest);
 }
 
 
@@ -498,7 +498,7 @@ ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
 	size_t src_len = strlen(src)+1;
 
 	*dest = NULL;
-	return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest);	
+	return convert_string_allocate(CH_UNIX, CH_UTF16, src, src_len, (void **)dest);	
 }
 
 /**
@@ -603,7 +603,7 @@ size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_
 	if (src_len != (size_t)-1)
 		src_len &= ~1;
 	
-	ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
+	ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len);
 	if (dest_len)
 		dest[MIN(ret, dest_len-1)] = 0;
 
@@ -627,7 +627,7 @@ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
 {
 	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
 	*dest = NULL;
-	return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest);
+	return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (const void **)dest);
 }
 
 /**
@@ -642,7 +642,7 @@ ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src)
 {
 	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
 	*dest = NULL;
-	return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest);	
+	return convert_string_allocate(CH_UTF16, CH_UNIX, src, src_len, dest);	
 }
 
 /**
diff --git a/source4/lib/iconv.c b/source4/lib/iconv.c
index c869c253cf0..9f6526faa5e 100644
--- a/source4/lib/iconv.c
+++ b/source4/lib/iconv.c
@@ -39,8 +39,8 @@
  *
  * Note that the only function provided by iconv is conversion between
  * characters.  It doesn't directly support operations like
- * uppercasing or comparison.  We have to convert to UCS-2 and compare
- * there.
+ * uppercasing or comparison.  We have to convert to UTF-16LE and
+ * compare there.
  *
  * @sa Samba Developers Guide
  **/
@@ -55,10 +55,11 @@ static size_t iconv_copy  (void *,const char **, size_t *, char **, size_t *);
 static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
 
 static const struct charset_functions const builtin_functions[] = {
-	/* windows is really neither UCS-2 not UTF-16 */
+	/* windows is closest to UTF-16 */
 	{"UCS-2LE",  iconv_copy, iconv_copy},
 	{"UTF-16LE",  iconv_copy, iconv_copy},
 	{"UCS-2BE",  iconv_swab, iconv_swab},
+	{"UTF-16BE",  iconv_swab, iconv_swab},
 
 	/* we include the UTF-8 alias to cope with differing locale settings */
 	{"UTF8",   utf8_pull,  utf8_push},
@@ -217,23 +218,23 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 #endif
 
 	/* check for conversion to/from ucs2 */
-	if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+	if (strcasecmp(fromcode, "UTF-16LE") == 0 && to) {
 		ret->direct = to->push;
 		return ret;
 	}
-	if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+	if (strcasecmp(tocode, "UTF-16LE") == 0 && from) {
 		ret->direct = from->pull;
 		return ret;
 	}
 
 #ifdef HAVE_NATIVE_ICONV
-	if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+	if (strcasecmp(fromcode, "UTF-16LE") == 0) {
 		ret->direct = sys_iconv;
 		ret->cd_direct = ret->cd_push;
 		ret->cd_push = NULL;
 		return ret;
 	}
-	if (strcasecmp(tocode, "UCS-2LE") == 0) {
+	if (strcasecmp(tocode, "UTF-16LE") == 0) {
 		ret->direct = sys_iconv;
 		ret->cd_direct = ret->cd_pull;
 		ret->cd_pull = NULL;
@@ -460,100 +461,231 @@ static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
 static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
 			 char **outbuf, size_t *outbytesleft)
 {
-	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-		const uint8_t *c = (const uint8_t *)*inbuf;
-		uint8_t *uc = (uint8_t *)*outbuf;
-		int len = 1;
+	size_t in_left=*inbytesleft, out_left=*outbytesleft;
+	const uint8_t *c = (const uint8_t *)*inbuf;
+	uint8_t *uc = (uint8_t *)*outbuf;
 
+	while (in_left >= 1 && out_left >= 2) {
 		if ((c[0] & 0x80) == 0) {
 			uc[0] = c[0];
 			uc[1] = 0;
-		} else if ((c[0] & 0xf0) == 0xe0) {
-			if (*inbytesleft < 3) {
-				DEBUG(0,("short utf8 char\n"));
-				goto badseq;
-			}
-			uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
-			uc[0] = (c[1]<<6) | (c[2]&0x3f);
-			len = 3;
-		} else if ((c[0] & 0xe0) == 0xc0) {
-			if (*inbytesleft < 2) {
-				DEBUG(0,("short utf8 char\n"));
-				goto badseq;
+			c  += 1;
+			in_left  -= 1;
+			out_left -= 2;
+			uc += 2;
+			continue;
+		}
+
+		if ((c[0] & 0xe0) == 0xc0) {
+			if (in_left < 2 ||
+			    (c[1] & 0xc0) != 0x80) {
+				errno = EILSEQ;
+				goto error;
 			}
 			uc[1] = (c[0]>>2) & 0x7;
 			uc[0] = (c[0]<<6) | (c[1]&0x3f);
-			len = 2;
+			c  += 2;
+			in_left  -= 2;
+			out_left -= 2;
+			uc += 2;
+			continue;
 		}
 
-		(*inbuf)  += len;
-		(*inbytesleft)  -= len;
-		(*outbytesleft) -= 2;
-		(*outbuf) += 2;
+		if ((c[0] & 0xf0) == 0xe0) {
+			if (in_left < 3 ||
+			    (c[1] & 0xc0) != 0x80 || 
+			    (c[2] & 0xc0) != 0x80) {
+				errno = EILSEQ;
+				goto error;
+			}
+			uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
+			uc[0] = (c[1]<<6) | (c[2]&0x3f);
+			c  += 3;
+			in_left  -= 3;
+			out_left -= 2;
+			uc += 2;
+			continue;
+		}
+
+		if ((c[0] & 0xf8) == 0xf0) {
+			unsigned int codepoint;
+			if (in_left < 4 ||
+			    (c[1] & 0xc0) != 0x80 || 
+			    (c[2] & 0xc0) != 0x80 ||
+			    (c[3] & 0xc0) != 0x80) {
+				errno = EILSEQ;
+				goto error;
+			}
+			codepoint = 
+				(c[3]&0x3f) | 
+				((c[2]&0x3f)<<6) | 
+				((c[1]&0x3f)<<12) |
+				((c[0]&0x7)<<18);
+			if (codepoint < 0x10000) {
+				/* accept UTF-8 characters that are not
+				   minimally packed, but pack the result */
+				uc[0] = (codepoint & 0xFF);
+				uc[1] = (codepoint >> 8);
+				c += 4;
+				in_left -= 4;
+				out_left -= 2;
+				uc += 2;
+				continue;
+			}
+
+			codepoint -= 0x10000;
+
+			if (out_left < 4) {
+				errno = E2BIG;
+				goto error;
+			}
+
+			uc[0] = (codepoint>>10) & 0xFF;
+			uc[1] = (codepoint>>18) | 0xd8;
+			uc[2] = codepoint & 0xFF;
+			uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
+			c  += 4;
+			in_left  -= 4;
+			out_left -= 4;
+			uc += 4;
+			continue;
+		}
+
+		/* we don't handle 5 byte sequences */
+		errno = EINVAL;
+		goto error;
 	}
 
-	if (*inbytesleft > 0) {
+	if (in_left > 0) {
 		errno = E2BIG;
-		return -1;
+		goto error;
 	}
-	
+
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf = c;
+	*outbuf = uc;	
 	return 0;
 
-badseq:
-	errno = EINVAL;
+error:
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf = c;
+	*outbuf = uc;
 	return -1;
 }
 
 static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
-			 char **outbuf, size_t *outbytesleft)
+			char **outbuf, size_t *outbytesleft)
 {
-	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-		uint8_t *c = (uint8_t *)*outbuf;
-		const uint8_t *uc = (const uint8_t *)*inbuf;
-		int len=1;
+	size_t in_left=*inbytesleft, out_left=*outbytesleft;
+	uint8_t *c = (uint8_t *)*outbuf;
+	const uint8_t *uc = (const uint8_t *)*inbuf;
 
-		if (uc[1] & 0xf8) {
-			if (*outbytesleft < 3) {
-				DEBUG(0,("short utf8 write\n"));
-				goto toobig;
-			}
-			c[0] = 0xe0 | (uc[1]>>4);
-			c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
-			c[2] = 0x80 | (uc[0]&0x3f);
-			len = 3;
-		} else if (uc[1] | (uc[0] & 0x80)) {
-			if (*outbytesleft < 2) {
-				DEBUG(0,("short utf8 write\n"));
-				goto toobig;
-			}
-			c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
-			c[1] = 0x80 | (uc[0]&0x3f);
-			len = 2;
-		} else {
+	while (in_left >= 2 && out_left >= 1) {
+		unsigned int codepoint;
+
+		if (uc[1] == 0 && !(uc[0] & 0x80)) {
+			/* simplest case */
 			c[0] = uc[0];
+			in_left  -= 2;
+			out_left -= 1;
+			uc += 2;
+			c  += 1;
+			continue;
 		}
 
+		if ((uc[1]&0xf8) == 0) {
+			/* next simplest case */
+			if (out_left < 2) {
+				errno = E2BIG;
+				goto error;
+			}
+			c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
+			c[1] = 0x80 | (uc[0] & 0x3f);
+			in_left  -= 2;
+			out_left -= 2;
+			uc += 2;
+			c  += 2;
+			continue;
+		}
 
-		(*inbytesleft)  -= 2;
-		(*outbytesleft) -= len;
-		(*inbuf)  += 2;
-		(*outbuf) += len;
+		if ((uc[1] & 0xfc) == 0xdc) {
+			/* it's the second part of a 4 byte sequence. Illegal */
+			if (in_left < 4) {
+				errno = EINVAL;
+			} else {
+				errno = EILSEQ;
+			}
+			goto error;
+		}
+
+		if ((uc[1] & 0xfc) != 0xd8) {
+			codepoint = uc[0] | (uc[1]<<8);
+			if (out_left < 3) {
+				errno = E2BIG;
+				goto error;
+			}
+			c[0] = 0xe0 | (codepoint >> 12);
+			c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+			c[2] = 0x80 | (codepoint & 0x3f);
+			
+			in_left  -= 2;
+			out_left -= 3;
+			uc  += 2;
+			c   += 3;
+			continue;
+		}
+
+		/* it's the first part of a 4 byte sequence */
+		if (in_left < 4) {
+			errno = EINVAL;
+			goto error;
+		}
+		if ((uc[3] & 0xfc) != 0xdc) {
+			errno = EILSEQ;
+			goto error;
+		}
+		codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) | 
+				       (uc[0]<<10) | ((uc[1] & 0x3)<<18));
+		
+		if (out_left < 4) {
+			errno = E2BIG;
+			goto error;
+		}
+		c[0] = 0xf0 | (codepoint >> 18);
+		c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+		c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+		c[3] = 0x80 | (codepoint & 0x3f);
+		
+		in_left  -= 4;
+		out_left -= 4;
+		uc       += 4;
+		c        += 4;
 	}
 
-	if (*inbytesleft == 1) {
+	if (in_left == 1) {
 		errno = EINVAL;
-		return -1;
+		goto error;
 	}
 
-	if (*inbytesleft > 1) {
+	if (in_left > 1) {
 		errno = E2BIG;
-		return -1;
+		goto error;
 	}
+
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf  = uc;
+	*outbuf = c;
 	
 	return 0;
 
-toobig:
-	errno = E2BIG;
+error:
+	*inbytesleft = in_left;
+	*outbytesleft = out_left;
+	*inbuf  = uc;
+	*outbuf = c;
 	return -1;
 }
 
diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c
index 2bd990836ee..63d68fa12e3 100644
--- a/source4/lib/util_unistr.c
+++ b/source4/lib/util_unistr.c
@@ -96,9 +96,9 @@ static int check_dos_char(smb_ucs2_t c)
 	char buf[10];
 	smb_ucs2_t c2 = 0;
 	int len1, len2;
-	len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
+	len1 = convert_string(CH_UTF16, CH_DOS, &c, 2, buf, sizeof(buf));
 	if (len1 == 0) return 0;
-	len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
+	len2 = convert_string(CH_DOS, CH_UTF16, buf, len1, &c2, 2);
 	if (len2 != 2) return 0;
 	return (c == c2);
 }
diff --git a/source4/libcli/raw/rawrequest.c b/source4/libcli/raw/rawrequest.c
index 87bbe5a31b2..51c0c0b925a 100644
--- a/source4/libcli/raw/rawrequest.c
+++ b/source4/libcli/raw/rawrequest.c
@@ -526,7 +526,7 @@ static size_t smbcli_req_pull_ucs2(struct smbcli_request *req, TALLOC_CTX *mem_c
 		return 0;
 	}
 
-	ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+	ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
 	if (ret == -1) {
 		*dest = NULL;
 		return 0;
@@ -725,7 +725,7 @@ static size_t smbcli_blob_pull_ucs2(TALLOC_CTX* mem_ctx,
 		src_len2 += 2;
 	}
 
-	ret = convert_string_talloc(mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+	ret = convert_string_talloc(mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
 	if (ret == -1) {
 		*dest = NULL;
 		return 0;
diff --git a/source4/librpc/ndr/ndr_basic.c b/source4/librpc/ndr/ndr_basic.c
index 34a1935c3b0..7f36f7e4ba6 100644
--- a/source4/librpc/ndr/ndr_basic.c
+++ b/source4/librpc/ndr/ndr_basic.c
@@ -421,14 +421,14 @@ NTSTATUS ndr_pull_string(struct ndr_pull *ndr, int ndr_flags, const char **s)
 	uint32_t len1, ofs, len2;
 	uint16_t len3;
 	int ret;
-	int chset = CH_UCS2;
+	int chset = CH_UTF16;
 
 	if (!(ndr_flags & NDR_SCALARS)) {
 		return NT_STATUS_OK;
 	}
 
 	if (NDR_BE(ndr)) {
-		chset = CH_UCS2BE;
+		chset = CH_UTF16BE;
 	}
 
 	switch (ndr->flags & LIBNDR_STRING_FLAGS) {
@@ -600,14 +600,14 @@ NTSTATUS ndr_push_string(struct ndr_push *ndr, int ndr_flags, const char *s)
 {
 	ssize_t s_len, c_len;
 	int ret;
-	int chset = CH_UCS2;
+	int chset = CH_UTF16;
 
 	if (!(ndr_flags & NDR_SCALARS)) {
 		return NT_STATUS_OK;
 	}
 
 	if (NDR_BE(ndr)) {
-		chset = CH_UCS2BE;
+		chset = CH_UTF16BE;
 	}
 	
 	s_len = s?strlen(s):0;
diff --git a/source4/smb_server/request.c b/source4/smb_server/request.c
index e3f54bde654..645ea105a2b 100644
--- a/source4/smb_server/request.c
+++ b/source4/smb_server/request.c
@@ -468,7 +468,7 @@ static size_t req_pull_ucs2(struct smbsrv_request *req, const char **dest, const
 		src_len2 += 2;
 	}
 
-	ret = convert_string_talloc(req->mem_ctx, CH_UCS2, CH_UNIX, src, src_len2, (const void **)dest);
+	ret = convert_string_talloc(req->mem_ctx, CH_UTF16, CH_UNIX, src, src_len2, (const void **)dest);
 
 	if (ret == -1) {
 		*dest = NULL;
diff --git a/source4/torture/basic/charset.c b/source4/torture/basic/charset.c
index 330dcf1707b..879f20617fd 100644
--- a/source4/torture/basic/charset.c
+++ b/source4/torture/basic/charset.c
@@ -50,7 +50,7 @@ static NTSTATUS unicode_open(struct smbcli_tree *tree,
 	}
 	SSVAL(ucs_name, i*2, 0);
 
-	i = convert_string_allocate(CH_UCS2, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
+	i = convert_string_allocate(CH_UTF16, CH_UNIX, ucs_name, (1+u_name_len)*2, (void **)&fname);
 	if (i == -1) {
 		free(ucs_name);
 		return NT_STATUS_NO_MEMORY;
diff --git a/source4/torture/basic/utable.c b/source4/torture/basic/utable.c
index f98e1b1b4ad..6faf020ef9a 100644
--- a/source4/torture/basic/utable.c
+++ b/source4/torture/basic/utable.c
@@ -50,7 +50,7 @@ BOOL torture_utable(int dummy)
 		SSVAL(&c2, 0, c);
 		fstrcpy(fname, "\\utable\\x");
 		p = fname+strlen(fname);
-		len = convert_string(CH_UCS2, CH_UNIX, 
+		len = convert_string(CH_UTF16, CH_UNIX, 
 				     &c2, 2, 
 				     p, sizeof(fname)-strlen(fname));
 		p[len] = 0;
@@ -107,7 +107,7 @@ static char *form_name(int c)
 	p = fname+strlen(fname);
 	SSVAL(&c2, 0, c);
 
-	len = convert_string(CH_UCS2, CH_UNIX, 
+	len = convert_string(CH_UTF16, CH_UNIX, 
 			     &c2, 2, 
 			     p, sizeof(fname)-strlen(fname));
 	p[len] = 0;
diff --git a/source4/torture/config.m4 b/source4/torture/config.m4
index e5f1d357e64..fdd5be13551 100644
--- a/source4/torture/config.m4
+++ b/source4/torture/config.m4
@@ -10,6 +10,8 @@ SMB_SUBSYSTEM_MK(TORTURE_RAP,torture/config.mk)
 
 SMB_SUBSYSTEM_MK(TORTURE_AUTH,torture/config.mk)
 
+SMB_SUBSYSTEM_MK(TORTURE_LOCAL,torture/config.mk)
+
 SMB_SUBSYSTEM_MK(TORTURE_NBENCH,torture/config.mk)
 
 SMB_SUBSYSTEM_MK(TORTURE_LDAP,torture/config.mk)
diff --git a/source4/torture/config.mk b/source4/torture/config.mk
index 6d1ddd4d5a7..43a42217f59 100644
--- a/source4/torture/config.mk
+++ b/source4/torture/config.mk
@@ -93,6 +93,16 @@ REQUIRED_SUBSYSTEMS = \
 # End SUBSYSTEM TORTURE_AUTH
 #################################
 
+#################################
+# Start SUBSYSTEM TORTURE_LOCAL
+[SUBSYSTEM::TORTURE_LOCAL]
+ADD_OBJ_FILES = \
+		torture/local/iconv.o
+REQUIRED_SUBSYSTEMS = \
+		LIBSMB
+# End SUBSYSTEM TORTURE_LOCAL
+#################################
+
 #################################
 # Start SUBSYSTEM TORTURE_NBENCH
 [SUBSYSTEM::TORTURE_NBENCH]
@@ -125,6 +135,7 @@ REQUIRED_SUBSYSTEMS = \
 		TORTURE_RPC \
 		TORTURE_RAP \
 		TORTURE_AUTH \
+		TORTURE_LOCAL \
 		TORTURE_NBENCH \
 		TORTURE_LDAP \
 		CONFIG \
diff --git a/source4/torture/local/iconv.c b/source4/torture/local/iconv.c
new file mode 100644
index 00000000000..0867be0beef
--- /dev/null
+++ b/source4/torture/local/iconv.c
@@ -0,0 +1,298 @@
+/* 
+   Unix SMB/CIFS implementation.
+
+   local testing of iconv routines. This tests the system iconv code against
+   the built-in iconv code
+
+   Copyright (C) Andrew Tridgell 2004
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+/*
+  generate a UTF-16LE buffer for a given unicode codepoint
+*/
+static int gen_codepoint(unsigned int codepoint,
+			  char *buf, size_t *size)
+{
+	static iconv_t cd;
+	uint8_t in[4];
+	char *ptr_in;
+	size_t size_in, size_out, ret;
+	if (!cd) {
+		cd = iconv_open("UTF-16LE", "UCS-4LE");
+	}
+
+	in[0] = codepoint & 0xFF;
+	in[1] = (codepoint>>8) & 0xFF;
+	in[2] = (codepoint>>16) & 0xFF;
+	in[3] = (codepoint>>24) & 0xFF;
+
+	ptr_in = in;
+	size_in = 4;
+	size_out = 8;
+
+	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
+
+	*size = 8 - size_out;
+
+	return ret;
+}
+
+
+/*
+  work out the unicode codepoint of the first UTF-8 character in the buffer
+*/
+static unsigned int get_codepoint(char *buf, size_t size)
+{
+	static iconv_t cd;
+	uint8_t out[4];
+	char *ptr_out;
+	size_t size_out, size_in, ret;
+	if (!cd) {
+		cd = iconv_open("UCS-4LE", "UTF-8");
+	}
+
+	size_in = size;
+	ptr_out = out;
+	size_out = sizeof(out);
+	memset(out, 0, sizeof(out));
+
+	ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
+
+	return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
+}
+
+/*
+  display a buffer with name prefix
+*/
+static void show_buf(const char *name, uint8_t *buf, size_t size)
+{
+	int i;
+	printf("%s ", name);
+	for (i=0;i<size;i++) {
+		printf("%02x ", buf[i]);
+	}
+	printf("\n");
+}
+
+/*
+  given a UTF-16LE buffer, test the system and built-in iconv code
+  to make sure they do exactly the same thing in converting the buffer
+  to UTF-8, then convert it back again and ensure we get the same buffer
+  back
+*/
+static int test_buffer(uint8_t *inbuf, size_t size)
+{
+	uint8_t buf1[1000], buf2[1000], buf3[1000];
+	size_t outsize1, outsize2, outsize3;
+	const char *ptr_in;
+	char *ptr_out;
+	size_t size_in1, size_in2, size_in3;
+	size_t ret1, ret2, ret3, len1, len2;
+	int ok = 1;
+	int errno1, errno2;
+	static iconv_t cd;
+	static smb_iconv_t cd2, cd3;
+	
+	if (!cd) {
+		cd = iconv_open("UTF-8", "UTF-16LE");
+		cd2 = smb_iconv_open("UTF-8", "UTF-16LE");
+		cd3 = smb_iconv_open("UTF-16LE", "UTF-8");
+	}
+
+#if 0
+	int i;
+	for (i=0;i<50;i++) {
+		ptr_in = inbuf;
+		ptr_out = buf1;
+		size_in1 = size;
+		outsize1 = sizeof(buf1);
+
+		memset(ptr_out, 0, outsize1);
+		errno = 0;
+		ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
+		errno1 = errno;
+	}
+#endif
+
+	ptr_in = inbuf;
+	ptr_out = buf1;
+	size_in1 = size;
+	outsize1 = sizeof(buf1);
+
+	memset(ptr_out, 0, outsize1);
+	errno = 0;
+	ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
+	errno1 = errno;
+
+	ptr_in = inbuf;
+	ptr_out = buf2;
+	size_in2 = size;
+	outsize2 = sizeof(buf2);
+	
+	memset(ptr_out, 0, outsize2);
+	errno = 0;
+	ret2 = iconv(cd, &ptr_in, &size_in2, &ptr_out, &outsize2);
+	errno2 = errno;
+
+	len1 = sizeof(buf1) - outsize1;
+	len2 = sizeof(buf2) - outsize2;
+
+	/* codepoints above 1M are not interesting for now */
+	if (len2 > len1 && 
+	    memcmp(buf1, buf2, len1) == 0 && 
+	    get_codepoint(buf2+len1, len2-len1) >= (1<<20)) {
+		return ok;
+	}
+	if (len1 > len2 && 
+	    memcmp(buf1, buf2, len2) == 0 && 
+	    get_codepoint(buf1+len2, len1-len2) >= (1<<20)) {
+		return ok;
+	}
+
+	if (ret1 != ret2) {
+		printf("ret1=%d ret2=%d\n", ret1, ret2);
+		ok = 0;
+	}
+
+	if (errno1 != errno2) {
+		printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
+		show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
+		show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
+		ok = 0;
+	}
+	
+	if (outsize1 != outsize2) {
+		printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
+		       outsize1, outsize2);
+		ok = 0;
+	}
+	
+	if (size_in1 != size_in2) {
+		printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
+		       size_in1, size_in2);
+		ok = 0;
+	}
+
+	if (!ok ||
+	    len1 != len2 ||
+	    memcmp(buf1, buf2, len1) != 0) {
+		printf("\nsize=%d ret1=%d ret2=%d\n", size, ret1, ret2);
+		show_buf(" IN1:", inbuf, size-size_in1);
+		show_buf(" IN2:", inbuf, size-size_in2);
+		show_buf("OUT1:", buf1, len1);
+		show_buf("OUT2:", buf2, len2);
+		if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
+			printf("next codepoint is %u\n", get_codepoint(buf2+len1, len2-len1));
+		}
+		if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
+			printf("next codepoint is %u\n", get_codepoint(buf1+len2,len1-len2));
+		}
+
+		ok = 0;
+	}
+	
+	if (!ok) return ok;
+
+	size = size - size_in1;
+	ptr_in = buf1;
+	ptr_out = buf3;
+	size_in3 = len1;
+	outsize3 = sizeof(buf3);
+
+	memset(ptr_out, 0, outsize3);
+	ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
+	
+	if (ret3 != 0) {
+		printf("pull failed - %s\n", strerror(errno));
+		ok = 0;
+	}
+	
+	if (outsize3 != sizeof(buf3) - size) {
+		printf("wrong outsize3 - %d should be %d\n", 
+		       outsize3, sizeof(buf3) - size);
+		ok = 0;
+	}
+	
+	if (memcmp(buf3, inbuf, size) != 0) {
+		int i;
+		printf("pull bytes mismatch:\n");
+		for (i=0;i<size;i++) {
+			printf("%02x ", inbuf[i]);
+		}
+		printf("\n");
+		for (i=0;i<size;i++) {
+			printf("%02x ", buf3[i]);
+		}
+		printf("\n");
+		ok = 0;
+	}
+
+	return ok;
+}
+
+BOOL torture_local_iconv(int dummy) 
+{
+	size_t size;
+	unsigned char inbuf[1000];
+	int ok = 1;
+	unsigned int codepoint, i, c;
+
+	srandom(time(NULL));
+
+	printf("Testing first 1M codepoints\n");
+	for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
+		if (gen_codepoint(codepoint, inbuf, &size) != 0) {
+			continue;
+		}
+
+		if (codepoint % 100 == 0) {
+			printf("codepoint=%u   \r", codepoint);
+		}
+
+		ok = test_buffer(inbuf, size);
+	}
+
+
+	printf("Testing 5M random UTF-16LE sequences\n");
+	for (i=0;ok && i<500000;i++) {
+		if (i % 100 == 0) {
+			printf("i=%u              \r", i);
+		}
+
+		size = random() % 100;
+		for (c=0;c<size;c++) {
+			if (random() % 100 < 80) {
+				inbuf[c] = random() % 128;
+			} else {
+				inbuf[c] = random();
+			}
+			if (random() % 10 == 0) {
+				inbuf[c] |= 0xd8;
+			}
+			if (random() % 10 == 0) {
+				inbuf[c] |= 0xdc;
+			}
+		}
+		ok = test_buffer(inbuf, size);
+	}
+
+	return ok == 1;
+}
+
+
diff --git a/source4/torture/torture.c b/source4/torture/torture.c
index 9488c987eca..e3a7d8e5d88 100644
--- a/source4/torture/torture.c
+++ b/source4/torture/torture.c
@@ -4212,8 +4212,9 @@ static struct {
 	{"RPC-MULTIBIND", torture_multi_bind, 0},
 	{"RPC-DRSUAPI", torture_rpc_drsuapi, 0},
 
-	/* crypto testers */
-	{"CRYPT-NTLMSSP", torture_ntlmssp_self_check, 0},
+	/* local (no server) testers */
+	{"LOCAL-NTLMSSP", torture_ntlmssp_self_check, 0},
+	{"LOCAL-ICONV", torture_local_iconv, 0},
 
 	/* ldap testers */
 	{"LDAP-BASIC", torture_ldap_basic, 0},