1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00

s3-charcnv: Move convert_string() et al to lib/util/charset

This is the first step to this being the common convert_string
implementation.

Andrew Bartlett

Signed-off-by: Andrew Tridgell <tridge@samba.org>
This commit is contained in:
Andrew Bartlett 2011-04-12 14:01:41 +10:00 committed by Andrew Tridgell
parent bf431fbedb
commit 4158e9a7e5
5 changed files with 471 additions and 447 deletions

View File

@ -0,0 +1,467 @@
/*
Unix SMB/CIFS implementation.
Character set conversion Extensions
Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
Copyright (C) Andrew Tridgell 2001
Copyright (C) Simo Sorce 2001
Copyright (C) Martin Pool 2003
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
/**
* @file
*
* @brief Character-set conversion routines built on our iconv.
*
* @note Samba's internal character set (at least in the 3.0 series)
* is always the same as the one for the Unix filesystem. It is
* <b>not</b> necessarily UTF-8 and may be different on machines that
* need i18n filenames to be compatible with Unix software. It does
* have to be a superset of ASCII. All multibyte sequences must start
* with a byte with the high bit set.
*
* @sa lib/iconv.c
*/
/**
 * Convert string from one encoding to another, making error checking etc
 * Slow path version - uses (slow) iconv.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or (size_t)-1 if
 *        src is NUL terminated (the length, terminator included, is then
 *        computed here)
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string
 * @param converted_size is the number of bytes occupied in the destination.
 *        It is always written: 0 when no conversion descriptor exists,
 *        otherwise the number of bytes produced before smb_iconv returned,
 *        even if that return was a failure.
 *
 * @returns false on fail, true on success. errno is set to EINVAL when no
 *          conversion descriptor is available; otherwise it is whatever
 *          smb_iconv left behind.
 *
 * Ensure the srclen contains the terminating zero.
 *
 **/
static bool convert_string_internal(charset_t from, charset_t to,
				    void const *src, size_t srclen,
				    void *dest, size_t destlen, size_t *converted_size)
{
	size_t i_len, o_len;
	size_t retval;
	const char *inbuf = (const char *)src;
	char *outbuf = (char *)dest;
	smb_iconv_t descriptor;
	struct smb_iconv_handle *ic;

	/*
	 * The fast paths in convert_string_error() add their own byte count
	 * to *converted_size even when we fail, so never leave it unset.
	 */
	*converted_size = 0;

	lazy_initialize_conv();
	ic = get_iconv_handle();
	descriptor = get_conv_handle(ic, from, to);

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		errno = EINVAL;
		return false;
	}

	/* Only measure the source once we know a conversion is possible. */
	if (srclen == (size_t)-1) {
		if (from == CH_UTF16LE || from == CH_UTF16BE) {
			srclen = (strlen_w((const smb_ucs2_t *)src) + 1) * 2;
		} else {
			srclen = strlen((const char *)src) + 1;
		}
	}

	i_len = srclen;
	o_len = destlen;

	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);

	/* Report what was produced, whether or not the call succeeded. */
	*converted_size = destlen - o_len;

	return retval != (size_t)-1;
}
/**
 * Convert string from one encoding to another, making error checking etc
 * Fast path version - handles ASCII first.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or -1 for nul terminated.
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string - *NEVER* -1.
 * @param converted_size is the number of bytes occupied in the destination
 *
 * @returns false and sets errno on fail, true on success. E2BIG is
 *          reported when the destination fills up before the source is
 *          consumed.
 *
 * Ensure the srclen contains the terminating zero.
 *
 * This function has been hand-tuned to provide a fast path.
 * Don't change unless you really know what you are doing. JRA.
 **/
bool convert_string_error(charset_t from, charset_t to,
			  void const *src, size_t srclen,
			  void *dest, size_t destlen,
			  size_t *converted_size)
{
	/*
	 * NB. We deliberately don't do a strlen here if srclen == -1.
	 * This is very expensive over millions of calls and is taken
	 * care of in the slow path in convert_string_internal. JRA.
	 */

#ifdef DEVELOPER
	SMB_ASSERT(destlen != (size_t)-1);
#endif

	if (srclen == 0) {
		*converted_size = 0;
		return true;
	}

	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
		/* Single/multibyte to single/multibyte: copy ASCII bytes as-is. */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';
		size_t retval = 0;

		/* If all characters are ascii, fast path here. */
		while (slen && dlen) {
			if ((lastp = *p) <= 0x7f) {
				*q++ = *p++;
				if (slen != (size_t)-1) {
					slen--;
				}
				dlen--;
				retval++;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/*
				 * Hand the rest to iconv. Collect its count in
				 * a zeroed local: it may fail without storing
				 * anything, and *converted_size may be unset.
				 */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		if (!dlen) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	} else if (from == CH_UTF16LE && to != CH_UTF16LE && to != CH_UTF16BE) {
		/*
		 * UTF-16LE to a byte-oriented charset. A UTF-16BE target is
		 * excluded: this path emits one byte per character, which
		 * is not a valid UTF-16 encoding.
		 */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t retval = 0;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';

		/* If all characters are ascii, fast path here. */
		while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
			if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
				*q++ = *p;
				if (slen != (size_t)-1) {
					slen -= 2;
				}
				p += 2;
				dlen--;
				retval++;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/* See the first fast path for why a local is used. */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		if (!dlen) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
		/* Byte-oriented charset to UTF-16LE: widen ASCII bytes. */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t retval = 0;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';

		/* If all characters are ascii, fast path here. */
		while (slen && (dlen >= 2)) {
			if ((lastp = *p) <= 0x7F) {
				*q++ = *p++;
				*q++ = '\0';
				if (slen != (size_t)-1) {
					slen--;
				}
				dlen -= 2;
				retval += 2;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/* See the first fast path for why a local is used. */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		/*
		 * The loop needs two free bytes per character, so with an odd
		 * destlen it stops at dlen == 1, not 0. Treat "less than one
		 * UTF-16 unit left" as out of room so truncation is reported.
		 */
		if (dlen < 2) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	}

#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
  general_case:
#endif
	return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
}
/**
 * Work out the "%.*s" precision to use when logging a conversion source.
 *
 * @param srclen source length as passed to convert_string()
 *
 * @returns -1 when srclen is (size_t)-1 (the source is NUL terminated, and
 *          a negative precision makes printf behave as if none was given),
 *          otherwise srclen capped so it fits an int and keeps log lines
 *          bounded.
 */
static int convert_string_log_len(size_t srclen)
{
	enum { LOG_PREVIEW_MAX = 1024 };

	if (srclen == (size_t)-1) {
		return -1;
	}
	if (srclen > LOG_PREVIEW_MAX) {
		return LOG_PREVIEW_MAX;
	}
	return (int)srclen;
}

/**
 * Convert string from one encoding to another, logging any failure.
 *
 * Thin wrapper around convert_string_error() that adds a debug message
 * describing why the conversion failed.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or -1 for nul terminated.
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string - *NEVER* -1.
 * @param converted_size is the number of bytes occupied in the destination
 *
 * @returns the result of convert_string_error(). On failure errno still
 *          holds the value seen right after the conversion.
 */
bool convert_string(charset_t from, charset_t to,
		    void const *src, size_t srclen,
		    void *dest, size_t destlen,
		    size_t *converted_size)
{
	bool ret = convert_string_error(from, to, src, srclen, dest, destlen, converted_size);

	if (!ret) {
		const char *reason = "unknown error";
		/* Logging and charset lookups below may overwrite errno. */
		int saved_errno = errno;
		/*
		 * src is only guaranteed to be NUL terminated when srclen is
		 * -1, so never let the log statement read beyond srclen bytes.
		 */
		int src_log_len = convert_string_log_len(srclen);

		switch (saved_errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			DEBUG(3,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		case E2BIG:
		{
			struct smb_iconv_handle *ic;

			lazy_initialize_conv();
			ic = get_iconv_handle();

			reason = "No more room";
			if (from == CH_UNIX) {
				DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%.*s'\n",
					 charset_name(ic, from), charset_name(ic, to),
					 (unsigned int)srclen, (unsigned int)destlen,
					 src_log_len, (const char *)src));
			} else {
				DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
					 charset_name(ic, from), charset_name(ic, to),
					 (unsigned int)srclen, (unsigned int)destlen));
			}
			break;
		}
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			DEBUG(3,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		default:
			DEBUG(0,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		}

		errno = saved_errno;
	}
	return ret;
}
/**
 * Convert between character sets, allocating a new buffer using talloc for the result.
 *
 * @param ctx talloc context the result buffer is allocated on
 * @param from charset of the source buffer
 * @param to charset wanted for the result
 * @param src source buffer; NULL is rejected with EINVAL
 * @param srclen length of source buffer.
 * @param dst address of the pointer that receives the new buffer (passed
 *        as void * but used as void **); always set at least to NULL
 * @param converted_size set to the number of bytes occupied by the string in
 *        the destination on success.
 * @note -1 is not accepted for srclen.
 *
 * @return true if new buffer was correctly allocated, and string was
 *         converted. On failure errno is set and any partially built
 *         buffer is freed.
 *
 * Ensure the srclen contains the terminating zero.
 *
 * I hate the goto's in this function. It's embarrassing.....
 * There has to be a cleaner way to do this. JRA.
 */
bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
			   void const *src, size_t srclen, void *dst,
			   size_t *converted_size)
{
	/* Longest slice of unconverted input copied into an error log line. */
	enum { LOG_PREVIEW_MAX = 1024 };

	size_t i_len, o_len, destlen;
	size_t retval;
	const char *inbuf;
	char *outbuf;
	char *ob = NULL;
	char *tmp;
	smb_iconv_t descriptor;
	void **dest = (void **)dst;
	struct smb_iconv_handle *ic;

	*dest = NULL;

	if (src == NULL || srclen == (size_t)-1) {
		errno = EINVAL;
		return false;
	}

	if (srclen == 0) {
		/* We really should treat this as an error, but
		   there are too many callers that need this to
		   return a NULL terminated string in the correct
		   character set. */
		if (to == CH_UTF16LE || to == CH_UTF16BE || to == CH_UTF16MUNGED) {
			destlen = 2;
		} else {
			destlen = 1;
		}
		ob = talloc_zero_array(ctx, char, destlen);
		if (ob == NULL) {
			errno = ENOMEM;
			return false;
		}
		*converted_size = destlen;
		*dest = ob;
		return true;
	}

	lazy_initialize_conv();
	ic = get_iconv_handle();
	descriptor = get_conv_handle(ic, from, to);

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
		errno = EOPNOTSUPP;
		return false;
	}

	/* The first size guess is srclen * 3 / 2; refuse lengths that wrap. */
	if (srclen > ((size_t)-1) / 3) {
		DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
		errno = EOPNOTSUPP;
		return false;
	}
	destlen = (srclen * 3) / 2;

  convert:

	/* +2 is for ucs2 null termination. */
	if (destlen > (((size_t)-1) - 2) / 2) {
		/* wrapped ! abort. */
		DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
		/* Free the allocation itself, not a cursor into it. */
		TALLOC_FREE(ob);
		errno = EOPNOTSUPP;
		return false;
	}
	destlen = destlen * 2;

	/*
	 * +2 is for ucs2 null termination. Go through a temporary so the
	 * previous buffer can still be released if the realloc fails.
	 */
	tmp = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
	if (tmp == NULL) {
		DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
		TALLOC_FREE(ob);
		errno = ENOMEM;
		return false;
	}
	ob = tmp;

	/*
	 * Every attempt converts the whole source into the start of the
	 * buffer. smb_iconv is handed &inbuf and may have advanced it on a
	 * previous E2BIG attempt, so rewind it together with the lengths.
	 */
	inbuf = (const char *)src;
	outbuf = ob;
	i_len = srclen;
	o_len = destlen;

	retval = smb_iconv(descriptor,
			   &inbuf, &i_len,
			   &outbuf, &o_len);
	if (retval == (size_t)-1) {
		const char *reason = "unknown error";
		/* DEBUG and TALLOC_FREE below may overwrite errno. */
		int saved_errno = errno;
		/*
		 * The input is not necessarily NUL terminated: print at most
		 * the i_len bytes smb_iconv reports as still unconverted.
		 */
		int log_len = (i_len > LOG_PREVIEW_MAX) ? LOG_PREVIEW_MAX : (int)i_len;

		switch (saved_errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			DEBUG(3,("convert_string_talloc: Conversion error: %s(%.*s)\n",
				 reason, log_len, inbuf));
			break;
		case E2BIG:
			/* Output buffer too small: double it and start over. */
			goto convert;
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			DEBUG(3,("convert_string_talloc: Conversion error: %s(%.*s)\n",
				 reason, log_len, inbuf));
			break;
		default:
			break;
		}
		DEBUG(0,("Conversion error: %s(%.*s)\n", reason, log_len, inbuf));
		TALLOC_FREE(ob);
		errno = saved_errno;
		return false;
	}

	destlen = destlen - o_len;

	/* Don't shrink unless we're reclaiming a lot of
	 * space. This is in the hot codepath and these
	 * reallocs *cost*. JRA.
	 */
	if (o_len > 1024) {
		/* We're shrinking here so we know the +2 is safe from wrap. */
		tmp = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
		if (tmp == NULL) {
			DEBUG(0, ("convert_string_talloc: out of memory!\n"));
			TALLOC_FREE(ob);
			errno = ENOMEM;
			return false;
		}
		ob = tmp;
	}

	*dest = ob;

	/* Must ucs2 null terminate in the extra space we allocated. */
	ob[destlen] = '\0';
	ob[destlen + 1] = '\0';

	/* Ensure we can never return a *converted_size of zero. */
	if (destlen == 0) {
		/* As we're now returning false on a bad smb_iconv call,
		   this should never happen. But be safe anyway. */
		if (to == CH_UTF16LE || to == CH_UTF16BE || to == CH_UTF16MUNGED) {
			destlen = 2;
		} else {
			destlen = 1;
		}
	}

	*converted_size = destlen;
	return true;
}

View File

@ -442,7 +442,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
lib/util_transfer_file.o ../lib/async_req/async_sock.o \
lib/addrchange.o \
$(TDB_LIB_OBJ) \
$(VERSION_OBJ) lib/charcnv.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
$(VERSION_OBJ) lib/charcnv.o ../lib/util/charset/convert_string.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
lib/interface.o lib/pidfile.o lib/dumpcore.o \
lib/system.o lib/sendfile.o lib/recvfile.o lib/time.o \
lib/username.o \

View File

@ -72,6 +72,7 @@ int bitmap_find(struct bitmap *bm, unsigned ofs);
/* The following definitions come from lib/charcnv.c */
void lazy_initialize_conv(void);
void gfree_charcnv(void);
void init_iconv(void);
bool convert_string(charset_t from, charset_t to,

View File

@ -22,25 +22,9 @@
*/
#include "includes.h"
/**
* @file
*
* @brief Character-set conversion routines built on our iconv.
*
* @note Samba's internal character set (at least in the 3.0 series)
* is always the same as the one for the Unix filesystem. It is
* <b>not</b> necessarily UTF-8 and may be different on machines that
* need i18n filenames to be compatible with Unix software. It does
* have to be a superset of ASCII. All multibyte sequences must start
* with a byte with the high bit set.
*
* @sa lib/iconv.c
*/
static bool initialized;
static void lazy_initialize_conv(void)
void lazy_initialize_conv(void)
{
if (!initialized) {
load_case_tables_library();
@ -72,434 +56,6 @@ void init_iconv(void)
true, global_iconv_handle);
}
/**
 * Convert string from one encoding to another, making error checking etc
 * Slow path version - uses (slow) iconv.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or (size_t)-1 if
 *        src is NUL terminated (the length, terminator included, is then
 *        computed here)
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string
 * @param converted_size is the number of bytes occupied in the destination.
 *        It is always written: 0 when no conversion descriptor exists,
 *        otherwise the number of bytes produced before smb_iconv returned,
 *        even if that return was a failure.
 *
 * @returns false on fail, true on success. errno is set to EINVAL when no
 *          conversion descriptor is available; otherwise it is whatever
 *          smb_iconv left behind.
 *
 * Ensure the srclen contains the terminating zero.
 *
 **/
static bool convert_string_internal(charset_t from, charset_t to,
				    void const *src, size_t srclen,
				    void *dest, size_t destlen, size_t *converted_size)
{
	size_t i_len, o_len;
	size_t retval;
	const char *inbuf = (const char *)src;
	char *outbuf = (char *)dest;
	smb_iconv_t descriptor;
	struct smb_iconv_handle *ic;

	/*
	 * The fast paths in convert_string_error() add their own byte count
	 * to *converted_size even when we fail, so never leave it unset.
	 */
	*converted_size = 0;

	lazy_initialize_conv();
	ic = get_iconv_handle();
	descriptor = get_conv_handle(ic, from, to);

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		errno = EINVAL;
		return false;
	}

	/* Only measure the source once we know a conversion is possible. */
	if (srclen == (size_t)-1) {
		if (from == CH_UTF16LE || from == CH_UTF16BE) {
			srclen = (strlen_w((const smb_ucs2_t *)src) + 1) * 2;
		} else {
			srclen = strlen((const char *)src) + 1;
		}
	}

	i_len = srclen;
	o_len = destlen;

	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);

	/* Report what was produced, whether or not the call succeeded. */
	*converted_size = destlen - o_len;

	return retval != (size_t)-1;
}
/**
 * Convert string from one encoding to another, making error checking etc
 * Fast path version - handles ASCII first.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or -1 for nul terminated.
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string - *NEVER* -1.
 * @param converted_size is the number of bytes occupied in the destination
 *
 * @returns false and sets errno on fail, true on success. E2BIG is
 *          reported when the destination fills up before the source is
 *          consumed.
 *
 * Ensure the srclen contains the terminating zero.
 *
 * This function has been hand-tuned to provide a fast path.
 * Don't change unless you really know what you are doing. JRA.
 **/
bool convert_string_error(charset_t from, charset_t to,
			  void const *src, size_t srclen,
			  void *dest, size_t destlen,
			  size_t *converted_size)
{
	/*
	 * NB. We deliberately don't do a strlen here if srclen == -1.
	 * This is very expensive over millions of calls and is taken
	 * care of in the slow path in convert_string_internal. JRA.
	 */

#ifdef DEVELOPER
	SMB_ASSERT(destlen != (size_t)-1);
#endif

	if (srclen == 0) {
		*converted_size = 0;
		return true;
	}

	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
		/* Single/multibyte to single/multibyte: copy ASCII bytes as-is. */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';
		size_t retval = 0;

		/* If all characters are ascii, fast path here. */
		while (slen && dlen) {
			if ((lastp = *p) <= 0x7f) {
				*q++ = *p++;
				if (slen != (size_t)-1) {
					slen--;
				}
				dlen--;
				retval++;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/*
				 * Hand the rest to iconv. Collect its count in
				 * a zeroed local: it may fail without storing
				 * anything, and *converted_size may be unset.
				 */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		if (!dlen) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	} else if (from == CH_UTF16LE && to != CH_UTF16LE && to != CH_UTF16BE) {
		/*
		 * UTF-16LE to a byte-oriented charset. A UTF-16BE target is
		 * excluded: this path emits one byte per character, which
		 * is not a valid UTF-16 encoding.
		 */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t retval = 0;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';

		/* If all characters are ascii, fast path here. */
		while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
			if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
				*q++ = *p;
				if (slen != (size_t)-1) {
					slen -= 2;
				}
				p += 2;
				dlen--;
				retval++;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/* See the first fast path for why a local is used. */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		if (!dlen) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
		/* Byte-oriented charset to UTF-16LE: widen ASCII bytes. */
		const unsigned char *p = (const unsigned char *)src;
		unsigned char *q = (unsigned char *)dest;
		size_t retval = 0;
		size_t slen = srclen;
		size_t dlen = destlen;
		unsigned char lastp = '\0';

		/* If all characters are ascii, fast path here. */
		while (slen && (dlen >= 2)) {
			if ((lastp = *p) <= 0x7F) {
				*q++ = *p++;
				*q++ = '\0';
				if (slen != (size_t)-1) {
					slen--;
				}
				dlen -= 2;
				retval += 2;
				if (!lastp)
					break;
			} else {
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
				goto general_case;
#else
				/* See the first fast path for why a local is used. */
				size_t tail = 0;
				bool ret = convert_string_internal(from, to, p, slen, q, dlen, &tail);
				*converted_size = retval + tail;
				return ret;
#endif
			}
		}

		*converted_size = retval;

		/*
		 * The loop needs two free bytes per character, so with an odd
		 * destlen it stops at dlen == 1, not 0. Treat "less than one
		 * UTF-16 unit left" as out of room so truncation is reported.
		 */
		if (dlen < 2) {
			/* Even if we fast path we should note if we ran out of room. */
			if (((slen != (size_t)-1) && slen) ||
			    ((slen == (size_t)-1) && lastp)) {
				errno = E2BIG;
				return false;
			}
		}
		return true;
	}

#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
  general_case:
#endif
	return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
}
/**
 * Work out the "%.*s" precision to use when logging a conversion source.
 *
 * @param srclen source length as passed to convert_string()
 *
 * @returns -1 when srclen is (size_t)-1 (the source is NUL terminated, and
 *          a negative precision makes printf behave as if none was given),
 *          otherwise srclen capped so it fits an int and keeps log lines
 *          bounded.
 */
static int convert_string_log_len(size_t srclen)
{
	enum { LOG_PREVIEW_MAX = 1024 };

	if (srclen == (size_t)-1) {
		return -1;
	}
	if (srclen > LOG_PREVIEW_MAX) {
		return LOG_PREVIEW_MAX;
	}
	return (int)srclen;
}

/**
 * Convert string from one encoding to another, logging any failure.
 *
 * Thin wrapper around convert_string_error() that adds a debug message
 * describing why the conversion failed.
 *
 * @param from charset of the source string
 * @param to charset wanted for the destination string
 * @param src pointer to source string (multibyte or singlebyte)
 * @param srclen length of the source string in bytes, or -1 for nul terminated.
 * @param dest pointer to destination string (multibyte or singlebyte)
 * @param destlen maximal length allowed for string - *NEVER* -1.
 * @param converted_size is the number of bytes occupied in the destination
 *
 * @returns the result of convert_string_error(). On failure errno still
 *          holds the value seen right after the conversion.
 */
bool convert_string(charset_t from, charset_t to,
		    void const *src, size_t srclen,
		    void *dest, size_t destlen,
		    size_t *converted_size)
{
	bool ret = convert_string_error(from, to, src, srclen, dest, destlen, converted_size);

	if (!ret) {
		const char *reason = "unknown error";
		/* Logging and charset lookups below may overwrite errno. */
		int saved_errno = errno;
		/*
		 * src is only guaranteed to be NUL terminated when srclen is
		 * -1, so never let the log statement read beyond srclen bytes.
		 */
		int src_log_len = convert_string_log_len(srclen);

		switch (saved_errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			DEBUG(3,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		case E2BIG:
		{
			struct smb_iconv_handle *ic;

			lazy_initialize_conv();
			ic = get_iconv_handle();

			reason = "No more room";
			if (from == CH_UNIX) {
				DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%.*s'\n",
					 charset_name(ic, from), charset_name(ic, to),
					 (unsigned int)srclen, (unsigned int)destlen,
					 src_log_len, (const char *)src));
			} else {
				DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
					 charset_name(ic, from), charset_name(ic, to),
					 (unsigned int)srclen, (unsigned int)destlen));
			}
			break;
		}
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			DEBUG(3,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		default:
			DEBUG(0,("convert_string_internal: Conversion error: %s(%.*s)\n",
				 reason, src_log_len, (const char *)src));
			break;
		}

		errno = saved_errno;
	}
	return ret;
}
/**
 * Convert between character sets, allocating a new buffer using talloc for the result.
 *
 * @param ctx talloc context the result buffer is allocated on
 * @param from charset of the source buffer
 * @param to charset wanted for the result
 * @param src source buffer; NULL is rejected with EINVAL
 * @param srclen length of source buffer.
 * @param dst address of the pointer that receives the new buffer (passed
 *        as void * but used as void **); always set at least to NULL
 * @param converted_size set to the number of bytes occupied by the string in
 *        the destination on success.
 * @note -1 is not accepted for srclen.
 *
 * @return true if new buffer was correctly allocated, and string was
 *         converted. On failure errno is set and any partially built
 *         buffer is freed.
 *
 * Ensure the srclen contains the terminating zero.
 *
 * I hate the goto's in this function. It's embarrassing.....
 * There has to be a cleaner way to do this. JRA.
 */
bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
			   void const *src, size_t srclen, void *dst,
			   size_t *converted_size)
{
	/* Longest slice of unconverted input copied into an error log line. */
	enum { LOG_PREVIEW_MAX = 1024 };

	size_t i_len, o_len, destlen;
	size_t retval;
	const char *inbuf;
	char *outbuf;
	char *ob = NULL;
	char *tmp;
	smb_iconv_t descriptor;
	void **dest = (void **)dst;
	struct smb_iconv_handle *ic;

	*dest = NULL;

	if (src == NULL || srclen == (size_t)-1) {
		errno = EINVAL;
		return false;
	}

	if (srclen == 0) {
		/* We really should treat this as an error, but
		   there are too many callers that need this to
		   return a NULL terminated string in the correct
		   character set. */
		if (to == CH_UTF16LE || to == CH_UTF16BE || to == CH_UTF16MUNGED) {
			destlen = 2;
		} else {
			destlen = 1;
		}
		ob = talloc_zero_array(ctx, char, destlen);
		if (ob == NULL) {
			errno = ENOMEM;
			return false;
		}
		*converted_size = destlen;
		*dest = ob;
		return true;
	}

	lazy_initialize_conv();
	ic = get_iconv_handle();
	descriptor = get_conv_handle(ic, from, to);

	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
		DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
		errno = EOPNOTSUPP;
		return false;
	}

	/* The first size guess is srclen * 3 / 2; refuse lengths that wrap. */
	if (srclen > ((size_t)-1) / 3) {
		DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
		errno = EOPNOTSUPP;
		return false;
	}
	destlen = (srclen * 3) / 2;

  convert:

	/* +2 is for ucs2 null termination. */
	if (destlen > (((size_t)-1) - 2) / 2) {
		/* wrapped ! abort. */
		DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
		/* Free the allocation itself, not a cursor into it. */
		TALLOC_FREE(ob);
		errno = EOPNOTSUPP;
		return false;
	}
	destlen = destlen * 2;

	/*
	 * +2 is for ucs2 null termination. Go through a temporary so the
	 * previous buffer can still be released if the realloc fails.
	 */
	tmp = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
	if (tmp == NULL) {
		DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
		TALLOC_FREE(ob);
		errno = ENOMEM;
		return false;
	}
	ob = tmp;

	/*
	 * Every attempt converts the whole source into the start of the
	 * buffer. smb_iconv is handed &inbuf and may have advanced it on a
	 * previous E2BIG attempt, so rewind it together with the lengths.
	 */
	inbuf = (const char *)src;
	outbuf = ob;
	i_len = srclen;
	o_len = destlen;

	retval = smb_iconv(descriptor,
			   &inbuf, &i_len,
			   &outbuf, &o_len);
	if (retval == (size_t)-1) {
		const char *reason = "unknown error";
		/* DEBUG and TALLOC_FREE below may overwrite errno. */
		int saved_errno = errno;
		/*
		 * The input is not necessarily NUL terminated: print at most
		 * the i_len bytes smb_iconv reports as still unconverted.
		 */
		int log_len = (i_len > LOG_PREVIEW_MAX) ? LOG_PREVIEW_MAX : (int)i_len;

		switch (saved_errno) {
		case EINVAL:
			reason = "Incomplete multibyte sequence";
			DEBUG(3,("convert_string_talloc: Conversion error: %s(%.*s)\n",
				 reason, log_len, inbuf));
			break;
		case E2BIG:
			/* Output buffer too small: double it and start over. */
			goto convert;
		case EILSEQ:
			reason = "Illegal multibyte sequence";
			DEBUG(3,("convert_string_talloc: Conversion error: %s(%.*s)\n",
				 reason, log_len, inbuf));
			break;
		default:
			break;
		}
		DEBUG(0,("Conversion error: %s(%.*s)\n", reason, log_len, inbuf));
		TALLOC_FREE(ob);
		errno = saved_errno;
		return false;
	}

	destlen = destlen - o_len;

	/* Don't shrink unless we're reclaiming a lot of
	 * space. This is in the hot codepath and these
	 * reallocs *cost*. JRA.
	 */
	if (o_len > 1024) {
		/* We're shrinking here so we know the +2 is safe from wrap. */
		tmp = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
		if (tmp == NULL) {
			DEBUG(0, ("convert_string_talloc: out of memory!\n"));
			TALLOC_FREE(ob);
			errno = ENOMEM;
			return false;
		}
		ob = tmp;
	}

	*dest = ob;

	/* Must ucs2 null terminate in the extra space we allocated. */
	ob[destlen] = '\0';
	ob[destlen + 1] = '\0';

	/* Ensure we can never return a *converted_size of zero. */
	if (destlen == 0) {
		/* As we're now returning false on a bad smb_iconv call,
		   this should never happen. But be safe anyway. */
		if (to == CH_UTF16LE || to == CH_UTF16BE || to == CH_UTF16MUNGED) {
			destlen = 2;
		} else {
			destlen = 1;
		}
	}

	*converted_size = destlen;
	return true;
}
bool unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
size_t size;

View File

@ -963,7 +963,7 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap3',
vars=locals())
bld.SAMBA3_SUBSYSTEM('CHARSET3',
source='''lib/util_str.c lib/charcnv.c lib/fstring.c''',
source='''lib/util_str.c lib/charcnv.c ../lib/util/charset/convert_string.c lib/fstring.c''',
public_deps='ICONV_WRAPPER CODEPOINTS',
deps='DYNCONFIG')