1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-26 10:04:02 +03:00

Fix for MacOS/X which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS !

(rant off :-). Inspired by work from Benjamin Riefenstahl <Benjamin.Riefenstahl@epost.de>.
Also add MacOSX/Darwin configure fixes.
Jerry - can we put this in 3.0 release ? :-).
Jeremy.
This commit is contained in:
Jeremy Allison -
parent f23c9d36b0
commit f23acb4ca5
3 changed files with 91 additions and 28 deletions

View File

@ -402,7 +402,6 @@ case "$host_os" in
*freebsd*)
AC_DEFINE(FREEBSD, 1, [Whether the host os is FreeBSD])
;;
#
# VOS may need to have POSIX support and System V compatibility enabled.
#
@ -503,6 +502,26 @@ main() {
AC_MSG_RESULT([$LINUX_LFS_SUPPORT])
;;
#
# MacOS X is the *only* system that uses compose characters in UTF-8. This
# is so horribly broken....
#
*darwin*)
AC_DEFINE(BROKEN_UNICODE_COMPOSE_CHARACTERS, 1, [Does this system use unicode compose characters])
# Add Fink directories for various packages, like dlcompat.
# Note: iconv does that explicitly below, but other packages
# don't.
CPPFLAGS="$CPPFLAGS -I/sw/include"
LDFLAGS="$LDFLAGS -L/sw/lib"
# If we have dlsym_prepend_underscore (from Fink's dlcompat),
# use that instead of plain dlsym.
AC_CHECK_LIB(dl,dlopen)
AC_CHECK_FUNCS(dlsym_prepend_underscore,
[CPPFLAGS="$CPPFLAGS -Ddlsym=dlsym_prepend_underscore"])
;;
*hurd*)
AC_MSG_CHECKING([for LFS support])
old_CPPFLAGS="$CPPFLAGS"
@ -1162,6 +1181,14 @@ if test "$enable_shared" = "yes"; then
BLDSHARED="false"
LDSHFLAGS=""
;;
darwin*) AC_DEFINE(DARWINOS,1,[Whether the host os is Darwin/MacOSX])
BLDSHARED="true"
LDSHFLAGS="-bundle -flat_namespace -undefined suppress"
SHLIBEXT="dylib"
AC_DEFINE(STAT_ST_BLOCKSIZE,512)
;;
*)
AC_DEFINE(STAT_ST_BLOCKSIZE,512)
;;

View File

@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to,
descriptor = conv_handles[from][to];
if (srclen == (size_t)-1) {
if (from == CH_UCS2) {
srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
} else {
srclen = strlen((const char *)src)+1;
}
}
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
if (!conv_silent)
DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to,
void const *src, size_t srclen,
void *dest, size_t destlen)
{
/*
* NB. We deliberately don't do a strlen here if srclen == -1.
* This is very expensive over millions of calls and is taken
* care of in the slow path in convert_string_internal. JRA.
*/
if (srclen == 0)
return 0;
if (from != CH_UCS2 && to != CH_UCS2) {
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
size_t slen = srclen;
size_t dlen = destlen;
unsigned char lastp;
size_t retval = 0;
/* If all characters are ascii, fast path here. */
while (srclen && destlen) {
while (slen && dlen) {
if ((lastp = *p) <= 0x7f) {
*q++ = *p++;
if (srclen != (size_t)-1) {
srclen--;
if (slen != (size_t)-1) {
slen--;
}
destlen--;
dlen--;
retval++;
if (!lastp)
break;
} else {
if (srclen == (size_t)-1) {
srclen = strlen(p)+1;
}
return retval + convert_string_internal(from, to, p, srclen, q, destlen);
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
goto general_case;
#else
return retval + convert_string_internal(from, to, p, slen, q, dlen);
#endif
}
}
return retval;
@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to,
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
size_t retval = 0;
size_t slen = srclen;
size_t dlen = destlen;
unsigned char lastp;
/* If all characters are ascii, fast path here. */
while ((srclen >= 2) && destlen) {
while ((slen >= 2) && dlen) {
if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
*q++ = *p;
if (srclen != (size_t)-1) {
srclen -= 2;
if (slen != (size_t)-1) {
slen -= 2;
}
p += 2;
destlen--;
dlen--;
retval++;
if (!lastp)
break;
} else {
if (srclen == (size_t)-1) {
srclen = (strlen_w((const smb_ucs2_t *)p)+1) * 2;
}
return retval + convert_string_internal(from, to, p, srclen, q, destlen);
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
goto general_case;
#else
return retval + convert_string_internal(from, to, p, slen, q, dlen);
#endif
}
}
return retval;
@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to,
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
size_t retval = 0;
size_t slen = srclen;
size_t dlen = destlen;
unsigned char lastp;
/* If all characters are ascii, fast path here. */
while (srclen && (destlen >= 2)) {
while (slen && (dlen >= 2)) {
if ((lastp = *p) <= 0x7F) {
*q++ = *p++;
*q++ = '\0';
if (srclen != (size_t)-1) {
srclen--;
if (slen != (size_t)-1) {
slen--;
}
destlen -= 2;
dlen -= 2;
retval += 2;
if (!lastp)
break;
} else {
if (srclen == (size_t)-1) {
srclen = strlen(p)+1;
}
return retval + convert_string_internal(from, to, p, srclen, q, destlen);
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
goto general_case;
#else
return retval + convert_string_internal(from, to, p, slen, q, dlen);
#endif
}
}
return retval;
}
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
general_case:
#endif
return convert_string_internal(from, to, src, srclen, dest, destlen);
}

View File

@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc)
return;
/* Slow (mb) path. */
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
/* With compose characters we must restart from the beginning. JRA. */
p = s;
#endif
push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE);
string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc));
pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE);
@ -1175,26 +1179,31 @@ char *string_truncate(char *s, unsigned int length)
We convert via ucs2 for now.
**/
char *strchr_m(const char *s, char c)
char *strchr_m(const char *src, char c)
{
wpstring ws;
pstring s2;
smb_ucs2_t *p;
const char *s;
/* this is quite a common operation, so we want it to be
fast. We optimise for the ascii case, knowing that all our
supported multi-byte character sets are ascii-compatible
(ie. they match for the first 128 chars) */
while (*s && !(((unsigned char)s[0]) & 0x80)) {
for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
if (*s == c)
return s;
s++;
}
if (!*s)
return NULL;
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
/* With compose characters we must restart from the beginning. JRA. */
s = src;
#endif
push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE);
p = strchr_w(ws, UCS2_CHAR(c));
if (!p)