From f23acb4ca5feac8ad2acfa1baf7df31283aba3ea Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Sat, 13 Sep 2003 22:41:21 +0000 Subject: [PATCH] Fix for MacOS/X which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS ! (rant off :-). Inspired by work from Benjamin Riefenstahl . Also add MacOSX/Darwin configure fixes. Jerry - can we put this in 3.0 release ? :-). Jeremy. --- source/configure.in | 29 ++++++++++++++++- source/lib/charcnv.c | 75 +++++++++++++++++++++++++++++-------------- source/lib/util_str.c | 15 +++++++-- 3 files changed, 91 insertions(+), 28 deletions(-) diff --git a/source/configure.in b/source/configure.in index a2e04b5d480..10f1b2f7fc5 100644 --- a/source/configure.in +++ b/source/configure.in @@ -402,7 +402,6 @@ case "$host_os" in *freebsd*) AC_DEFINE(FREEBSD, 1, [Whether the host os is FreeBSD]) ;; - # # VOS may need to have POSIX support and System V compatibility enabled. # @@ -503,6 +502,26 @@ main() { AC_MSG_RESULT([$LINUX_LFS_SUPPORT]) ;; +# +# MacOS X is the *only* system that uses compose character in utf8. This +# is so horribly broken.... +# + *darwin*) + AC_DEFINE(BROKEN_UNICODE_COMPOSE_CHARACTERS, 1, [Does this system use unicode compose characters]) +# Add Fink directories for various packages, like dlcompat. +# Note: iconv does that explicitly below, but other packages +# don't. + CPPFLAGS="$CPPFLAGS -I/sw/include" + LDFLAGS="$LDFLAGS -L/sw/lib" + +# If we have dlsym_prepend_underscore (from Fink's dlcompat), +# use that instead of plain dlsym. + + AC_CHECK_LIB(dl,dlopen) + AC_CHECK_FUNCS(dlsym_prepend_underscore, + [CPPFLAGS="$CPPFLAGS -Ddlsym=dlsym_prepend_underscore"]) + + ;; *hurd*) AC_MSG_CHECKING([for LFS support]) old_CPPFLAGS="$CPPFLAGS" @@ -1162,6 +1181,14 @@ if test "$enable_shared" = "yes"; then BLDSHARED="false" LDSHFLAGS="" ;; + + darwin*) AC_DEFINE(DARWINOS,1,[Whether the host os is Darwin/MacOSX]) + BLDSHARED="true" + LDSHFLAGS="-bundle -flat_namespace -undefined suppress" + SHLIBEXT="dylib" + AC_DEFINE(STAT_ST_BLOCKSIZE,512) + ;; + *) AC_DEFINE(STAT_ST_BLOCKSIZE,512) ;; diff --git a/source/lib/charcnv.c b/source/lib/charcnv.c index e8ae40dbe29..dafc88fb77a 100644 --- a/source/lib/charcnv.c +++ b/source/lib/charcnv.c @@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to, descriptor = conv_handles[from][to]; + if (srclen == (size_t)-1) { + if (from == CH_UCS2) { + srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; + } else { + srclen = strlen((const char *)src)+1; + } + } + if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { if (!conv_silent) DEBUG(0,("convert_string_internal: Conversion not supported.\n")); @@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to, void const *src, size_t srclen, void *dest, size_t destlen) { + /* + * NB. We deliberately don't do a strlen here is srclen == -1. + * This is very expensive over millions of calls and is taken + * care of in the slow path in convert_string_internal. JRA. + */ + if (srclen == 0) return 0; if (from != CH_UCS2 && to != CH_UCS2) { const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; size_t retval = 0; /* If all characters are ascii, fast path here. */ - while (srclen && destlen) { + while (slen && dlen) { if ((lastp = *p) <= 0x7f) { *q++ = *p++; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. */ - while ((srclen >= 2) && destlen) { + while ((slen >= 2) && dlen) { if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { *q++ = *p; - if (srclen != (size_t)-1) { - srclen -= 2; + if (slen != (size_t)-1) { + slen -= 2; } p += 2; - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = (strlen_w((const smb_ucs2_t *)p)+1) * 2; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. */ - while (srclen && (destlen >= 2)) { + while (slen && (dlen >= 2)) { if ((lastp = *p) <= 0x7F) { *q++ = *p++; *q++ = '\0'; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen -= 2; + dlen -= 2; retval += 2; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; } + +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + general_case: +#endif return convert_string_internal(from, to, src, srclen, dest, destlen); } diff --git a/source/lib/util_str.c b/source/lib/util_str.c index c065bfe9db6..15ac1639a9a 100644 --- a/source/lib/util_str.c +++ b/source/lib/util_str.c @@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc) return; /* Slow (mb) path. */ +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + p = s; +#endif push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE); string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc)); pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE); @@ -1175,26 +1179,31 @@ char *string_truncate(char *s, unsigned int length) We convert via ucs2 for now. **/ -char *strchr_m(const char *s, char c) +char *strchr_m(const char *src, char c) { wpstring ws; pstring s2; smb_ucs2_t *p; + const char *s; /* this is quite a common operation, so we want it to be fast. We optimise for the ascii case, knowing that all our supported multi-byte character sets are ascii-compatible (ie. they match for the first 128 chars) */ - while (*s && !(((unsigned char)s[0]) & 0x80)) { + for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { if (*s == c) return s; - s++; } if (!*s) return NULL; +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + s = src; +#endif + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); p = strchr_w(ws, UCS2_CHAR(c)); if (!p)