samba-mirror/charset_macosxfs.c at ac5f0785c83d971a043cc99369dd491bee2be302

mirror of https://github.com/samba-team/samba.git synced 2024-12-25 23:21:54 +03:00

Jeremy Allison b1b4d67f65 Patch from Benjamin Riefenstahl <Benjamin.Riefenstahl@epost.de> to add

MacOSX (Darwin) specific charset module code. Also had to add AC_CHECK_CPP
to configure.in (this took a *long* time to track down) to make autoconf
work correctly on Fedora Core 1.
Jeremy.
(This used to be commit a571194342)

2003-11-26 20:58:51 +00:00

603 lines

16 KiB

C

Raw Blame History

 /*
    Unix SMB/CIFS implementation.
    Samba charset module for Mac OS X/Darwin
    Copyright (C) Benjamin Riefenstahl 2003
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 /*
  * modules/charset_macosxfs.c
  *
  * A Samba charset module to use on Mac OS X/Darwin as the filesystem
  * and display encoding.
  *
  * Actually two implementations are provided here.  The default
  * implementation is based on the official CFString API.  The other is
  * based on internal CFString APIs as defined in the OpenDarwin
  * source.
  */
 #include "includes.h"
 /*
  * Include OS frameworks.  These are only needed in this module.
  */
 #include <CoreFoundation/CFString.h>
 /*
  * See if autoconf has found us the internal headers in some form.
  */
 /*
  * Pick up the internal converter headers if configure found them,
  * either inside the CoreFoundation framework or as loose headers.
  * The framework directory is spelled exactly as in the CFString.h
  * include above so the lookup also works on case-sensitive volumes.
  * USE_INTERNAL_API selects the implementation further down.
  */
 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
 #	include <CoreFoundation/CFStringEncodingConverter.h>
 #	include <CoreFoundation/CFUnicodePrecomposition.h>
 #	define USE_INTERNAL_API 1
 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
 #	include <CFStringEncodingConverter.h>
 #	include <CFUnicodePrecomposition.h>
 #	define USE_INTERNAL_API 1
 #endif
 /*
  * Compile time configuration: Do we want debug output?
  */
 /* #define DEBUG_STRINGS 1 */
 /*
  * A simple, but efficient memory provider for our buffers.
  */
 /*
  * Make sure a heap buffer can hold at least `newsize` bytes.
  *
  * buffer   Current allocation, or NULL if there is none yet.
  * size     In/out: capacity of `buffer` in bytes.
  * newsize  Number of bytes the caller needs.
  *
  * If the buffer is already large enough it is returned unchanged.
  * Otherwise it is grown to `newsize` plus 128 bytes of slack and
  * *size is updated to the new capacity.
  *
  * Returns the (possibly moved) buffer.  If the request overflows
  * size_t or the allocation fails, the old buffer is freed, *size is
  * reset to 0 and NULL is returned, so that pointer and capacity stay
  * consistent for callers that store the result back into the same
  * variable.
  */
 static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
 {
 	static const size_t slack = 128;
 	void *grown = NULL;

 	if (newsize <= *size) {
 		return buffer;
 	}
 	/* Only attempt the allocation if newsize + slack cannot wrap. */
 	if (newsize <= (size_t)-1 - slack) {
 		grown = realloc(buffer, newsize + slack);
 	}
 	if (NULL == grown) {
 		free(buffer);
 		*size = 0;
 		return NULL;
 	}
 	*size = newsize + slack;
 	return grown;
 }
 /*
  * While there is a version of OpenDarwin for intel, the usual case is
  * big-endian PPC.  So we need byte swapping to handle the
  * little-endian byte order of the network protocol.  We also need an
  * additional dynamic buffer to do this work for incoming data blocks,
  * because we have to consider the original data as constant.
  *
  * We abstract the differences away by providing a simple facade with
  * these functions/macros:
  *
  *	le_to_native(dst,src,len)
  *	native_to_le(cp,len)
  *	set_ucbuffer_with_le(buffer,bufsize,data,size)
  *	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
  */
 #ifdef WORDS_BIGENDIAN
 /*
  * Copy `len` bytes from src to dst, exchanging the two bytes of every
  * complete 16-bit unit.
  *
  * If `len` is odd, the trailing byte has no partner and is copied
  * unchanged; nothing beyond `len` bytes is read or written.  The two
  * regions must not overlap.
  */
 static inline void swap_bytes (char * dst, const char * src, size_t len)
 {
 	const size_t paired = len & ~(size_t)1;
 	size_t pos;

 	for (pos = 0; pos < paired; pos += 2) {
 		dst[pos] = src[pos + 1];
 		dst[pos + 1] = src[pos];
 	}
 	if (paired != len) {
 		dst[paired] = src[paired];
 	}
 }
 /*
  * Exchange the two bytes of every complete 16-bit unit in the first
  * `len` bytes at cp, in place.
  *
  * If `len` is odd, the trailing byte is left untouched; nothing
  * beyond `len` bytes is read or written.
  */
 static inline void swap_bytes_inplace (char * cp, size_t len)
 {
 	char *pair = cp;
 	char * const stop = cp + (len & ~(size_t)1);

 	while (pair < stop) {
 		const char first = pair[0];
 		pair[0] = pair[1];
 		pair[1] = first;
 		pair += 2;
 	}
 }
 /*
  * Big-endian host: little-endian data is byte-swapped on the way in
  * (into a separate destination) and in place on the way out.
  */
 #define le_to_native(dst,src,len)	swap_bytes(dst,src,len)
 #define native_to_le(cp,len)		swap_bytes_inplace(cp,len)
 /* Copy the data into the dynamic buffer, reserving no extra room. */
 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
 	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
 #else	/* ! WORDS_BIGENDIAN */
 /*
  * Little-endian host: the data already has native byte order, so
  * "conversion" is a plain copy or nothing at all.
  */
 #define le_to_native(dst,src,len)	memcpy(dst,src,len)
 #define native_to_le(cp,len)		/* nothing */
 /*
  * No copy needed: the expression evaluates to the input data itself,
  * cast to UniChar*.  `bufsize` is evaluated and discarded; `buffer`
  * and `size` are not used.
  */
 #define	set_ucbuffer_with_le(buffer,bufsize,data,size) \
 	(((void)(bufsize)),(UniChar*)(data))
 #endif
 /*
  * Copy little-endian UTF-16 data into a dynamic buffer in native
  * byte order.
  *
  * buffer   Current dynamic buffer, or NULL.
  * bufsize  In/out: capacity of `buffer` in bytes.
  * data     Little-endian input data.
  * size     Number of input bytes to copy.
  * reserve  Additional bytes of room to keep available behind the
  *          copied data.
  *
  * Returns the (possibly moved) buffer holding the converted copy, or
  * NULL if a buffer of size+reserve bytes could not be provided.  In
  * the NULL case nothing is copied.
  */
 static inline UniChar *set_ucbuffer_with_le_copy (
 	UniChar *buffer, size_t *bufsize,
 	const void *data, size_t size, size_t reserve)
 {
 	/* Refuse requests whose total size would wrap around. */
 	if (reserve > (size_t)-1 - size) {
 		free(buffer);
 		*bufsize = 0;
 		return NULL;
 	}
 	buffer = resize_buffer(buffer, bufsize, size+reserve);
 	if (NULL == buffer) {
 		return NULL;
 	}
 	le_to_native((char*)buffer,data,size);
 	return buffer;
 }
 /*
  * A simple hexdump function for debugging error conditions.
  */
 /*
  * Emit a string at debug level 0.  The string is passed as an
  * argument to a fixed "%s" format, never as the format itself,
  * because hexdump() hands over lines containing raw input bytes
  * that may include '%'.
  */
 #define	debug_out(s)	DEBUG(0,("%s",(s)))
 #ifdef DEBUG_STRINGS
 /*
  * Write a labelled hex dump of `len` bytes at `s` to the debug log.
  *
  * Every output line covers up to 16 bytes: the offset, two groups of
  * eight hex values and the bytes as text, with everything outside
  * printable ASCII shown as '.'.  The dump is framed by "<<<<<<<" and
  * ">>>>>>>" marker lines.
  */
 static void hexdump( const char * label, const char * s, size_t len )
 {
 	size_t remaining = len;

 	debug_out("<<<<<<<\n");
 	debug_out(label);
 	debug_out("\n");
 	while (remaining > 0) {
 		char line[100];
 		char *out = line;
 		const size_t shown = (remaining < 16) ? remaining : 16;
 		size_t col;
 #undef sprintf
 		/* Offset of this row within the dumped data. */
 		out += sprintf(out, "%04X ", (unsigned)(len-remaining));
 		*out++ = ' ';
 		/* Hex columns, blank-padded, with a gap after the 8th. */
 		for (col = 0; col < 16; ++col) {
 			if (8 == col) {
 				*out++ = ' ';
 			}
 			if (col < shown) {
 				out += sprintf(out, "%02X ",
 					       ((unsigned)s[col]) & 0xFF);
 			} else {
 				out += sprintf(out, "   ");
 			}
 		}
 		*out++ = ' ';
 		/* Text column. */
 		for (col = 0; col < shown; ++col) {
 			const char ch = s[col];
 			const int plain =
 				(ch >= ' ' && ch < 0x7F && isprint(ch));
 			*out++ = plain ? ch : '.';
 		}
 		*out++ = '\n';
 		*out = 0;
 		remaining -= shown;
 		s += shown;
 		debug_out(line);
 	}
 	debug_out(">>>>>>>\n");
 }
 #else	/* !DEBUG_STRINGS */
 /*
  * Without DEBUG_STRINGS every hexdump() call expands to nothing, so
  * its arguments are not evaluated.
  */
 #define hexdump(label,s,len) /* nothing */
 #endif
 #if !USE_INTERNAL_API
 /*
  * An implementation based on documented Mac OS X APIs.
  *
  * This does a certain amount of memory management, creating and
  * manipulating CFString objects.  We try to minimize the impact by
  * keeping those objects around and re-using them.  We also use
  * external backing store for the CFStrings where this is possible and
  * beneficial.
  *
  * The Unicode normalizations forms available at this level are
  * generic, not specifically for the file system.  So they may not be
  * perfect fits.
  */
 /*
  * Convert a script (UTF-8) string to composed UTF-16-LE using the
  * documented CFString API.
  *
  * cd            Encoder handle, unused.
  * inbuf         In/out: pointer to the input bytes; advanced past the
  *               input on success.
  * inbytesleft   In/out: number of input bytes; set to 0 on success.
  * outbuf        In/out: pointer to the output buffer; advanced past
  *               the bytes written on success.
  * outbytesleft  In/out: free bytes in the output buffer; reduced by
  *               the bytes written on success.
  *
  * The input is converted as a whole or not at all.  Returns 0 on
  * success.  On failure returns (size_t)-1 with errno set to EILSEQ
  * (the conversion produced nothing), E2BIG (output buffer too small)
  * or ENOMEM (a working object could not be allocated).
  *
  * A CFString and a scratch buffer are kept in static storage and
  * re-used between calls.
  */
 static size_t macosxfs_encoding_pull(
 	void *cd,				/* Encoder handle */
 	char **inbuf, size_t *inbytesleft,	/* Script string */
 	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
 {
 	static const int script_code = kCFStringEncodingUTF8;
 	static CFMutableStringRef cfstring = NULL;
 	size_t outsize;		/* counted in UTF-16 units, not bytes */
 	CFRange range;
 	(void) cd; /* UNUSED */
 	if (0 == *inbytesleft) {
 		return 0;
 	}
 	if (NULL == cfstring) {
 		/*
 		 * A version with an external backing store as in the
 		 * push function should have been more efficient, but
 		 * testing shows, that it is actually slower (!).
 		 * Maybe kCFAllocatorDefault gets shortcut evaluation
 		 * internally, while kCFAllocatorNull doesn't.
 		 */
 		cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
 		if (NULL == cfstring) {
 			errno = ENOMEM;
 			return -1;
 		}
 	}
 	/*
 	 * Three methods of appending to a CFString, choose the most
 	 * efficient.
 	 */
 	if (0 == (*inbuf)[*inbytesleft-1]) {
 		/* Input is already NUL-terminated. */
 		CFStringAppendCString(cfstring, *inbuf, script_code);
 	} else if (*inbytesleft <= 255) {
 		/* Short input: wrap it as a length-prefixed string. */
 		Str255 buffer;
 		buffer[0] = *inbytesleft;
 		memcpy(buffer+1, *inbuf, buffer[0]);
 		CFStringAppendPascalString(cfstring, buffer, script_code);
 	} else {
 		/*
 		 * We would like to use a fixed buffer and a loop
 		 * here, but then we can't guarantee that the input is
 		 * well-formed UTF-8, as we are supposed to do.
 		 */
 		static char *buffer = NULL;
 		static size_t buflen = 0;
 		buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
 		if (NULL == buffer) {
 			buflen = 0;
 			errno = ENOMEM;
 			return -1;
 		}
 		memcpy(buffer, *inbuf, *inbytesleft);
 		buffer[*inbytesleft] = 0;
 		/*
 		 * Append the NUL-terminated copy.  The input itself
 		 * has no terminator in this branch and must not be
 		 * handed to a C string function.
 		 */
 		CFStringAppendCString(cfstring, buffer, script_code);
 	}
 	/*
 	 * Compose characters (Unicode normalization form C).
 	 */
 	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
 	outsize = CFStringGetLength(cfstring);
 	range = CFRangeMake(0,outsize);
 	if (outsize == 0) {
 		/*
 		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
 		 * errors here.  That function will always pass 2
 		 * characters.  smbd/open.c:check_for_pipe() cuts a
 		 * pathname to 10 characters blindly.  Suppress the
 		 * debug output in those cases.
 		 */
 		if(2 != *inbytesleft && 10 != *inbytesleft) {
 			debug_out("String conversion: "
 				  "An unknown error occurred\n");
 			hexdump("UTF8->UTF16LE (old) input",
 				*inbuf, *inbytesleft);
 		}
 		errno = EILSEQ; /* Not sure, but this is what we have
 				 * actually seen. */
 		return -1;
 	}
 	if (outsize*2 > *outbytesleft) {
 		/* Empty the static string again for the next call. */
 		CFStringDelete(cfstring, range);
 		debug_out("String conversion: "
 			  "Output buffer too small\n");
 		hexdump("UTF8->UTF16LE (old) input",
 			*inbuf, *inbytesleft);
 		errno = E2BIG;
 		return -1;
 	}
 	CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
 	CFStringDelete(cfstring, range);
 	native_to_le(*outbuf, outsize*2);
 	/*
 	 * Add a converted null byte, if the CFString conversions
 	 * prevented that until now.
 	 */
 	if (0 == (*inbuf)[*inbytesleft-1] &&
 	    (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
 		if ((outsize*2+2) > *outbytesleft) {
 			debug_out("String conversion: "
 				  "Output buffer too small\n");
 			hexdump("UTF8->UTF16LE (old) input",
 				*inbuf, *inbytesleft);
 			errno = E2BIG;
 			return -1;
 		}
 		(*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
 		/* The two terminator bytes are one UTF-16 unit. */
 		outsize += 1;
 	}
 	*inbuf += *inbytesleft;
 	*inbytesleft = 0;
 	*outbuf += outsize*2;
 	*outbytesleft -= outsize*2;
 	return 0;
 }
 /*
  * Convert a UTF-16-LE string to decomposed script (UTF-8) encoding
  * using the documented CFString API.
  *
  * cd            Encoder handle, unused.
  * inbuf         In/out: pointer to the input bytes; advanced past the
  *               input on success.
  * inbytesleft   In/out: number of input bytes; set to 0 on success.
  * outbuf        In/out: pointer to the output buffer; advanced past
  *               the bytes written on success.
  * outbytesleft  In/out: free bytes in the output buffer; reduced by
  *               the bytes written on success.
  *
  * The input is converted as a whole or not at all.  Returns 0 on
  * success.  On failure returns (size_t)-1 with errno set to EILSEQ
  * (input not convertible), E2BIG (output buffer too small) or ENOMEM
  * (a working object could not be allocated).
  *
  * A CFString and its backing buffer are kept in static storage and
  * re-used between calls.
  */
 static size_t macosxfs_encoding_push(
 	void *cd,				/* Encoder handle */
 	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
 	char **outbuf, size_t *outbytesleft)	/* Script string */
 {
 	static const int script_code = kCFStringEncodingUTF8;
 	static CFMutableStringRef cfstring = NULL;
 	static UniChar *buffer = NULL;
 	static size_t buflen = 0;
 	CFIndex outsize, cfsize, charsconverted;
 	(void) cd; /* UNUSED */
 	if (0 == *inbytesleft) {
 		return 0;
 	}
 	/*
 	 * We need a buffer that can hold 4 times the original data,
 	 * because that is the theoretical maximum that decomposition
 	 * can create currently (in Unicode 4.0).
 	 */
 	buffer = set_ucbuffer_with_le_copy(
 		buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
 	if (NULL == buffer) {
 		buflen = 0;
 		errno = ENOMEM;
 		return -1;
 	}
 	/* Let the CFString operate directly on our native-order copy. */
 	if (NULL == cfstring) {
 		cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
 			kCFAllocatorDefault,
 			buffer, *inbytesleft/2, buflen/2,
 			kCFAllocatorNull);
 		if (NULL == cfstring) {
 			errno = ENOMEM;
 			return -1;
 		}
 	} else {
 		CFStringSetExternalCharactersNoCopy(
 			cfstring,
 			buffer, *inbytesleft/2, buflen/2);
 	}
 	/*
 	 * Decompose characters (Unicode normalization form D).
 	 *
 	 * NB: This isn't exactly what HFS+ wants (see note on
 	 * kCFStringEncodingUseHFSPlusCanonical in
 	 * CFStringEncodingConverter.h), but AFAIK it's the best that
 	 * the official API can do.
 	 */
 	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
 	cfsize = CFStringGetLength(cfstring);
 	charsconverted = CFStringGetBytes(
 		cfstring, CFRangeMake(0,cfsize),
 		script_code, 0, False,
 		*outbuf, *outbytesleft, &outsize);
 	if (0 == charsconverted) {
 		debug_out("String conversion: "
 			  "Buffer too small or not convertable\n");
 		hexdump("UTF16LE->UTF8 (old) input",
 			*inbuf, *inbytesleft);
 		errno = EILSEQ; /* Probably more likely. */
 		return -1;
 	}
 	if (charsconverted < cfsize) {
 		/*
 		 * Only part of the string was converted.  Don't
 		 * report that as success.  A dry run without an
 		 * output buffer tells whether the input itself is
 		 * the problem or just the size of the buffer.
 		 */
 		CFIndex convertible = CFStringGetBytes(
 			cfstring, CFRangeMake(0,cfsize),
 			script_code, 0, False,
 			NULL, 0, NULL);
 		debug_out("String conversion: "
 			  "Buffer too small or not convertable\n");
 		hexdump("UTF16LE->UTF8 (old) input",
 			*inbuf, *inbytesleft);
 		errno = (convertible < cfsize) ? EILSEQ : E2BIG;
 		return -1;
 	}
 	/*
 	 * Add a converted null byte, if the CFString conversions
 	 * prevented that until now.  Only look for a UTF-16
 	 * terminator if the input holds at least one whole unit.
 	 */
 	if (*inbytesleft >= 2 &&
 	    0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
 	    (0 != (*outbuf)[outsize-1])) {
 		if (((size_t)outsize+1) > *outbytesleft) {
 			debug_out("String conversion: "
 				  "Output buffer too small\n");
 			hexdump("UTF16LE->UTF8 (old) input",
 				*inbuf, *inbytesleft);
 			errno = E2BIG;
 			return -1;
 		}
 		(*outbuf)[outsize] = 0;
 		++outsize;
 	}
 	*inbuf += *inbytesleft;
 	*inbytesleft = 0;
 	*outbuf += outsize;
 	*outbytesleft -= outsize;
 	return 0;
 }
 #else /* USE_INTERNAL_API */
 /*
  * An implementation based on internal code as known from the
  * OpenDarwin CVS.
  *
  * This code doesn't need much memory management because it uses
  * functions that operate on the raw memory directly.
  *
  * The push routine here is faster and more compatible with HFS+ than
  * the other implementation above.  The pull routine is only faster
  * for some strings, slightly slower for others.  The pull routine
  * loses because it has to iterate over the data twice, once to
  * decode UTF-8 and then to do the character composition required by
  * Windows.
  */
 /*
  * Convert a script (UTF-8) string to precomposed UTF-16-LE using the
  * internal CFStringEncoding converter.
  *
  * cd            Encoder handle, unused.
  * inbuf         In/out: pointer to the input bytes; advanced past the
  *               consumed input on success.
  * inbytesleft   In/out: number of input bytes; reduced by the bytes
  *               consumed on success.
  * outbuf        In/out: pointer to the output buffer; advanced past
  *               the bytes written on success.
  * outbytesleft  In/out: free bytes in the output buffer; reduced by
  *               the bytes written on success.
  *
  * Returns 0 on success.  On failure returns (size_t)-1 with errno set
  * to E2BIG (output buffer too small or input not fully consumed),
  * EILSEQ (invalid input) or EINVAL (converter unavailable or an
  * unexpected converter result).
  */
 static size_t macosxfs_encoding_pull(
 	void *cd,				/* Encoder handle */
 	char **inbuf, size_t *inbytesleft,	/* Script string */
 	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
 {
 	static const int script_code = kCFStringEncodingUTF8;
 	UInt32 srcCharsUsed = 0;
 	UInt32 dstCharsUsed = 0;
 	UInt32 result;
 	uint32_t dstDecomposedUsed = 0;
 	uint32_t dstPrecomposedUsed = 0;
 	(void) cd; /* UNUSED */
 	if (0 == *inbytesleft) {
 		return 0;
 	}
 	/*
 	 * The converter takes the output capacity as a number of
 	 * UniChars, while *outbytesleft counts bytes.
 	 */
 	result = CFStringEncodingBytesToUnicode(
 		script_code, kCFStringEncodingComposeCombinings,
 		*inbuf, *inbytesleft, &srcCharsUsed,
 		(UniChar*)*outbuf, *outbytesleft/2, &dstCharsUsed);
 	switch(result) {
 	case kCFStringEncodingConversionSuccess:
 		if (*inbytesleft == srcCharsUsed) {
 			break;
 		}
 		/* Input not fully consumed: treat like a short buffer. */
 		/* fall through */
 	case kCFStringEncodingInsufficientOutputBufferLength:
 		debug_out("String conversion: "
 			  "Output buffer too small\n");
 		hexdump("UTF8->UTF16LE (new) input",
 			*inbuf, *inbytesleft);
 		errno = E2BIG;
 		return -1;
 	case kCFStringEncodingInvalidInputStream:
 		/*
 		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
 		 * errors here.  That function will always pass 2
 		 * characters.  smbd/open.c:check_for_pipe() cuts a
 		 * pathname to 10 characters blindly.  Suppress the
 		 * debug output in those cases.
 		 */
 		if(2 != *inbytesleft && 10 != *inbytesleft) {
 			debug_out("String conversion: "
 				  "Invalid input sequence\n");
 			hexdump("UTF8->UTF16LE (new) input",
 				*inbuf, *inbytesleft);
 		}
 		errno = EILSEQ;
 		return -1;
 	case kCFStringEncodingConverterUnavailable:
 		debug_out("String conversion: "
 			  "Unknown encoding\n");
 		hexdump("UTF8->UTF16LE (new) input",
 			*inbuf, *inbytesleft);
 		errno = EINVAL;
 		return -1;
 	default:
 		/* Never treat an unrecognized result as success. */
 		debug_out("String conversion: "
 			  "Unexpected converter result\n");
 		hexdump("UTF8->UTF16LE (new) input",
 			*inbuf, *inbytesleft);
 		errno = EINVAL;
 		return -1;
 	}
 	/*
 	 * It doesn't look like CFStringEncodingBytesToUnicode() can
 	 * produce precomposed characters (flags=ComposeCombinings
 	 * doesn't do it), so we need another pass over the data here.
 	 * We can do this in-place, as the string can only get
 	 * shorter.
 	 *
 	 * (Actually in theory there should be an internal
 	 * decomposition and reordering before the actual composition
 	 * step.  But we should be able to rely on that we always get
 	 * fully decomposed strings for input, so this can't create
 	 * problems in reality.)
 	 */
 	CFUniCharPrecompose(
 		(const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
 		(UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
 	native_to_le(*outbuf, dstPrecomposedUsed*2);
 	*inbuf += srcCharsUsed;
 	*inbytesleft -= srcCharsUsed;
 	*outbuf += dstPrecomposedUsed*2;
 	*outbytesleft -= dstPrecomposedUsed*2;
 	return 0;
 }
 /*
  * Convert a UTF-16-LE string to script (UTF-8) encoding in the HFS+
  * canonical form using the internal CFStringEncoding converter.
  *
  * cd            Encoder handle, unused.
  * inbuf         In/out: pointer to the input bytes; advanced past the
  *               consumed input on success.
  * inbytesleft   In/out: number of input bytes; reduced by the bytes
  *               consumed on success.  Only whole 16-bit units are
  *               consumed, so an odd trailing byte is left over.
  * outbuf        In/out: pointer to the output buffer; advanced past
  *               the bytes written on success.
  * outbytesleft  In/out: free bytes in the output buffer; reduced by
  *               the bytes written on success.
  *
  * Returns 0 on success.  On failure returns (size_t)-1 with errno set
  * to E2BIG (output buffer too small or input not fully consumed),
  * EILSEQ (invalid input), EINVAL (converter unavailable or an
  * unexpected converter result) or ENOMEM (no working buffer).
  */
 static size_t macosxfs_encoding_push(
 	void *cd,				/* Encoder handle */
 	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
 	char **outbuf, size_t *outbytesleft)	/* Script string */
 {
 	static const int script_code = kCFStringEncodingUTF8;
 	static UniChar *buffer = NULL;
 	static size_t buflen = 0;
 	UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
 	(void) cd; /* UNUSED */
 	if (0 == *inbytesleft) {
 		return 0;
 	}
 	/* Get the input in native byte order. */
 	buffer = set_ucbuffer_with_le(
 		buffer, &buflen, *inbuf, *inbytesleft);
 	if (NULL == buffer) {
 		buflen = 0;
 		errno = ENOMEM;
 		return -1;
 	}
 	result = CFStringEncodingUnicodeToBytes(
 		script_code, kCFStringEncodingUseHFSPlusCanonical,
 		buffer, *inbytesleft/2, &srcCharsUsed,
 		*outbuf, *outbytesleft, &dstCharsUsed);
 	switch(result) {
 	case kCFStringEncodingConversionSuccess:
 		if (*inbytesleft/2 == srcCharsUsed) {
 			break;
 		}
 		/* Input not fully consumed: treat like a short buffer. */
 		/* fall through */
 	case kCFStringEncodingInsufficientOutputBufferLength:
 		debug_out("String conversion: "
 			  "Output buffer too small\n");
 		hexdump("UTF16LE->UTF8 (new) input",
 			*inbuf, *inbytesleft);
 		errno = E2BIG;
 		return -1;
 	case kCFStringEncodingInvalidInputStream:
 		/*
 		 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
 		 * cuts a pathname to 10 characters blindly.  Suppress
 		 * the debug output in those cases.
 		 */
 		if(10 != *inbytesleft) {
 			debug_out("String conversion: "
 				  "Invalid input sequence\n");
 			hexdump("UTF16LE->UTF8 (new) input",
 				*inbuf, *inbytesleft);
 		}
 		errno = EILSEQ;
 		return -1;
 	case kCFStringEncodingConverterUnavailable:
 		debug_out("String conversion: "
 			  "Unknown encoding\n");
 		hexdump("UTF16LE->UTF8 (new) input",
 			*inbuf, *inbytesleft);
 		errno = EINVAL;
 		return -1;
 	default:
 		/* Never treat an unrecognized result as success. */
 		debug_out("String conversion: "
 			  "Unexpected converter result\n");
 		hexdump("UTF16LE->UTF8 (new) input",
 			*inbuf, *inbytesleft);
 		errno = EINVAL;
 		return -1;
 	}
 	*inbuf += srcCharsUsed*2;
 	*inbytesleft -= srcCharsUsed*2;
 	*outbuf += dstCharsUsed;
 	*outbytesleft -= dstCharsUsed;
 	return 0;
 }
 #endif /* USE_INTERNAL_API */
 /*
  * For initialization, actually install the encoding as "MACOSXFS".
  */
 /* Charset descriptor: encoding name, then the pull and push routines. */
 static struct charset_functions macosxfs_encoding_functions = {
 	"MACOSXFS",
 	macosxfs_encoding_pull,
 	macosxfs_encoding_push
 };
 /*
  * Module entry point: register the charset descriptor and hand back
  * the status reported by the registration call.
  */
 NTSTATUS init_module(void)
 {
 	struct charset_functions *module = &macosxfs_encoding_functions;

 	return smb_register_charset(module);
 }
 /* eof */

603 lines 16 KiB C Raw Blame History

603 lines

16 KiB

C

Raw Blame History