samba-mirror/util_unistr.c at 2bf0cdd01cf399bf28125f9e2a0d419f4e94996c

mirror of https://github.com/samba-team/samba.git synced 2024-12-28 07:21:54 +03:00

Jelmer Vernooij d891c0c74a r26429: Avoid use of global_smb_iconv_convenience.

(This used to be commit d37136b7ab)

2007-12-21 05:49:56 +01:00

651 lines

13 KiB

C

Raw Blame History

 /*
    Unix SMB/CIFS implementation.
    Samba utility functions
    Copyright (C) Andrew Tridgell 1992-2001
    Copyright (C) Simo Sorce 2001
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
 #include "system/locale.h"
 #include "dynconfig.h"
 #include "param/param.h"
 /**
  * @file
  * @brief Unicode string manipulation
  */
 /* These two tables drive the unicode case handling in toupper_w() and
    tolower_w().  They are not loaded at program start: each pointer
    stays NULL until the first non-ASCII lookup calls load_case_tables().
    After that call a pointer is either
      - (void *)-1: no table file could be mapped; lookups then return
        their input unchanged, or
      - the result of map_file() on upcase.dat / lowcase.dat (0x20000
        bytes requested), read through SVAL() at byte offset val*2.
    NOTE(review): map_file() presumably uses mmap() or read() as the
    original comment stated -- confirm against its definition. */
 static void *upcase_table;
 static void *lowcase_table;
 /*******************************************************************
 Populate upcase_table and lowcase_table.
 Each table is first looked up as <dyn_DATADIR>/<name>.dat and, if that
 fails, as codepages/<name>.dat (kept for testing purposes).  A table
 that cannot be mapped from either place is set to (void *)-1 so that
 callers can tell "unavailable" apart from "not loaded yet" (NULL).
 Panics if the temporary talloc context cannot be created.
 ********************************************************************/
 static void load_case_tables(void)
 {
 	TALLOC_CTX *tmp_ctx = talloc_init("load_case_tables");
 	if (tmp_ctx == NULL) {
 		smb_panic("No memory for case_tables");
 	}
 	/* the path strings only need to live for the map_file() calls */
 	upcase_table = map_file(talloc_asprintf(tmp_ctx, "%s/upcase.dat", dyn_DATADIR), 0x20000);
 	lowcase_table = map_file(talloc_asprintf(tmp_ctx, "%s/lowcase.dat", dyn_DATADIR), 0x20000);
 	talloc_free(tmp_ctx);
 	/* try also under codepages for testing purposes */
 	if (upcase_table == NULL) {
 		upcase_table = map_file("codepages/upcase.dat", 0x20000);
 	}
 	if (upcase_table == NULL) {
 		/* mark as permanently unavailable */
 		upcase_table = (void *)-1;
 	}
 	/* try also under codepages for testing purposes */
 	if (lowcase_table == NULL) {
 		lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
 	}
 	if (lowcase_table == NULL) {
 		/* mark as permanently unavailable */
 		lowcase_table = (void *)-1;
 	}
 }
 /**
  Map a codepoint to its upper case form.
  Values below 128 are handed to toupper().  Anything else is looked up
  in the upcase table, which is loaded on first use.  The input is
  returned unchanged when the table is unavailable or when the codepoint
  has any of its upper 16 bits set (the table is only indexed by the
  low 16 bits).
 **/
 codepoint_t toupper_w(codepoint_t val)
 {
 	if (val >= 128) {
 		if (upcase_table == NULL) {
 			load_case_tables();
 		}
 		if (upcase_table == (void *)-1 || (val & 0xFFFF0000) != 0) {
 			return val;
 		}
 		return SVAL(upcase_table, val*2);
 	}
 	return toupper(val);
 }
 /**
  Map a codepoint to its lower case form.
  Values below 128 are handed to tolower().  Anything else is looked up
  in the lowcase table, which is loaded on first use.  The input is
  returned unchanged when the table is unavailable or when the codepoint
  has any of its upper 16 bits set (the table is only indexed by the
  low 16 bits).
 **/
 codepoint_t tolower_w(codepoint_t val)
 {
 	if (val >= 128) {
 		if (lowcase_table == NULL) {
 			load_case_tables();
 		}
 		if (lowcase_table == (void *)-1 || (val & 0xFFFF0000) != 0) {
 			return val;
 		}
 		return SVAL(lowcase_table, val*2);
 	}
 	return tolower(val);
 }
 /**
   Case insensitive comparison of two codepoints.
   Returns 0 when the codepoints are identical or share the same
   toupper_w() mapping, otherwise the difference c1 - c2.
 */
 int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
 {
 	/* only consult the case table when the raw values differ */
 	if (c1 != c2 && toupper_w(c1) != toupper_w(c2)) {
 		return c1 - c2;
 	}
 	return 0;
 }
 /**
  Case insensitive string comparison.
  Both strings are decoded one codepoint at a time and compared through
  toupper_w().
  NULL handling (to simplify use in qsort): identical pointers compare
  equal, a NULL s1 sorts before anything, a NULL s2 sorts after.
  If either string contains a sequence that cannot be decoded, the rest
  of both strings -- starting AT the undecodable position, so the
  offending bytes themselves take part -- is compared with strcasecmp().
  Returns 0 if the strings match, otherwise a non-zero value: the
  difference of the first pair of codepoints that differ after upper
  casing, or of the bytes where one string ended.
 **/
 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
 {
 	codepoint_t c1=0, c2=0;
 	size_t size1, size2;
 	/* handle null ptr comparisons to simplify the use in qsort */
 	if (s1 == s2) return 0;
 	if (s1 == NULL) return -1;
 	if (s2 == NULL) return 1;
 	while (*s1 && *s2) {
 		c1 = next_codepoint(lp_iconv_convenience(global_loadparm), s1, &size1);
 		c2 = next_codepoint(lp_iconv_convenience(global_loadparm), s2, &size2);
 		if (c1 == INVALID_CODEPOINT ||
 		    c2 == INVALID_CODEPOINT) {
 			/* Fall back to a byte-wise comparison.  This must
 			   happen before s1/s2 are advanced, otherwise the
 			   bytes that triggered the fallback would be skipped
 			   and two different strings could compare equal. */
 			return strcasecmp(s1, s2);
 		}
 		s1 += size1;
 		s2 += size2;
 		if (c1 == c2) {
 			continue;
 		}
 		if (toupper_w(c1) != toupper_w(c2)) {
 			return c1 - c2;
 		}
 	}
 	/* at least one string is exhausted; the longer one sorts last */
 	return *s1 - *s2;
 }
 /**
  * Get the next token from a string, return False if none found.
  * Handles double-quotes.
  *
  * Leading separator characters are skipped, then characters are copied
  * into buff until an unquoted separator, the end of the string, or the
  * buffer limit is reached.  Double-quote characters toggle quoting and
  * are not copied; separators inside quotes are part of the token.
  *
  * @param ptr     in/out cursor into the string being tokenised.  On
  *                success it is moved past the character at which
  *                copying stopped (or left on the terminating NUL).
  * @param buff    destination for the NUL-terminated token
  * @param sep     separator characters; NULL selects " \t\n\r"
  * @param bufsize size of buff in bytes, including the terminator.  A
  *                token longer than bufsize-1 bytes is truncated.
  * @return true if a token was stored in buff, false if ptr, *ptr or
  *         buff is NULL, bufsize is 0, or no token is left.
  *
  * Based on a routine by GJC@VILLAGE.COM.
  * Extensively modified by Andrew.Tridgell@anu.edu.au
  **/
 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
 {
 	const char *s;
 	bool quoted;
 	size_t len=1; /* starts at 1 to reserve room for the terminator */
 	if (!ptr || !*ptr)
 		return false;
 	/* without at least one byte of buffer even the terminator
 	   would be written out of bounds */
 	if (!buff || bufsize == 0)
 		return false;
 	s = *ptr;
 	/* default to simple separators */
 	if (!sep)
 		sep = " \t\n\r";
 	/* find the first non sep char */
 	while (*s && strchr_m(sep,*s))
 		s++;
 	/* nothing left? */
 	if (!*s)
 		return false;
 	/* copy over the token */
 	for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
 		if (*s == '\"') {
 			quoted = !quoted;
 		} else {
 			len++;
 			*buff++ = *s;
 		}
 	}
 	/* step over the character that ended the token, unless it was
 	   the end of the string */
 	*ptr = (*s) ? s+1 : s;
 	*buff = 0;
 	return true;
 }
 /**
  Case insensitive string comparison, length limited.
  Works like strcasecmp_m() but looks at no more than n characters
  (decoded codepoints, not bytes) of each string.
  NULL handling (to simplify use in qsort): identical pointers compare
  equal, a NULL s1 sorts before anything, a NULL s2 sorts after.
  If either string contains a sequence that cannot be decoded, the
  comparison falls back to strncasecmp() starting AT the undecodable
  position.  The remaining character budget is used as the byte limit
  there; since a character is at least one byte this never looks past
  the requested n characters, although it may stop short of them.
  Returns 0 if the first n characters match, otherwise a non-zero value.
 **/
 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
 {
 	codepoint_t c1=0, c2=0;
 	size_t size1, size2;
 	/* handle null ptr comparisons to simplify the use in qsort */
 	if (s1 == s2) return 0;
 	if (s1 == NULL) return -1;
 	if (s2 == NULL) return 1;
 	while (*s1 && *s2 && n) {
 		n--;
 		c1 = next_codepoint(lp_iconv_convenience(global_loadparm), s1, &size1);
 		c2 = next_codepoint(lp_iconv_convenience(global_loadparm), s2, &size2);
 		if (c1 == INVALID_CODEPOINT ||
 		    c2 == INVALID_CODEPOINT) {
 			/* Fall back to a byte-wise comparison before s1/s2
 			   are advanced, so the offending bytes are compared
 			   too.  n was already decremented for the current
 			   character, hence n + 1 characters are still
 			   allowed; keep honouring that limit. */
 			return strncasecmp(s1, s2, n + 1);
 		}
 		s1 += size1;
 		s2 += size2;
 		if (c1 == c2) {
 			continue;
 		}
 		if (toupper_w(c1) != toupper_w(c2)) {
 			return c1 - c2;
 		}
 	}
 	/* limit reached without finding a difference */
 	if (n == 0) {
 		return 0;
 	}
 	/* at least one string is exhausted; the longer one sorts last */
 	return *s1 - *s2;
 }
 /**
  * Test two strings for equality.
  *
  * @note The comparison is case-insensitive; it is delegated to
  *       strcasecmp_m(), which also accepts NULL pointers.
  * @return true if strcasecmp_m() reports the strings as equal.
  **/
 _PUBLIC_ bool strequal_w(const char *s1, const char *s2)
 {
 	int cmp = strcasecmp_m(s1, s2);
 	return cmp == 0;
 }
 /**
  Test two strings for equality (case sensitive).
  Identical pointers, including two NULLs, are equal; a single NULL is
  never equal to a string.  Everything else is decided by strcmp().
 **/
 _PUBLIC_ bool strcsequal_w(const char *s1,const char *s2)
 {
 	if (s1 != s2) {
 		if (s1 == NULL || s2 == NULL) {
 			return false;
 		}
 		return strcmp(s1, s2) == 0;
 	}
 	return true;
 }
 /**
  Replace every occurrence of one character by another, in place.
  The string is walked codepoint by codepoint; where a codepoint equals
  oldc its first byte is overwritten with newc.  A NULL string is
  accepted and left alone.
  NOTE: oldc and newc must be 7 bit characters
 **/
 _PUBLIC_ void string_replace_w(char *s, char oldc, char newc)
 {
 	size_t step;
 	if (s == NULL) {
 		return;
 	}
 	for (; *s != '\0'; s += step) {
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), s, &step);
 		if (cp == oldc) {
 			*s = newc;
 		}
 	}
 }
 /**
  Paranoid strcpy into a buffer of given length (the length includes the
  terminating zero).  Every byte that is not accepted by isupper(),
  islower() or isdigit() and is not listed in other_safe_chars is
  replaced with '_'.  Deliberately does *NOT* check for multibyte
  characters. Don't change it !
  Returns dest, or NULL when maxlength is 0 or dest is NULL.  A NULL src
  yields an empty string; input longer than maxlength-1 bytes is
  truncated.
 **/
 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 {
 	size_t copy_len, pos;
 	/* can't fit any bytes at all! */
 	if (maxlength == 0) {
 		return NULL;
 	}
 	if (dest == NULL) {
 		DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 		return NULL;
 	}
 	if (src == NULL) {
 		dest[0] = '\0';
 		return dest;
 	}
 	/* leave room for the terminator */
 	copy_len = strlen(src);
 	if (copy_len > maxlength - 1) {
 		copy_len = maxlength - 1;
 	}
 	if (other_safe_chars == NULL) {
 		other_safe_chars = "";
 	}
 	for (pos = 0; pos < copy_len; pos++) {
 		/* mask to 0..255 so the ctype calls never see a negative value */
 		int val = (src[pos] & 0xff);
 		bool safe = isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val);
 		dest[pos] = safe ? src[pos] : '_';
 	}
 	dest[copy_len] = '\0';
 	return dest;
 }
 /**
  Count the number of UCS2 characters in a string. For a string made of
  7 bit bytes only this equals the byte count.  Once a byte with the top
  bit set is met the rest is decoded codepoint by codepoint: a codepoint
  below 0x10000 counts as one unit, anything else as two.
  A NULL string has length 0.
 **/
 _PUBLIC_ size_t strlen_m(const char *s)
 {
 	size_t units = 0;
 	if (s == NULL) {
 		return 0;
 	}
 	/* fast path: leading run of 7 bit bytes, one unit each */
 	for (; *s != '\0' && (((uint8_t)*s) & 0x80) == 0; s++) {
 		units++;
 	}
 	/* slow path: decode whatever is left (nothing, if the fast path
 	   already reached the terminator) */
 	while (*s != '\0') {
 		size_t c_size;
 		codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), s, &c_size);
 		units += (c < 0x10000) ? 1 : 2;
 		s += c_size;
 	}
 	return units;
 }
 /**
    Length of a string as reported by strlen_m(), plus one for the
    terminating NUL.  A NULL string yields 0, not 1.
 **/
 _PUBLIC_ size_t strlen_m_term(const char *s)
 {
 	return (s == NULL) ? 0 : strlen_m(s) + 1;
 }
 /**
  Multibyte-aware strchr: find the first occurrence of c in s.
  Returns a pointer to the match, or NULL if there is none.
  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 **/
 _PUBLIC_ char *strchr_m(const char *s, char c)
 {
 	const char *p;
 	size_t step;
 	/* characters up to 0x3F are guaranteed to not appear in
 	   non-initial position in multi-byte charsets, so a plain
 	   byte search is enough for them */
 	if ((c & 0xC0) == 0) {
 		return strchr(s, c);
 	}
 	for (p = s; *p != '\0'; p += step) {
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), p, &step);
 		if (cp == c) {
 			return discard_const_p(char, p);
 		}
 	}
 	return NULL;
 }
 /**
  * Multibyte-character version of strrchr: find the last occurrence of
  * c in s.  Returns a pointer to the match, or NULL if there is none.
  */
 _PUBLIC_ char *strrchr_m(const char *s, char c)
 {
 	const char *p;
 	char *last_hit = NULL;
 	size_t step;
 	/* characters up to 0x3F are guaranteed to not appear in
 	   non-initial position in multi-byte charsets, so a plain
 	   byte search is enough for them */
 	if ((c & 0xC0) == 0) {
 		return strrchr(s, c);
 	}
 	/* scan forwards and remember the most recent match */
 	for (p = s; *p != '\0'; p += step) {
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), p, &step);
 		if (cp == c) {
 			last_hit = discard_const_p(char, p);
 		}
 	}
 	return last_hit;
 }
 /**
   return True if any (multi-byte) character is lower case, i.e. if
   toupper_w() would change at least one codepoint of the string
 */
 _PUBLIC_ bool strhaslower(const char *string)
 {
 	const char *p = string;
 	while (*p != '\0') {
 		size_t step;
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), p, &step);
 		if (toupper_w(cp) != cp) {
 			return true; /* upper casing changes it: lower case char */
 		}
 		p += step;
 	}
 	return false;
 }
 /**
   return True if any (multi-byte) character is upper case, i.e. if
   tolower_w() would change at least one codepoint of the string
 */
 _PUBLIC_ bool strhasupper(const char *string)
 {
 	const char *p = string;
 	while (*p != '\0') {
 		size_t step;
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), p, &step);
 		if (tolower_w(cp) != cp) {
 			return true; /* lower casing changes it: upper case char */
 		}
 		p += step;
 	}
 	return false;
 }
 /**
  Convert a string to lower case, allocated with talloc.
  @param ctx talloc context the result is allocated on
  @param src string to convert; NULL is accepted and yields NULL
  @return a new lower cased copy of src, or NULL if src is NULL, memory
          runs out, or a converted codepoint cannot be encoded.  Nothing
          is left allocated on ctx in the failure cases.
 **/
 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 {
 	size_t size=0;
 	char *dest;
 	char *trimmed;
 	/* same NULL handling as strupper_talloc() */
 	if (!src) {
 		return NULL;
 	}
 	/* this takes advantage of the fact that upper/lower can't
 	   change the length of a character by more than 1 byte */
 	dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 	if (dest == NULL) {
 		return NULL;
 	}
 	while (*src) {
 		size_t c_size;
 		codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), src, &c_size);
 		src += c_size;
 		c = tolower_w(c);
 		c_size = push_codepoint(lp_iconv_convenience(global_loadparm), dest+size, c);
 		/* push_codepoint() reports failure as -1 */
 		if (c_size == (size_t)-1) {
 			talloc_free(dest);
 			return NULL;
 		}
 		size += c_size;
 	}
 	dest[size] = 0;
 	/* trim it so talloc_append_string() works */
 	trimmed = talloc_realloc(ctx, dest, char, size+1);
 	if (trimmed == NULL) {
 		/* the original block is still valid after a failed
 		   realloc; release it instead of leaking it */
 		talloc_free(dest);
 		return NULL;
 	}
 	talloc_set_name_const(trimmed, trimmed);
 	return trimmed;
 }
 /**
  Convert a string to UPPER case, allocated with talloc.
  @param ctx talloc context the result is allocated on
  @param src string to convert; NULL is accepted and yields NULL
  @return a new upper cased copy of src, or NULL if src is NULL, memory
          runs out, or a converted codepoint cannot be encoded.  Nothing
          is left allocated on ctx in the failure cases.
 **/
 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 {
 	size_t size=0;
 	char *dest;
 	char *trimmed;
 	if (!src) {
 		return NULL;
 	}
 	/* this takes advantage of the fact that upper/lower can't
 	   change the length of a character by more than 1 byte */
 	dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 	if (dest == NULL) {
 		return NULL;
 	}
 	while (*src) {
 		size_t c_size;
 		codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), src, &c_size);
 		src += c_size;
 		c = toupper_w(c);
 		c_size = push_codepoint(lp_iconv_convenience(global_loadparm), dest+size, c);
 		/* push_codepoint() reports failure as -1 */
 		if (c_size == (size_t)-1) {
 			talloc_free(dest);
 			return NULL;
 		}
 		size += c_size;
 	}
 	dest[size] = 0;
 	/* trim it so talloc_append_string() works */
 	trimmed = talloc_realloc(ctx, dest, char, size+1);
 	if (trimmed == NULL) {
 		/* the original block is still valid after a failed
 		   realloc; release it instead of leaking it */
 		talloc_free(dest);
 		return NULL;
 	}
 	talloc_set_name_const(trimmed, trimmed);
 	return trimmed;
 }
 /**
  Convert a string to lower case, in place.
  The result is written over the input, so a lower cased codepoint must
  never need more bytes than the original did; if that happens the
  function logs the codepoint and panics.
 **/
 _PUBLIC_ void strlower_m(char *s)
 {
 	char *rd;
 	char *wr;
 	/* this is quite a common operation, so we want it to be
 	   fast. We optimise for the ascii case, knowing that all our
 	   supported multi-byte character sets are ascii-compatible
 	   (ie. they match for the first 128 chars) */
 	for (rd = s; *rd != '\0' && (((uint8_t)*rd) & 0x80) == 0; rd++) {
 		*rd = tolower((uint8_t)*rd);
 	}
 	/* pure ascii string: done, and nothing more gets written */
 	if (*rd == '\0') {
 		return;
 	}
 	/* from here on decode at rd and re-encode at wr; wr never
 	   overtakes rd because expansion is refused below */
 	wr = rd;
 	while (*rd != '\0') {
 		size_t in_len, out_len;
 		codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), rd, &in_len);
 		out_len = push_codepoint(lp_iconv_convenience(global_loadparm), wr, tolower_w(c));
 		if (out_len > in_len) {
 			DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 				 c, tolower_w(c), (int)in_len, (int)out_len));
 			smb_panic("codepoint expansion in strlower_m\n");
 		}
 		rd += in_len;
 		wr += out_len;
 	}
 	*wr = 0;
 }
 /**
  Convert a string to UPPER case, in place.
  The result is written over the input, so an upper cased codepoint must
  never need more bytes than the original did; if that happens the
  function logs the codepoint and panics.
 **/
 _PUBLIC_ void strupper_m(char *s)
 {
 	char *rd;
 	char *wr;
 	/* this is quite a common operation, so we want it to be
 	   fast. We optimise for the ascii case, knowing that all our
 	   supported multi-byte character sets are ascii-compatible
 	   (ie. they match for the first 128 chars) */
 	for (rd = s; *rd != '\0' && (((uint8_t)*rd) & 0x80) == 0; rd++) {
 		*rd = toupper((uint8_t)*rd);
 	}
 	/* pure ascii string: done, and nothing more gets written */
 	if (*rd == '\0') {
 		return;
 	}
 	/* from here on decode at rd and re-encode at wr; wr never
 	   overtakes rd because expansion is refused below */
 	wr = rd;
 	while (*rd != '\0') {
 		size_t in_len, out_len;
 		codepoint_t c = next_codepoint(lp_iconv_convenience(global_loadparm), rd, &in_len);
 		out_len = push_codepoint(lp_iconv_convenience(global_loadparm), wr, toupper_w(c));
 		if (out_len > in_len) {
 			DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 				 c, toupper_w(c), (int)in_len, (int)out_len));
 			smb_panic("codepoint expansion in strupper_m\n");
 		}
 		rd += in_len;
 		wr += out_len;
 	}
 	*wr = 0;
 }
 /**
  Count how many codepoints of a string are equal to the character c.
 **/
 _PUBLIC_ size_t count_chars_w(const char *s, char c)
 {
 	size_t hits = 0;
 	size_t step;
 	for (; *s != '\0'; s += step) {
 		codepoint_t cp = next_codepoint(lp_iconv_convenience(global_loadparm), s, &step);
 		if (cp == c) {
 			hits++;
 		}
 	}
 	return hits;
 }

651 lines 13 KiB C Raw Blame History

651 lines

13 KiB

C

Raw Blame History