samba-mirror/ldb_utf8.c at master

mirror of https://github.com/samba-team/samba.git synced 2025-08-30 17:49:30 +03:00

Files

Douglas Bagnall b22e1d3207 ldb: don't cast to unsigned for ldb_ascii_toupper()

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>

2024-05-22 23:12:32 +00:00

228 lines

5.5 KiB

C

Raw Permalink Blame History

 /*
    ldb database library
    Copyright (C) Andrew Tridgell  2004
      ** NOTE! The following LGPL license applies to the ldb
      ** library. This does NOT imply that all of Samba is released
      ** under the LGPL
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 3 of the License, or (at your option) any later version.
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 /*
  *  Name: ldb
  *
  *  Component: ldb utf8 handling
  *
  *  Description: case folding and case comparison for UTF8 strings
  *
  *  Author: Andrew Tridgell
  */
 #include "ldb_private.h"
 #include "system/locale.h"
 /*
  * Install the callbacks ldb uses to case-fold and to compare
  * case-insensitive ldb val strings.
  *
  * Each of context, casefold and casecmp is optional: an argument
  * passed as NULL leaves the matching entry in ldb->utf8_fns as it
  * was, so a caller can replace only some of them.
  */
 void ldb_set_utf8_functions(struct ldb_context *ldb,
 			    void *context,
 			    char *(*casefold)(void *, void *, const char *, size_t),
 			    int (*casecmp)(void *ctx,
 					   const struct ldb_val *v1,
 					   const struct ldb_val *v2))
 {
 	if (casecmp != NULL) {
 		ldb->utf8_fns.casecmp = casecmp;
 	}
 	if (casefold != NULL) {
 		ldb->utf8_fns.casefold = casefold;
 	}
 	if (context != NULL) {
 		ldb->utf8_fns.context = context;
 	}
 }
 /*
   this allows the user to pass in a case folding function (and the
   context handed to it) to handle utf8 strings.

   It is a thin wrapper around ldb_set_utf8_functions() that passes
   NULL for the comparison callback, so the comparison function that
   is currently installed is not replaced.
  */
 void ldb_set_utf8_fns(struct ldb_context *ldb,
 		      void *context,
 		      char *(*casefold)(void *, void *, const char *, size_t))
 {
 	ldb_set_utf8_functions(ldb, context, casefold, NULL);
 }
 /*
   a simple case folding function
   NOTE: does not handle UTF8

   Copies at most n bytes of s into a new string allocated on mem_ctx
   and upper-cases the ASCII letters [a-z] in that copy.

   Returns the new string. If s is NULL or the copy cannot be
   allocated, errno is set to ENOMEM and NULL is returned.
   The context argument is not used.
 */
 char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
 {
 	size_t i;
 	char *ret = NULL;

 	if (s != NULL) {
 		ret = talloc_strndup(mem_ctx, s, n);
 	}
 	/*
 	 * It is the result of the allocation that must be checked, not
 	 * only the input pointer: the loop below dereferences ret.
 	 */
 	if (ret == NULL) {
 		errno = ENOMEM;
 		return NULL;
 	}
 	for (i = 0; ret[i] != '\0'; i++) {
 		ret[i] = ldb_ascii_toupper(ret[i]);
 	}
 	return ret;
 }
 /*
  * The default comparison fold function only knows ASCII. Multiple
  * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
  * other bytes are compared without casefolding.
  *
  * Leading spaces are skipped in both values, and trailing spaces are
  * skipped once the other value has been used up. A NUL byte found
  * within the given length is treated as the end of that value.
  *
  * Note that as well as not handling UTF-8, this function does not exactly
  * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
  *
  * The first argument is unused. Returns 0 when the values match under
  * the rules above, -1 when v1 ends while v2 still has a non-NUL byte
  * left, 1 in the mirrored case, and otherwise the result of
  * NUMERIC_CMP() on the two bytes where the walk stopped.
  *
  * NOTE(review): that final NUMERIC_CMP() is given the raw bytes, not
  * their upper-cased forms, so a pair such as "a" and "B" is decided
  * by 'a' vs 'B' rather than 'A' vs 'B'. Confirm this is the intended
  * ordering before relying on it for sorting.
  */
 int ldb_comparison_fold_ascii(void *ignored,
 			      const struct ldb_val *v1,
 			      const struct ldb_val *v2)
 {
 	const uint8_t *s1 = v1->data;
 	const uint8_t *s2 = v2->data;
 	size_t n1 = v1->length, n2 = v2->length;
 	/* skip leading spaces in both values */
 	while (n1 && *s1 == ' ') { s1++; n1--; };
 	while (n2 && *s2 == ' ') { s2++; n2--; };
 	/*
 	 * Walk both values in step for as long as bytes remain, neither
 	 * side is at a NUL, and the upper-cased bytes agree.
 	 */
 	while (n1 && n2 && *s1 && *s2) {
 		if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
 			break;
 		}
 		if (*s1 == ' ') {
 			/*
 			 * Both sides are at a space here. Move each one to
 			 * the last space of its run, so that the single
 			 * step below consumes the whole run.
 			 */
 			while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
 			while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
 		}
 		s1++; s2++;
 		n1--; n2--;
 	}
 	/* skip trailing spaces only if the other pointer has
 	 * reached the end of its string, otherwise we can
 	 * mistakenly match.  ex. "domain users" <->
 	 * "domainUpdates"
 	 */
 	if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
 		while (n1 && *s1 == ' ') { s1++; n1--; }
 	}
 	if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
 		while (n2 && *s2 == ' ') { s2++; n2--; }
 	}
 	/* v1 is used up: v2 is greater unless it is sitting on a NUL */
 	if (n1 == 0 && n2 != 0) {
 		return *s2 ? -1 : 0;
 	}
 	/* v2 is used up: v1 is greater unless it is sitting on a NUL */
 	if (n2 == 0 && n1 != 0) {
 		return *s1 ? 1 : 0;
 	}
 	if (n1 == 0 && n2 == 0) {
 		return 0;
 	}
 	/* both sides still have a byte: compare those two bytes as they are */
 	return NUMERIC_CMP(*s1, *s2);
 }
 /*
  * Install ldb_casefold_default() and ldb_comparison_fold_ascii() as the
  * case folding and comparison callbacks of ldb. The context is passed
  * as NULL, which ldb_set_utf8_functions() treats as "leave the stored
  * context as it is".
  */
 void ldb_set_utf8_default(struct ldb_context *ldb)
 {
 	ldb_set_utf8_functions(ldb, NULL,
 			  ldb_casefold_default,
 			  ldb_comparison_fold_ascii);
 }
 /*
  * Case-fold s by calling the casefold callback stored in ldb->utf8_fns,
  * passing it the stored callback context together with mem_ctx, s and
  * n. Returns whatever that callback returns.
  */
 char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
 {
 	return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
 }
 /*
   check the attribute name is valid according to rfc2251
   returns 1 if the name is ok, 0 otherwise

   A valid name is non-empty, starts with an ASCII letter (or our
   special '@' identifier) and continues with ASCII letters, digits
   and '-' only. The one-character name "*" is accepted as well.

   The character classes are tested with explicit ASCII ranges rather
   than <ctype.h>, so the answer does not depend on the process
   locale and no non-ISO-C function such as isascii() is needed.
  */
 int ldb_valid_attr_name(const char *s)
 {
 	size_t i;

 	if (s == NULL || s[0] == '\0') {
 		return 0;
 	}

 	/* handle special ldb_tdb wildcard */
 	if (strcmp(s, "*") == 0) {
 		return 1;
 	}

 	for (i = 0; s[i] != '\0'; i++) {
 		const char c = s[i];
 		/* bytes outside 7-bit ASCII fall into neither class */
 		const int is_alpha = ('a' <= c && c <= 'z') ||
 				     ('A' <= c && c <= 'Z');
 		const int is_digit = ('0' <= c && c <= '9');

 		if (i == 0) {
 			/* first char must be an alpha (or our special '@' identifier) */
 			if (!is_alpha && c != '@') {
 				return 0;
 			}
 		} else if (!is_alpha && !is_digit && c != '-') {
 			return 0;
 		}
 	}
 	return 1;
 }
 /*
  * Return a copy of the attribute name s, allocated on mem_ctx, in
  * which the ASCII letters [a-z] have been upper-cased.
  * If talloc_strdup() returns NULL, errno is set to ENOMEM and NULL is
  * returned.
  */
 char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
 {
 	char *folded = talloc_strdup(mem_ctx, s);
 	char *p;

 	if (folded == NULL) {
 		errno = ENOMEM;
 		return NULL;
 	}
 	/* fold the copy in place, one byte at a time */
 	for (p = folded; *p != '\0'; p++) {
 		*p = ldb_ascii_toupper(*p);
 	}
 	return folded;
 }
 /*
   Tell whether attr names the distinguishedName attribute; both 'dn'
   and 'distinguishedName' are accepted, as judged by ldb_attr_cmp().
   Returns 0 on a match and -1 otherwise.
 */
 int ldb_attr_dn(const char *attr)
 {
 	if (ldb_attr_cmp(attr, "dn") == 0) {
 		return 0;
 	}
 	if (ldb_attr_cmp(attr, "distinguishedName") == 0) {
 		return 0;
 	}
 	return -1;
 }
 _PRIVATE_ char ldb_ascii_toupper(char c) {
 	/*
 	 * Deliberately locale-independent: only the 26 lower-case ASCII
 	 * letters are changed, every other byte is returned untouched.
 	 *
 	 * A locale-aware toupper() could, for example, turn the "i" in
 	 * "<guid=" into "İ" in some locales, and we would no longer be
 	 * able to parse that properly. The price is paid when dealing
 	 * with real text; a search for the name "Ali" would need to be
 	 * written "Alİ" to match.
 	 */
 	if (c < 'a' || c > 'z') {
 		return c;
 	}
 	/* flipping bit 0x20 turns an ASCII lower-case letter into upper case */
 	return c ^ 0x20;
 }

228 lines 5.5 KiB C Raw Permalink Blame History

228 lines

5.5 KiB

C

Raw Permalink Blame History