1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-03 13:47:25 +03:00

r13347: - Now we compare values with an optimized UTF-8

safe function if the user provides a UTF-8
compliant casefold function to ldb.

- Fix toupper_m and tolower_m to not crash if
the case tables are not found

- Let load_case_tables() search in the correct
directory in the search tree for the case
tables so that we can test UTF-8

Simo
This commit is contained in:
Simo Sorce 2006-02-04 16:44:27 +00:00 committed by Gerald (Jerry) Carter
parent 67837dbd2b
commit e12f070958
2 changed files with 50 additions and 9 deletions

View File

@ -145,15 +145,24 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx,
compare two case insensitive strings, ignoring multiple whitespaces
and leading and trailing whitespaces
see rfc2252 section 8.1
try to optimize for the ASCII case,
but if we find a UTF-8 codepoint, fall back to the slower but correct function
*/
static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
const struct ldb_val *v1, const struct ldb_val *v2)
{
const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
char *b1, *b2, *u1, *u2;
int ret;
while (*s1 == ' ') s1++;
while (*s2 == ' ') s2++;
/* TODO: make utf8 safe, possibly with helper function from application */
while (*s1 && *s2) {
/* the first 128 (0x00-0x7F) chars are ASCII and UTF-8 guarantees they
 * never appear in multibyte sequences */
if (((unsigned char)s1[0]) & 0x80) goto utf8str;
if (((unsigned char)s2[0]) & 0x80) goto utf8str;
if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2))
break;
if (*s1 == ' ') {
@ -163,7 +172,7 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
s1++; s2++;
}
if (! (*s1 && *s2)) {
/* remove trailing spaces only if one of the pointers
/* check for trailing spaces only if one of the pointers
* has reached the end of the strings otherwise we
* can mistakenly match.
* ex. "domain users" <-> "domainUpdates"
@ -172,6 +181,30 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
while (*s2 == ' ') s2++;
}
return (int)(toupper(*s1)) - (int)(toupper(*s2));
utf8str:
/* no need to recheck from the start, just from the first UTF-8 char found */
b1 = u1 = ldb_casefold(ldb, mem_ctx, s1);
b2 = u2 = ldb_casefold(ldb, mem_ctx, s2);
while (*u1 & *u2) {
if (*u1 != *u2)
break;
if (*u1 == ' ') {
while (u1[0] == u1[1]) u1++;
while (u2[0] == u2[1]) u2++;
}
u1++; u2++;
}
if (! (*u1 && *u2)) {
while (*u1 == ' ') u1++;
while (*u2 == ' ') u2++;
}
ret = (int)(*u1 - *u2);
talloc_free(b1);
talloc_free(b2);
return ret;
}
/*

View File

@ -43,10 +43,18 @@ static void load_case_tables(void)
lowcase_table = map_file(lib_path(mem_ctx, "lowcase.dat"), 0x20000);
talloc_free(mem_ctx);
if (upcase_table == NULL) {
upcase_table = (void *)-1;
/* try also under codepages for testing purposes */
upcase_table = map_file("codepages/upcase.dat", 0x20000);
if (upcase_table == NULL) {
upcase_table = (void *)-1;
}
}
if (lowcase_table == NULL) {
lowcase_table = (void *)-1;
/* try also under codepages for testing purposes */
lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
if (lowcase_table == NULL) {
lowcase_table = (void *)-1;
}
}
}
@ -58,12 +66,12 @@ codepoint_t toupper_w(codepoint_t val)
if (val < 128) {
return toupper(val);
}
if (upcase_table == (void *)-1) {
return val;
}
if (upcase_table == NULL) {
load_case_tables();
}
if (upcase_table == (void *)-1) {
return val;
}
if (val & 0xFFFF0000) {
return val;
}
@ -78,12 +86,12 @@ codepoint_t tolower_w(codepoint_t val)
if (val < 128) {
return tolower(val);
}
if (lowcase_table == (void *)-1) {
return val;
}
if (lowcase_table == NULL) {
load_case_tables();
}
if (lowcase_table == (void *)-1) {
return val;
}
if (val & 0xFFFF0000) {
return val;
}