1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-12 09:18:10 +03:00
samba-mirror/source4/lib/ms_fnmatch.c
Andrew Tridgell 7d32679e96 r2857: this commit gets rid of smb_ucs2_t, wpstring and fpstring, plus lots of associated functions.
The motivation for this change was to avoid having to convert to/from
ucs2 strings for so many operations. Doing that was slow, used many
static buffers, and was also incorrect as it didn't cope properly with
unicode codepoints above 65536 (which could not be represented
correctly as smb_ucs2_t chars)

The two core functions that allowed this change are next_codepoint()
and push_codepoint(). These functions allow you to correctly walk an
arbitrary multi-byte string a character at a time without converting
the whole string to ucs2.

While doing this cleanup I also fixed several ucs2 string handling
bugs. See the commit for details.

The following code (which counts the number of occurrences of 'c' in a
string) shows how to use the new interface:

size_t count_chars(const char *s, char c)
{
	size_t count = 0;

	while (*s) {
		size_t size;
		codepoint_t c2 = next_codepoint(s, &size);
		if (c2 == c) count++;
		s += size;
	}

	return count;
}
(This used to be commit 814881f0e5)
2007-10-10 12:59:39 -05:00

220 lines
5.1 KiB
C

/*
Unix SMB/CIFS implementation.
filename matching routine
Copyright (C) Andrew Tridgell 1992-2004
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
This module was originally based on fnmatch.c copyright by the Free
Software Foundation. It bears little (if any) resemblance to that
code now
*/
#include "includes.h"
/*
  Check whether a pattern remainder can match the empty string.

  Returns 0 when every character of 'p' is one of the wildcards
  '*', '<', '"' or '>' (an empty 'p' also qualifies), and -1 as soon
  as any other character is found.
*/
static int null_match(const char *p)
{
	while (*p != '\0') {
		switch (*p) {
		case '*':
		case '<':
		case '"':
		case '>':
			/* wildcard that is allowed to match nothing - keep scanning */
			p++;
			break;
		default:
			return -1;
		}
	}

	return 0;
}
/*
the max_n structure is purely for efficiency, it doesn't contribute
to the matching algorithm except by ensuring that the algorithm does
not grow exponentially
*/
struct max_n {
/* lowest position in the string from which a '*' or '<' scan ran to
   the end of the string without finding a match; NULL until recorded */
const char *predot;
/* lowest position in the string from which a '<' scan reached the
   last '.' without finding a match; NULL until recorded */
const char *postdot;
};
/*
Core recursive matcher. p and n are the pattern and string being
matched; both are walked one codepoint at a time using
next_codepoint(). The max_n array is an optimisation only: the current
'*' or '<' uses max_n[0] and recursive calls are handed max_n+1, so
each nested wildcard gets its own entry in which failed start
positions are remembered. The ldot pointer is NULL if the string does
not contain a '.', otherwise it points at the last dot in 'n'.
Returns 0 on a match and -1 otherwise.
*/
static int ms_fnmatch_core(const char *p, const char *n,
struct max_n *max_n, const char *ldot)
{
codepoint_t c, c2;
int i;
size_t size, size_n;
/* the loop ends when next_codepoint() yields 0, i.e. at the end of the pattern */
while ((c = next_codepoint(p, &size))) {
p += size;
switch (c) {
case '*':
/* a '*' matches zero or more characters of any type */
/* a scan from this position or an earlier one has already failed,
   so only a remainder that can match nothing is accepted */
if (max_n->predot && max_n->predot <= n) {
return null_match(p);
}
/* try the rest of the pattern against every suffix of n,
   advancing one codepoint at a time */
for (i=0; n[i]; i += size_n) {
next_codepoint(n+i, &size_n);
if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) {
return 0;
}
}
/* remember the lowest start position from which the scan failed */
if (!max_n->predot || max_n->predot > n) max_n->predot = n;
return null_match(p);
case '<':
/* a '<' matches zero or more characters of
any type, but stops matching at the last
'.' in the string. */
if (max_n->predot && max_n->predot <= n) {
return null_match(p);
}
/* a scan that stopped at the last dot already failed from this
   position or an earlier one, and n has not passed that dot */
if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
return -1;
}
for (i=0; n[i]; i += size_n) {
next_codepoint(n+i, &size_n);
if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) return 0;
if (n+i == ldot) {
/* at the last dot: give the remainder one more try just past
   the dot, then give up and record the failed start position */
if (ms_fnmatch_core(p, n+i+size_n, max_n+1, ldot) == 0) return 0;
if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
return -1;
}
}
if (!max_n->predot || max_n->predot > n) max_n->predot = n;
return null_match(p);
case '?':
/* a '?' matches any single character */
if (! *n) {
return -1;
}
next_codepoint(n, &size_n);
n += size_n;
break;
case '>':
/* a '>' matches any single character, but
treats '.' specially */
if (n[0] == '.') {
/* a trailing '.' matches if the remaining pattern can match nothing */
if (! n[1] && null_match(p) == 0) {
return 0;
}
/* otherwise the '>' is dropped without consuming the '.' */
break;
}
/* at the end of the string a '>' may match nothing */
if (! *n) return null_match(p);
next_codepoint(n, &size_n);
n += size_n;
break;
case '"':
/* a bit like a soft '.' */
/* matches the end of the string when the remaining pattern can
   match nothing, otherwise it must match a literal '.' */
if (*n == 0 && null_match(p) == 0) {
return 0;
}
if (*n != '.') return -1;
next_codepoint(n, &size_n);
n += size_n;
break;
default:
/* ordinary character: accept an exact codepoint match, or one that
   codepoint_cmpi() reports as equal (presumably a case-insensitive
   comparison - confirm against its definition) */
c2 = next_codepoint(n, &size_n);
if (c != c2 && codepoint_cmpi(c, c2) != 0) {
return -1;
}
n += size_n;
break;
}
}
/* pattern exhausted: it is a match only if the string is exhausted too */
if (! *n) {
return 0;
}
return -1;
}
/*
  Match 'string' against the CIFS wildcard 'pattern'.

  The wildcard characters are '*', '?', '<', '>' and '"'. A pattern
  without any of them is compared to the string with StrCaseCmp()
  (presumably a case-insensitive compare - confirm against its
  definition). For protocol levels up to and including
  PROTOCOL_LANMAN2 the pattern is first rewritten into the equivalent
  "new style" pattern and then matched as PROTOCOL_NT1. A string of
  ".." is matched as if it were ".".

  Returns 0 on a match and non-zero otherwise; -1 is also returned
  when a memory allocation fails.
*/
int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol)
{
	int ret;
	size_t count, i;
	struct max_n *max_n;
	const char *ldot;

	if (strcmp(string, "..") == 0) {
		string = ".";
	}

	if (strpbrk(pattern, "<>*?\"") == NULL) {
		/* this is not just an optimisation - it is essential
		   for LANMAN1 correctness */
		return StrCaseCmp(pattern, string);
	}

	if (protocol <= PROTOCOL_LANMAN2) {
		char *p = talloc_strdup(NULL, pattern);
		if (p == NULL) {
			return -1;
		}

		/*
		  for older negotiated protocols it is possible to
		  translate the pattern to produce a "new style"
		  pattern that exactly matches w2k behaviour
		*/
		for (i = 0; p[i] != '\0'; i++) {
			if (p[i] == '?') {
				p[i] = '>';
			} else if (p[i] == '.' &&
				   (p[i+1] == '?' ||
				    p[i+1] == '*' ||
				    p[i+1] == 0)) {
				p[i] = '"';
			} else if (p[i] == '*' &&
				   p[i+1] == '.') {
				p[i] = '<';
			}
		}

		ret = ms_fnmatch(p, string, PROTOCOL_NT1);
		talloc_free(p);
		return ret;
	}

	ldot = strrchr(string, '.');

	/* one max_n entry is needed per '*' or '<' in the pattern */
	count = 0;
	for (i = 0; pattern[i] != '\0'; i++) {
		if (pattern[i] == '*' || pattern[i] == '<') {
			count++;
		}
	}

	if (count == 0) {
		/* the core only dereferences max_n when it meets a '*' or
		   '<', so with none in the pattern no array is needed and
		   a zero-length allocation is avoided */
		return ms_fnmatch_core(pattern, string, NULL, ldot);
	}

	max_n = talloc_array_p(NULL, struct max_n, count);
	if (max_n == NULL) {
		return -1;
	}
	memset(max_n, 0, sizeof(*max_n) * count);

	ret = ms_fnmatch_core(pattern, string, max_n, ldot);

	talloc_free(max_n);

	return ret;
}
/* a generic fnmatch function - used for non-CIFS pattern matching.
   It calls ms_fnmatch() with the protocol fixed to PROTOCOL_NT1 and
   returns that call's result unchanged. */
int gen_fnmatch(const char *pattern, const char *string)
{
return ms_fnmatch(pattern, string, PROTOCOL_NT1);
}