samba-mirror/ms_fnmatch.c at 72093ce62f1e09db86452720fe8280ad66824cde

mirror of https://github.com/samba-team/samba.git synced 2025-01-12 09:18:10 +03:00

Andrew Tridgell 7d32679e96 r2857: this commit gets rid of smb_ucs2_t, wpstring and fpstring, plus lots of associated functions.

The motivation for this change was to avoid having to convert to/from
ucs2 strings for so many operations. Doing that was slow, used many
static buffers, and was also incorrect as it didn't cope properly with
unicode codepoints above 65536 (which could not be represented
correctly as smb_ucs2_t chars)

The two core functions that allowed this change are next_codepoint()
and push_codepoint(). These functions allow you to correctly walk an
arbitrary multi-byte string a character at a time without converting
the whole string to ucs2.

While doing this cleanup I also fixed several ucs2 string handling
bugs. See the commit for details.

The following code (which counts the number of occurrences of 'c' in a
string) shows how to use the new interface:

size_t count_chars(const char *s, char c)
{
	size_t count = 0;

	while (*s) {
		size_t size;
		codepoint_t c2 = next_codepoint(s, &size);
		if (c2 == c) count++;
		s += size;
	}

	return count;
}
(This used to be commit 814881f0e5)

2007-10-10 12:59:39 -05:00

220 lines

5.1 KiB

C

Raw Blame History

 /*
    Unix SMB/CIFS implementation.
    filename matching routine
    Copyright (C) Andrew Tridgell 1992-2004
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 /*
    This module was originally based on fnmatch.c copyright by the Free
    Software Foundation. It bears little (if any) resemblance to that
    code now
 */
 #include "includes.h"
 /*
    Check whether a pattern tail can match the empty string.

    Returns 0 when every remaining character of 'p' is one of
    '*', '<', '"' or '>' (an empty tail also qualifies), and -1 as
    soon as any other character is seen.
 */
 static int null_match(const char *p)
 {
 	while (*p != '\0') {
 		switch (*p) {
 		case '*':
 		case '<':
 		case '"':
 		case '>':
 			/* this character is acceptable, keep scanning */
 			p++;
 			break;
 		default:
 			return -1;
 		}
 	}
 	return 0;
 }
 /*
   the max_n structure is purely for efficiency, it doesn't contribute
   to the matching algorithm except by ensuring that the algorithm does
   not grow exponentially
 */
 struct max_n {
 	/* written by the '*' and '<' cases of ms_fnmatch_core() once every
 	   starting point from a given string position onward has failed;
 	   keeps the lowest such position. Zeroed by ms_fnmatch() before
 	   matching starts. */
 	const char *predot;
 	/* written by the '<' case of ms_fnmatch_core() when it reached the
 	   last dot without finding a match; keeps the lowest string
 	   position from which that happened. Zeroed by ms_fnmatch() before
 	   matching starts. */
 	const char *postdot;
 };
 /*
    Match the pattern 'p' against the string 'n', walking both one
    codepoint at a time with next_codepoint().

    p     - the pattern. Besides literal characters it may contain the
            wildcards '*', '<', '?', '>' and '"' handled in the switch
            below.
    n     - the string being matched.
    max_n - array of failure markers. Each '*' or '<' uses the current
            element and hands max_n+1 to its recursive calls. The max_n
            array is an optimisation only.
    ldot  - NULL if the string does not contain a '.', otherwise it
            points at the last dot in 'n'.

    Returns 0 when the pattern matches the whole string, -1 otherwise.
 */
 static int ms_fnmatch_core(const char *p, const char *n,
 			   struct max_n *max_n, const char *ldot)
 {
 	codepoint_t c, c2;
 	int i;
 	size_t size, size_n;
 	/* consume the pattern one codepoint at a time; the loop ends when
 	   next_codepoint() yields 0 */
 	while ((c = next_codepoint(p, &size))) {
 		p += size;
 		switch (c) {
 		case '*':
 			/* a '*' matches zero or more characters of any type */
 			/* predot marks a position from which every start point
 			   was already tried without success; at or beyond it
 			   only a tail that matches the empty string can help */
 			if (max_n->predot && max_n->predot <= n) {
 				return null_match(p);
 			}
 			/* try the rest of the pattern at each codepoint
 			   boundary of n; size_n is filled in by
 			   next_codepoint() before the loop increment uses it */
 			for (i=0; n[i]; i += size_n) {
 				next_codepoint(n+i, &size_n);
 				if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) {
 					return 0;
 				}
 			}
 			/* nothing matched: remember the lowest failing start */
 			if (!max_n->predot || max_n->predot > n) max_n->predot = n;
 			/* the '*' has swallowed all of n, so the rest of the
 			   pattern must be able to match nothing */
 			return null_match(p);
 		case '<':
 			/* a '<' matches zero or more characters of
 			   any type, but stops matching at the last
 			   '.' in the string. */
 			if (max_n->predot && max_n->predot <= n) {
 				return null_match(p);
 			}
 			/* postdot marks a start from which '<' already gave up
 			   at the last dot; a start at or after it that is
 			   still not past the last dot is rejected outright */
 			if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
 				return -1;
 			}
 			for (i=0; n[i]; i += size_n) {
 				next_codepoint(n+i, &size_n);
 				if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) return 0;
 				if (n+i == ldot) {
 					/* at the last dot: make one final attempt
 					   just after the dot, then give up and
 					   record the failure in postdot */
 					if (ms_fnmatch_core(p, n+i+size_n, max_n+1, ldot) == 0) return 0;
 					if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
 					return -1;
 				}
 			}
 			/* ran off the end of n without meeting the last dot */
 			if (!max_n->predot || max_n->predot > n) max_n->predot = n;
 			return null_match(p);
 		case '?':
 			/* a '?' matches any single character */
 			if (! *n) {
 				return -1;
 			}
 			next_codepoint(n, &size_n);
 			n += size_n;
 			break;
 		case '>':
 			/* a '>' matches any single character, but
 			   treats '.' specially */
 			if (n[0] == '.') {
 				/* a dot that ends the string matches if the rest
 				   of the pattern can match nothing; otherwise
 				   the '>' is dropped without consuming the dot */
 				if (! n[1] && null_match(p) == 0) {
 					return 0;
 				}
 				break;
 			}
 			/* at the end of the string the outcome depends only on
 			   whether the rest of the pattern can match nothing */
 			if (! *n) return null_match(p);
 			next_codepoint(n, &size_n);
 			n += size_n;
 			break;
 		case '"':
 			/* a bit like a soft '.' */
 			/* matches the end of the string when the rest of the
 			   pattern can match nothing; otherwise it must match
 			   a literal '.', which is consumed */
 			if (*n == 0 && null_match(p) == 0) {
 				return 0;
 			}
 			if (*n != '.') return -1;
 			next_codepoint(n, &size_n);
 			n += size_n;
 			break;
 		default:
 			/* literal character: accept identical codepoints, or
 			   ones codepoint_cmpi() reports as equal (presumably a
 			   case-insensitive comparison - confirm against its
 			   definition). At the end of n, c2 is 0 and can never
 			   equal the non-zero c. */
 			c2 = next_codepoint(n, &size_n);
 			if (c != c2 && codepoint_cmpi(c, c2) != 0) {
 				return -1;
 			}
 			n += size_n;
 			break;
 		}
 	}
 	/* pattern exhausted: it is a match only if the string is too */
 	if (! *n) {
 		return 0;
 	}
 	return -1;
 }
 /*
    Match 'string' against the wildcard 'pattern' for the given
    negotiated protocol level.

    pattern  - the pattern; may contain the wildcards < > * ? and "
    string   - the name to test; ".." is matched as if it were "."
    protocol - protocol level; at PROTOCOL_LANMAN2 and below the
               pattern is first rewritten into the "new style"
               wildcards and then matched as PROTOCOL_NT1

    Returns 0 on a match. A pattern without any wildcard returns the
    result of StrCaseCmp(). Otherwise -1 is returned when there is no
    match or when an allocation fails.
 */
 int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol)
 {
 	struct max_n *state;
 	const char *s;
 	int nwild = 0;
 	int result;

 	if (strcmp(string, "..") == 0) {
 		string = ".";
 	}

 	if (strpbrk(pattern, "<>*?\"") == NULL) {
 		/* this is not just an optimisation - it is essential
 		   for LANMAN1 correctness */
 		return StrCaseCmp(pattern, string);
 	}

 	if (protocol <= PROTOCOL_LANMAN2) {
 		char *xlat = talloc_strdup(NULL, pattern);
 		char *q;

 		if (xlat == NULL) {
 			return -1;
 		}

 		/*
 		  for older negotiated protocols it is possible to
 		  translate the pattern to produce a "new style"
 		  pattern that exactly matches w2k behaviour
 		*/
 		for (q = xlat; *q != 0; q++) {
 			switch (*q) {
 			case '?':
 				*q = '>';
 				break;
 			case '.':
 				/* a dot followed by '?', '*' or the end of
 				   the pattern becomes the soft dot */
 				if (q[1] == '?' || q[1] == '*' || q[1] == 0) {
 					*q = '"';
 				}
 				break;
 			case '*':
 				/* a star directly before a dot becomes '<' */
 				if (q[1] == '.') {
 					*q = '<';
 				}
 				break;
 			default:
 				break;
 			}
 		}

 		result = ms_fnmatch(xlat, string, PROTOCOL_NT1);
 		talloc_free(xlat);
 		return result;
 	}

 	/* one max_n slot is needed for every '*' or '<' in the pattern */
 	for (s = pattern; *s != 0; s++) {
 		if (*s == '*' || *s == '<') {
 			nwild++;
 		}
 	}

 	state = talloc_array_p(NULL, struct max_n, nwild);
 	if (state == NULL) {
 		return -1;
 	}
 	memset(state, 0, sizeof(struct max_n) * nwild);

 	result = ms_fnmatch_core(pattern, string, state, strrchr(string, '.'));

 	talloc_free(state);
 	return result;
 }
 /*
    A generic fnmatch function, used for non-CIFS pattern matching.
    It calls ms_fnmatch() with PROTOCOL_NT1 and returns its result
    unchanged.
 */
 int gen_fnmatch(const char *pattern, const char *string)
 {
 	int rc = ms_fnmatch(pattern, string, PROTOCOL_NT1);

 	return rc;
 }

220 lines 5.1 KiB C Raw Blame History

220 lines

5.1 KiB

C

Raw Blame History