1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-10 01:18:15 +03:00
samba-mirror/source/include/charset.h
Andrew Tridgell 814881f0e5 r2857: this commit gets rid of smb_ucs2_t, wpstring and fpstring, plus lots of associated functions.
The motivation for this change was to avoid having to convert to/from
ucs2 strings for so many operations. Doing that was slow, used many
static buffers, and was also incorrect as it didn't cope properly with
unicode codepoints above 65536 (which could not be represented
correctly as smb_ucs2_t chars)

The two core functions that allowed this change are next_codepoint()
and push_codepoint(). These functions allow you to correctly walk a
arbitrary multi-byte string a character at a time without converting
the whole string to ucs2.

While doing this cleanup I also fixed several ucs2 string handling
bugs. See the commit for details.

The following code (which counts the number of occuraces of 'c' in a
string) shows how to use the new interface:

size_t count_chars(const char *s, char c)
{
	size_t count = 0;

	while (*s) {
		size_t size;
		codepoint_t c2 = next_codepoint(s, &size);
		if (c2 == c) count++;
		s += size;
	}

	return count;
}
2007-10-10 12:59:39 -05:00

45 lines
1.6 KiB
C

/*
Unix SMB/CIFS implementation.
charset defines
Copyright (C) Andrew Tridgell 2001
Copyright (C) Jelmer Vernooij 2002
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* this defines the charset types used in samba */
typedef enum {CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4, CH_UTF16BE=5} charset_t;
#define NUM_CHARSETS 6
/*
* for each charset we have a function that pulls from that charset to
* a ucs2 buffer, and a function that pushes to a ucs2 buffer
* */
struct charset_functions {
const char *name;
size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft);
size_t (*push)(void *, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft);
struct charset_functions *prev, *next;
};
/* this type is used for manipulating unicode codepoints */
typedef uint32_t codepoint_t;
#define INVALID_CODEPOINT ((codepoint_t)-1)