mirror of
https://github.com/samba-team/samba.git
synced 2025-01-26 10:04:02 +03:00
7d32679e96
The motivation for this change was to avoid having to convert to/from ucs2 strings for so many operations. Doing that was slow, used many static buffers, and was also incorrect as it didn't cope properly with unicode codepoints above 65535 (which could not be represented correctly as smb_ucs2_t chars). The two core functions that allowed this change are next_codepoint() and push_codepoint(). These functions allow you to correctly walk an arbitrary multi-byte string a character at a time without converting the whole string to ucs2. While doing this cleanup I also fixed several ucs2 string handling bugs. See the commit for details. The following code (which counts the number of occurrences of 'c' in a string) shows how to use the new interface: size_t count_chars(const char *s, char c) { size_t count = 0; while (*s) { size_t size; codepoint_t c2 = next_codepoint(s, &size); if (c2 == c) count++; s += size; } return count; } (This used to be commit 814881f0e50019196b3aa9fbe4aeadbb98172040)
45 lines
1.6 KiB
C
45 lines
1.6 KiB
C
/*
|
|
Unix SMB/CIFS implementation.
|
|
charset defines
|
|
Copyright (C) Andrew Tridgell 2001
|
|
Copyright (C) Jelmer Vernooij 2002
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
/* this defines the charset types used in samba */
|
|
/*
 * Charset identifiers. Every enumerator is given an explicit value;
 * the values run contiguously from 0 (CH_UTF16) to 5 (CH_UTF16BE).
 */
typedef enum {
	CH_UTF16   = 0,
	CH_UNIX    = 1,
	CH_DISPLAY = 2,
	CH_DOS     = 3,
	CH_UTF8    = 4,
	CH_UTF16BE = 5
} charset_t;
|
|
|
|
/*
 * Number of charset_t enumerators (CH_UTF16 .. CH_UTF16BE).
 * Keep this in step with the enum whenever a charset is added or removed.
 */
#define NUM_CHARSETS 6
|
|
|
|
/*
 * Per-charset conversion hooks.
 *
 * For each charset we have two functions: "pull" converts from that
 * charset into a ucs2 buffer, and "push" is its counterpart working
 * against a ucs2 buffer.
 * NOTE(review): the earlier wording said push also goes "to a ucs2
 * buffer"; presumably push converts from ucs2 into the charset (the
 * reverse of pull) -- confirm against the implementations.
 *
 * Both hooks share one signature:
 *   cd            opaque pointer handed through to the hook
 *                 (presumably the conversion descriptor -- confirm)
 *   inbuf         address of the read cursor into the input bytes
 *   inbytesleft   address of the count of input bytes remaining
 *   outbuf        address of the write cursor into the output buffer
 *   outbytesleft  address of the count of output bytes remaining
 * The parameter names follow iconv(3); presumably the cursor/count
 * update rules and the return value follow it too -- confirm.
 */
struct charset_functions {
	/* name identifying the charset this entry handles */
	const char *name;

	/* convert from this charset to ucs2 */
	size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);

	/* counterpart of pull (see NOTE above regarding direction) */
	size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);

	/* links to neighbouring entries (presumably a doubly linked list) */
	struct charset_functions *prev, *next;
};
|
|
|
|
/* this type is used for manipulating unicode codepoints */
|
|
/* 32 bits wide, so codepoints beyond the 16-bit range fit in a single value */
typedef uint32_t codepoint_t;
|
|
|
|
/*
 * Sentinel value: -1 converted to codepoint_t, i.e. the largest value
 * codepoint_t can hold (all bits set).
 * NOTE(review): presumably what the codepoint helpers return for input
 * that cannot be decoded -- confirm against callers.
 */
#define INVALID_CODEPOINT ((codepoint_t)-1)
|