linux/fs/cifs/cifs_unicode.c

/*
 *   fs/cifs/cifs_unicode.c
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2005
 *   Modified by Steve French (sfrench@us.ibm.com)
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include "cifs_unicode.h"
#include "cifs_uniupr.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"

/*
 * cifs_ucs2_bytes - how long will a string be after conversion?
 * @from - pointer to input string (little-endian 16-bit characters)
 * @maxbytes - don't go past this many bytes of input string
 * @codepage - destination codepage
 *
 * Walk a ucs2le string and return the number of bytes that the string will
 * be after being converted to the given charset, not including any null
 * termination required. Don't walk past maxbytes in the source buffer.
 *
 * A character for which the codepage's uni2char reports no conversion
 * (return value <= 0) is counted as a single byte.
 */
int
cifs_ucs2_bytes(const __le16 *from, int maxbytes,
		const struct nls_table *codepage)
{
	int i;
	int charlen, outlen = 0;
	int maxwords = maxbytes / 2;
	char tmp[NLS_MAX_CHARSET_SIZE];

	/*
	 * The bound must be tested before from[i] is dereferenced; otherwise
	 * an unterminated input makes us read one word beyond maxbytes.
	 */
	for (i = 0; i < maxwords && from[i]; i++) {
		/* tmp is scratch space only; we just want the length */
		charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
					     NLS_MAX_CHARSET_SIZE);
		if (charlen > 0)
			outlen += charlen;
		else
			outlen++;
	}

	return outlen;
}

/*
 * cifs_mapchar - convert a little-endian char to proper char in codepage
 * @target - where converted character should be copied
 * @src_char - 2 byte little-endian source character
 * @cp - codepage to which character should be converted
 * @mapchar - should character be mapped according to mapchars mount option?
 *
 * Converts exactly one source character and returns the number of bytes
 * stored at @target (always at least 1). When @mapchar is set, the six
 * remapped code points handled below are written directly as their ASCII
 * equivalents. Everything else goes through the codepage's uni2char; if that
 * reports no conversion (<= 0), a single '?' is stored instead.
 *
 * The caller must make sure @target has room for the result (at least
 * NLS_MAX_CHARSET_SIZE bytes), since that is the limit handed to uni2char.
 */
static int
cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
	     bool mapchar)
{
	int nbytes;

	if (mapchar) {
		/*
		 * BB: Cannot handle remapping UNI_SLASH until all the calls
		 *     to build_path_from_dentry are modified, as they use
		 *     slash as separator.
		 */
		switch (le16_to_cpu(src_char)) {
		case UNI_COLON:
			*target = ':';
			return 1;
		case UNI_ASTERIK:
			*target = '*';
			return 1;
		case UNI_QUESTION:
			*target = '?';
			return 1;
		case UNI_PIPE:
			*target = '|';
			return 1;
		case UNI_GRTRTHAN:
			*target = '>';
			return 1;
		case UNI_LESSTHAN:
			*target = '<';
			return 1;
		default:
			/* not a remapped character: use the codepage */
			break;
		}
	}

	nbytes = cp->uni2char(le16_to_cpu(src_char), target,
			      NLS_MAX_CHARSET_SIZE);
	if (nbytes > 0)
		return nbytes;

	/* codepage could not convert it; substitute a question mark */
	*target = '?';
	return 1;
}

/*
 * cifs_from_ucs2 - convert utf16le string to local charset
 * @to - destination buffer
 * @from - source buffer
 * @tolen - destination buffer size (in bytes)
 * @fromlen - source buffer size (in bytes)
 * @codepage - codepage to which characters should be converted
 * @mapchar - should characters be remapped according to the mapchars option?
 *
 * Convert a little-endian ucs2le string (as sent by the server) into the
 * given codepage. Conversion stops at the first zero word in the source,
 * after fromlen / 2 source words, or as soon as the next converted character
 * would no longer leave room for the terminator in the destination. The
 * result is then terminated with nls_nullsize(codepage) zero bytes.
 *
 * Returns the length of the destination string in bytes (including the null
 * terminator).
 *
 * Note that some windows versions actually send multiword UTF-16 characters
 * instead of straight UCS-2. The linux nls routines however aren't able to
 * deal with those characters properly. In the event that we get some of
 * those characters, they won't be translated properly.
 */
int
cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
		 const struct nls_table *codepage, bool mapchar)
{
	int idx, n, written = 0;
	int termlen = nls_nullsize(codepage);
	int nwords = fromlen / 2;
	int checked_from;
	char scratch[NLS_MAX_CHARSET_SIZE];

	/*
	 * Converted characters vary in width. Below this output offset even
	 * a maximum-width character plus the terminator is guaranteed to
	 * fit, so no per-character overflow check is needed until then.
	 */
	checked_from = tolen - (NLS_MAX_CHARSET_SIZE + termlen);

	for (idx = 0; idx < nwords; idx++) {
		if (!from[idx])
			break;

		if (written >= checked_from) {
			/*
			 * Close to the end: convert into a scratch buffer
			 * first to learn the width, and stop if storing the
			 * character would eat into the terminator's space.
			 */
			n = cifs_mapchar(scratch, from[idx], codepage,
					 mapchar);
			if (written + n > tolen - termlen)
				break;
		}

		/* it fits: convert straight into the destination */
		n = cifs_mapchar(&to[written], from[idx], codepage, mapchar);
		written += n;
	}

	/* terminate with as many zero bytes as the codepage requires */
	for (n = 0; n < termlen; n++)
		to[written++] = 0;

	return written;
}

/*
 * NAME:	cifs_strfromUCS_le()
 *
 * FUNCTION:	Convert little-endian unicode string to character string
 *
 * Converts at most len 16-bit characters from 'from', stopping early at the
 * first zero character. A character for which the codepage's uni2char
 * reports no conversion (<= 0) is written as '?'. The output is terminated
 * with a single zero byte.
 *
 * RETURNS:	number of bytes stored in 'to', not counting the terminator
 *
 * NOTE:	no destination size is passed in, so nothing here bounds
 *		the writes to 'to'.
 */
int
cifs_strfromUCS_le(char *to, const __le16 *from,
		   int len, const struct nls_table *codepage)
{
	int written = 0;
	int idx = 0;

	while (idx < len && from[idx]) {
		/* 2.4.0 kernel or greater */
		int n = codepage->uni2char(le16_to_cpu(from[idx]),
					   &to[written],
					   NLS_MAX_CHARSET_SIZE);

		if (n <= 0) {
			/* unconvertible: substitute a question mark */
			to[written] = '?';
			n = 1;
		}
		written += n;
		idx++;
	}

	to[written] = 0;
	return written;
}

/*
 * NAME:	cifs_strtoUCS()
 *
 * FUNCTION:	Convert character string to unicode string
 *
 * Consumes 'from' until len bytes have been used up or a zero byte is
 * reached, storing one little-endian 16-bit character in 'to' per source
 * character. When the codepage's char2uni returns less than 1, the failure
 * is logged, a question mark (0x003f) is stored and one source byte is
 * skipped. The output is terminated with a zero character.
 *
 * RETURNS:	number of 16-bit characters stored, not counting the
 *		terminator
 */
int
cifs_strtoUCS(__le16 *to, const char *from, int len,
	      const struct nls_table *codepage)
{
	wchar_t *wide = (wchar_t *)to; /* needed to quiet sparse */
	int nchars = 0;
	int consumed;

	while (len && *from) {
		/* works for 2.4.0 kernel or later */
		consumed = codepage->char2uni(from, len, &wide[nchars]);
		if (consumed >= 1) {
			/* char2uni stored a cpu-order value; make it le16 */
			to[nchars] = cpu_to_le16(wide[nchars]);
		} else {
			cERROR(1,
			       ("strtoUCS: char2uni of %d returned %d",
				(int)*from, consumed));
			/* A question mark */
			to[nchars] = cpu_to_le16(0x003f);
			consumed = 1;
		}

		nchars++;
		from += consumed;
		len -= consumed;
	}

	to[nchars] = 0;
	return nchars;
}
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/*`
			`* fs/cifs/cifs_unicode.c`
			`*`
[CIFS] Cleanup sparse warnings for unicode little endian casts Following Shaggy's suggestion, do a better job on the unicode string handling routines in cifs in specifying that the wchar_t are really little endian widechars (__le16). Signed-off-by: Steve French <sfrench@us.ibm.com> 2005-11-12 02:18:19 +03:00			`* Copyright (c) International Business Machines Corp., 2000,2005`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`* Modified by Steve French (sfrench@us.ibm.com)`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
[CIFS] whitespace cleanup part 2 Various coding style problems found by running the new checkpatch.pl script against fs/cifs. 3 more files fixed up. Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-06-06 00:35:06 +04:00			`* the Free Software Foundation; either version 2 of the License, or`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`* (at your option) any later version.`
[CIFS] whitespace cleanup part 2 Various coding style problems found by running the new checkpatch.pl script against fs/cifs. 3 more files fixed up. Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-06-06 00:35:06 +04:00			`*`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See`
			`* the GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
[CIFS] whitespace cleanup part 2 Various coding style problems found by running the new checkpatch.pl script against fs/cifs. 3 more files fixed up. Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-06-06 00:35:06 +04:00			`* along with this program; if not, write to the Free Software`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
			`*/`
			`#include <linux/fs.h>`
			`#include "cifs_unicode.h"`
			`#include "cifs_uniupr.h"`
			`#include "cifspdu.h"`
[CIFS] Support for setting up SMB sessions to legacy lanman servers 2006-06-01 02:40:51 +04:00			`#include "cifsglob.h"`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`#include "cifs_debug.h"`

cifs: add new function to get unicode string length in bytes Working in units of words means we do a lot of unnecessary conversion back and forth. Standardize on bytes instead since that's more useful for allocating buffers and such. Also, remove hostlen_fromUCS since the new function has a similar purpose. Signed-off-by: Jeff Layton <jlayton@redhat.com> Acked-by: Suresh Jayaraman <sjayaraman@suse.de> Signed-off-by: Steve French <sfrench@us.ibm.com> 2009-04-30 14:46:32 +04:00			`/*`
			`* cifs_ucs2_bytes - how long will a string be after conversion?`
			`* @ucs - pointer to input string`
			`* @maxbytes - don't go past this many bytes of input string`
			`* @codepage - destination codepage`
			`*`
			`* Walk a ucs2le string and return the number of bytes that the string will`
			`* be after being converted to the given charset, not including any null`
			`* termination required. Don't walk past maxbytes in the source buffer.`
			`*/`
			`int`
			`cifs_ucs2_bytes(const __le16 *from, int maxbytes,`
			`const struct nls_table *codepage)`
			`{`
			`int i;`
			`int charlen, outlen = 0;`
			`int maxwords = maxbytes / 2;`
			`char tmp[NLS_MAX_CHARSET_SIZE];`

			`for (i = 0; from[i] && i < maxwords; i++) {`
			`charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,`
			`NLS_MAX_CHARSET_SIZE);`
			`if (charlen > 0)`
			`outlen += charlen;`
			`else`
			`outlen++;`
			`}`

			`return outlen;`
			`}`

cifs: add replacement for cifs_strtoUCS_le called cifs_from_ucs2 Add a replacement function for cifs_strtoUCS_le. cifs_from_ucs2 takes args for the source and destination length so that we can ensure that the function is confined within the intended buffers. Signed-off-by: Jeff Layton <jlayton@redhat.com> Acked-by: Suresh Jayaraman <sjayaraman@suse.de> Signed-off-by: Steve French <sfrench@us.ibm.com> 2009-04-30 14:46:15 +04:00			`/*`
			`* cifs_mapchar - convert a little-endian char to proper char in codepage`
			`* @target - where converted character should be copied`
			`* @src_char - 2 byte little-endian source character`
			`* @cp - codepage to which character should be converted`
			`* @mapchar - should character be mapped according to mapchars mount option?`
			`*`
			`* This function handles the conversion of a single character. It is the`
			`* responsibility of the caller to ensure that the target buffer is large`
			`* enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).`
			`*/`
			`static int`
			`cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,`
			`bool mapchar)`
			`{`
			`int len = 1;`

			`if (!mapchar)`
			`goto cp_convert;`

			`/*`
			`* BB: Cannot handle remapping UNI_SLASH until all the calls to`
			`* build_path_from_dentry are modified, as they use slash as`
			`* separator.`
			`*/`
			`switch (le16_to_cpu(src_char)) {`
			`case UNI_COLON:`
			`*target = ':';`
			`break;`
			`case UNI_ASTERIK:`
			`*target = '*';`
			`break;`
			`case UNI_QUESTION:`
			`*target = '?';`
			`break;`
			`case UNI_PIPE:`
			`*target = '|';`
			`break;`
			`case UNI_GRTRTHAN:`
			`*target = '>';`
			`break;`
			`case UNI_LESSTHAN:`
			`*target = '<';`
			`break;`
			`default:`
			`goto cp_convert;`
			`}`

			`out:`
			`return len;`

			`cp_convert:`
			`len = cp->uni2char(le16_to_cpu(src_char), target,`
			`NLS_MAX_CHARSET_SIZE);`
			`if (len <= 0) {`
			`*target = '?';`
			`len = 1;`
			`}`
			`goto out;`
			`}`

			`/*`
			`* cifs_from_ucs2 - convert utf16le string to local charset`
			`* @to - destination buffer`
			`* @from - source buffer`
			`* @tolen - destination buffer size (in bytes)`
			`* @fromlen - source buffer size (in bytes)`
			`* @codepage - codepage to which characters should be converted`
			`* @mapchar - should characters be remapped according to the mapchars option?`
			`*`
			`* Convert a little-endian ucs2le string (as sent by the server) to a string`
			`* in the provided codepage. The tolen and fromlen parameters are to ensure`
			`* that the code doesn't walk off of the end of the buffer (which is always`
			`* a danger if the alignment of the source buffer is off). The destination`
			`* string is always properly null terminated and fits in the destination`
			`* buffer. Returns the length of the destination string in bytes (including`
			`* null terminator).`
			`*`
			`* Note that some windows versions actually send multiword UTF-16 characters`
			`* instead of straight UCS-2. The linux nls routines however aren't able to`
			`* deal with those characters properly. In the event that we get some of`
			`* those characters, they won't be translated properly.`
			`*/`
			`int`
			`cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,`
			`const struct nls_table *codepage, bool mapchar)`
			`{`
			`int i, charlen, safelen;`
			`int outlen = 0;`
			`int nullsize = nls_nullsize(codepage);`
			`int fromwords = fromlen / 2;`
			`char tmp[NLS_MAX_CHARSET_SIZE];`

			`/*`
			`* because the chars can be of varying widths, we need to take care`
			`* not to overflow the destination buffer when we get close to the`
			`* end of it. Until we get to this offset, we don't need to check`
			`* for overflow however.`
			`*/`
			`safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);`

			`for (i = 0; i < fromwords && from[i]; i++) {`
			`/*`
			`* check to see if converting this character might make the`
			`* conversion bleed into the null terminator`
			`*/`
			`if (outlen >= safelen) {`
			`charlen = cifs_mapchar(tmp, from[i], codepage, mapchar);`
			`if ((outlen + charlen) > (tolen - nullsize))`
			`break;`
			`}`

			`/* put converted char into 'to' buffer */`
			`charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar);`
			`outlen += charlen;`
			`}`

			`/* properly null-terminate string */`
			`for (i = 0; i < nullsize; i++)`
			`to[outlen++] = 0;`

			`return outlen;`
			`}`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/*`
			`* NAME: cifs_strfromUCS()`
			`*`
			`* FUNCTION: Convert little-endian unicode string to character string`
			`*`
			`*/`
			`int`
[CIFS] reduce checkpatch warnings Signed-off-by: Steve French <sfrench@us.ibm.com> 2008-02-08 02:25:02 +03:00			`cifs_strfromUCS_le(char to, const __le16 from,`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`int len, const struct nls_table *codepage)`
			`{`
			`int i;`
			`int outlen = 0;`

			`for (i = 0; (i < len) && from[i]; i++) {`
			`int charlen;`
			`/* 2.4.0 kernel or greater */`
			`charlen =`
			`codepage->uni2char(le16_to_cpu(from[i]), &to[outlen],`
			`NLS_MAX_CHARSET_SIZE);`
			`if (charlen > 0) {`
			`outlen += charlen;`
			`} else {`
			`to[outlen++] = '?';`
			`}`
			`}`
			`to[outlen] = 0;`
			`return outlen;`
			`}`

			`/*`
			`* NAME: cifs_strtoUCS()`
			`*`
			`* FUNCTION: Convert character string to unicode string`
			`*`
			`*/`
			`int`
[CIFS] reduce checkpatch warnings Signed-off-by: Steve French <sfrench@us.ibm.com> 2008-02-08 02:25:02 +03:00			`cifs_strtoUCS(__le16 to, const char from, int len,`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`const struct nls_table *codepage)`
			`{`
			`int charlen;`
			`int i;`
[CIFS] whitespace/formatting fixes This should be the last big batch of whitespace/formatting fixes. checkpatch warnings for the cifs directory are down about 90% and many of the remaining ones are harder to remove or make the code harder to read. Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-07-13 04:33:32 +04:00			`wchar_t wchar_to = (wchar_t )to; /* needed to quiet sparse */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
			`for (i = 0; len && *from; i++, from += charlen, len -= charlen) {`

			`/* works for 2.4.0 kernel or later */`
[CIFS] Cleanup sparse warnings for unicode little endian casts Following Shaggy's suggestion, do a better job on the unicode string handling routines in cifs in specifying that the wchar_t are really little endian widechars (__le16). Signed-off-by: Steve French <sfrench@us.ibm.com> 2005-11-12 02:18:19 +03:00			`charlen = codepage->char2uni(from, len, &wchar_to[i]);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`if (charlen < 1) {`
			`cERROR(1,`
[CIFS] Remove unnecessary parm to cifs_reopen_file Also expand debug entry to show which character on a failed Unicode mapping. Acked-by: Shaggy <shaggy@us.ibm.com> Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-04-04 21:10:24 +04:00			`("strtoUCS: char2uni of %d returned %d",`
			`(int)*from, charlen));`
[CIFS] Reduce sparse endian warnings Signed-off-by: Steve French <sfrench@us.ibm.com> 2005-11-11 06:28:44 +03:00			`/* A question mark */`
[CIFS] Cleanup sparse warnings for unicode little endian casts Following Shaggy's suggestion, do a better job on the unicode string handling routines in cifs in specifying that the wchar_t are really little endian widechars (__le16). Signed-off-by: Steve French <sfrench@us.ibm.com> 2005-11-12 02:18:19 +03:00			`to[i] = cpu_to_le16(0x003f);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`charlen = 1;`
[CIFS] whitespace cleanup part 2 Various coding style problems found by running the new checkpatch.pl script against fs/cifs. 3 more files fixed up. Signed-off-by: Steve French <sfrench@us.ibm.com> 2007-06-06 00:35:06 +04:00			`} else`
[CIFS] Cleanup sparse warnings for unicode little endian casts Following Shaggy's suggestion, do a better job on the unicode string handling routines in cifs in specifying that the wchar_t are really little endian widechars (__le16). Signed-off-by: Steve French <sfrench@us.ibm.com> 2005-11-12 02:18:19 +03:00			`to[i] = cpu_to_le16(wchar_to[i]);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
			`}`

			`to[i] = 0;`
			`return i;`
			`}`