linux/lib/strncpy_from_user.c

#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/errno.h>

#include <asm/byteorder.h>
#include <asm/word-at-a-time.h>

/*
 * IS_UNALIGNED(src, dst) - non-zero when the word-at-a-time fast path
 * must be skipped because either pointer is not aligned to sizeof(long).
 *
 * With CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined the macro is the
 * constant 0 and its arguments are not evaluated.  Otherwise the two
 * addresses are OR-ed together so a single mask tests both.  Each
 * argument is parenthesized before the cast so that an expression
 * argument such as 'p + 1' is converted as a whole rather than being
 * parsed as '(long) p + 1'.
 */
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#define IS_UNALIGNED(src, dst)	0
#else
#define IS_UNALIGNED(src, dst)	\
	(((long) (dst) | (long) (src)) & (sizeof(long) - 1))
#endif

/*
 * Copy a NUL-terminated string from user space, returning its length
 * without the terminating '\0'.
 *
 * 'count' is the limit requested by the caller; running into it is not an
 * error and simply yields 'count'.  'max' is the number of bytes left
 * before the end of the user address range; running into that limit first
 * yields -EFAULT.
 */
static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
{
	const struct word_at_a_time wa_consts = WORD_AT_A_TIME_CONSTANTS;
	long copied = 0;

	/* Fold both limits into 'max' so the loops test a single bound. */
	if (count < max)
		max = count;

	/* Whole-word copies are only attempted when both pointers allow it. */
	if (!IS_UNALIGNED(src, dst)) {
		for (; max >= sizeof(unsigned long);
		     copied += sizeof(unsigned long), max -= sizeof(unsigned long)) {
			unsigned long word, zbits;

			/* A faulting word read is retried bytewise below. */
			if (unlikely(unsafe_get_user(word, (unsigned long __user *)(src + copied))))
				break;

			/* The full word is stored before it is scanned for a NUL. */
			*(unsigned long *)(dst + copied) = word;
			if (!has_zero(word, &zbits, &wa_consts))
				continue;

			zbits = prep_zero_mask(word, zbits, &wa_consts);
			zbits = create_zero_mask(zbits);
			return copied + find_zero(zbits);
		}
	}

	/* Bytewise tail: unaligned pointers, a short remainder, or a faulted word. */
	for (; max; max--, copied++) {
		char ch;

		if (unlikely(unsafe_get_user(ch, src + copied)))
			return -EFAULT;
		dst[copied] = ch;
		if (ch == '\0')
			return copied;
	}

	/*
	 * 'max' ran out.  If that was the caller's own limit we are done;
	 * otherwise the end of the user address range was hit with more
	 * bytes still wanted, which is a fault.
	 */
	return copied >= count ? copied : -EFAULT;
}

/**
 * strncpy_from_user() - Copy a NUL terminated string from userspace.
 * @dst:   Kernel-space destination buffer; must have room for at least
 *         @count bytes.
 * @src:   User-space address of the string.
 * @count: Upper bound on the number of bytes copied, trailing NUL included.
 *
 * Return: the string length, excluding the trailing NUL, on success.
 * When no NUL is found within the first @count bytes, @count bytes are
 * copied, @dst is left without a terminating NUL, and @count is returned.
 * 0 is returned without touching user memory when @count is not positive.
 * -EFAULT is returned when @src is not below user_addr_max() or when the
 * user access fails; part of the string may already be in @dst then.
 */
long strncpy_from_user(char *dst, const char __user *src, long count)
{
	unsigned long limit, start;
	long ret;

	if (unlikely(count <= 0))
		return 0;

	limit = user_addr_max();
	start = (unsigned long)src;
	if (unlikely(start >= limit))
		return -EFAULT;

	/* All user reads happen between one begin/end pair. */
	user_access_begin();
	ret = do_strncpy_from_user(dst, src, count, limit - start);
	user_access_end();
	return ret;
}
EXPORT_SYMBOL(strncpy_from_user);
lib/strncpy_from_user.c: replace module.h include strncpy_from_user.c only needs EXPORT_SYMBOL, so just include compiler.h and export.h instead of the whole module.h machinery. Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-13 02:03:16 +03:00			`#include <linux/compiler.h>`
			`#include <linux/export.h>`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`#include <linux/uaccess.h>`
			`#include <linux/kernel.h>`
			`#include <linux/errno.h>`

			`#include <asm/byteorder.h>`
word-at-a-time: make the interfaces truly generic This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The <asm/word-at-a-time.h> functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an exact mask of which byte had the first zero. This is run directly outside the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly which byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. 
- The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-26 21:43:17 +04:00			`#include <asm/word-at-a-time.h>`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00
			`#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS`
			`#define IS_UNALIGNED(src, dst) 0`
			`#else`
			`#define IS_UNALIGNED(src, dst) \`
			`(((long) dst | (long) src) & (sizeof(long) - 1))`
			`#endif`

			`/*`
			`* Do a strncpy, return length of string without final '\0'.`
			`* 'count' is the user-supplied count (return 'count' if we`
			`* hit it), 'max' is the address space maximum (and we return`
			`* -EFAULT if we hit it).`
			`*/`
			`static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)`
			`{`
word-at-a-time: make the interfaces truly generic This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The <asm/word-at-a-time.h> functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an exact mask of which byte had the first zero. This is run directly outside the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly which byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. 
- The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-26 21:43:17 +04:00			`const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`long res = 0;`

			`/*`
			`* Truncate 'max' to the user-specified limit, so that`
			`* we only have one limit we need to check in the loop`
			`*/`
			`if (max > count)`
			`max = count;`

			`if (IS_UNALIGNED(src, dst))`
			`goto byte_at_a_time;`

			`while (max >= sizeof(unsigned long)) {`
word-at-a-time: make the interfaces truly generic This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The <asm/word-at-a-time.h> functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an exact mask of which byte had the first zero. This is run directly outside the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly which byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. 
- The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-26 21:43:17 +04:00			`unsigned long c, data;`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00
			`/* Fall back to byte-at-a-time if we get a page fault */`
Use the new batched user accesses in generic user string handling This converts the generic user string functions to use the batched user access functions. It makes a big difference on Skylake, which is the first x86 microarchitecture to implement SMAP. The STAC/CLAC instructions are not very fast, and doing them for each access inside the loop that copies strings from user space (which is what the pathname handling does for every pathname the kernel uses, for example) is very inefficient. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2015-12-17 21:05:19 +03:00			`if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`break;`
			`*(unsigned long *)(dst+res) = c;`
word-at-a-time: make the interfaces truly generic This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The <asm/word-at-a-time.h> functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an exact mask of which byte had the first zero. This is run directly outside the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly which byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. 
- The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-26 21:43:17 +04:00			`if (has_zero(c, &data, &constants)) {`
			`data = prep_zero_mask(c, data, &constants);`
			`data = create_zero_mask(data);`
			`return res + find_zero(data);`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`}`
			`res += sizeof(unsigned long);`
			`max -= sizeof(unsigned long);`
			`}`

			`byte_at_a_time:`
			`while (max) {`
			`char c;`

Use the new batched user accesses in generic user string handling This converts the generic user string functions to use the batched user access functions. It makes a big difference on Skylake, which is the first x86 microarchitecture to implement SMAP. The STAC/CLAC instructions are not very fast, and doing them for each access inside the loop that copies strings from user space (which is what the pathname handling does for every pathname the kernel uses, for example) is very inefficient. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2015-12-17 21:05:19 +03:00			`if (unlikely(unsafe_get_user(c,src+res)))`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`return -EFAULT;`
			`dst[res] = c;`
			`if (!c)`
			`return res;`
			`res++;`
			`max--;`
			`}`

			`/*`
			`* Uhhuh. We hit 'max'. But was that the user-specified maximum`
			`* too? If so, that's ok - we got as much as the user asked for.`
			`*/`
			`if (res >= count)`
			`return res;`

			`/*`
			`* Nope: we hit the address space limit, and we still had more`
			`* characters the caller would have wanted. That's an EFAULT.`
			`*/`
			`return -EFAULT;`
			`}`

			`/**`
			`* strncpy_from_user: - Copy a NUL terminated string from userspace.`
			`* @dst: Destination address, in kernel space. This buffer must be at`
			`* least @count bytes long.`
			`* @src: Source address, in user space.`
			`* @count: Maximum number of bytes to copy, including the trailing NUL.`
			`*`
			`* Copies a NUL-terminated string from userspace to kernel space.`
			`*`
			`* On success, returns the length of the string (not including the trailing`
			`* NUL).`
			`*`
			`* If access to userspace fails, returns -EFAULT (some data may have been`
			`* copied).`
			`*`
			`* If @count is smaller than the length of the string, copies @count bytes`
			`* and returns @count.`
			`*/`
			`long strncpy_from_user(char *dst, const char __user *src, long count)`
			`{`
			`unsigned long max_addr, src_addr;`

			`if (unlikely(count <= 0))`
			`return 0;`

			`max_addr = user_addr_max();`
			`src_addr = (unsigned long)src;`
			`if (likely(src_addr < max_addr)) {`
			`unsigned long max = max_addr - src_addr;`
Use the new batched user accesses in generic user string handling This converts the generic user string functions to use the batched user access functions. It makes a big difference on Skylake, which is the first x86 microarchitecture to implement SMAP. The STAC/CLAC instructions are not very fast, and doing them for each access inside the loop that copies strings from user space (which is what the pathname handling does for every pathname the kernel uses, for example) is very inefficient. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2015-12-17 21:05:19 +03:00			`long retval;`

			`user_access_begin();`
			`retval = do_strncpy_from_user(dst, src, count, max);`
			`user_access_end();`
			`return retval;`
lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. Miller <davem@davemloft.net> Acked-by: David Howells <dhowells@redhat.com> 2012-05-25 00:12:28 +04:00			`}`
			`return -EFAULT;`
			`}`
			`EXPORT_SYMBOL(strncpy_from_user);`