Use process_vm_readv instead of PTRACE_PEEKDATA to read data blocks

Currently, we use PTRACE_PEEKDATA to read things like filenames and
data passed by I/O syscalls.
PTRACE_PEEKDATA gets one word per syscall. This is VERY expensive.
For example, in order to print an fstat syscall, we need to make
more than twenty trips into the kernel to fetch one struct stat!

Kernel 3.2 got a new syscall, process_vm_readv(), which can be used to
copy data blocks out of a process's address space.

This change uses it in umoven() and umovestr() functions if possible,
with fallback to old method if process_vm_readv() fails.
If it returns ENOSYS, we don't try to use it anymore, eliminating
overhead of trying it on older kernels.

Result of "time strace -oLOG ls -l /usr/lib >/dev/null":
Before patch: 0.372s
After patch:  0.262s

* util.c (process_vm_readv): Wrapper to call process_vm_readv syscall.
(umoven): Use process_vm_readv for block reads of tracee memory.
(umovestr): Likewise.
* linux/syscall.h: Declare new function sys_process_vm_readv.
* process.c (sys_process_vm_readv): Decoder for new syscall.
* linux/i386/syscallent.h: Add process_vm_readv, process_vm_writev syscalls.
* linux/x86_64/syscallent.h: Likewise.
* linux/powerpc/syscallent.h: Likewise.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2012-01-28 01:46:33 +01:00
parent 000b601439
commit 3af224c5cd
6 changed files with 137 additions and 4 deletions

View File

@ -377,8 +377,8 @@
{ 1, TD, sys_syncfs, "syncfs" }, /* 344 */
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 345 */
{ 2, TD, sys_setns, "setns" }, /* 346 */
{ 5, 0, printargs, "SYS_347" }, /* 347 */
{ 5, 0, printargs, "SYS_348" }, /* 348 */
{ 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 347 */
{ 6, 0, printargs, "process_vm_writev" }, /* 348 */
{ 5, 0, printargs, "SYS_349" }, /* 349 */
{ 5, 0, printargs, "SYS_350" }, /* 350 */
{ 5, 0, printargs, "SYS_351" }, /* 351 */

View File

@ -379,8 +379,8 @@
{ 1, TD, sys_syncfs, "syncfs" }, /* 348 */
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 349 */
{ 2, TD, sys_setns, "setns" }, /* 350 */
{ 5, 0, printargs, "SYS_351" }, /* 351 */
{ 5, 0, printargs, "SYS_352" }, /* 352 */
{ 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 351 */
{ 6, 0, printargs, "process_vm_writev" }, /* 352 */
{ 5, 0, printargs, "SYS_353" }, /* 353 */
{ 5, 0, printargs, "SYS_354" }, /* 354 */
{ 5, 0, printargs, "SYS_355" }, /* 355 */

View File

@ -178,6 +178,7 @@ int sys_pread64();
int sys_preadv();
int sys_pselect6();
int sys_ptrace();
int sys_process_vm_readv();
int sys_putpmsg();
int sys_pwrite();
int sys_pwrite64();

View File

@ -308,3 +308,5 @@
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 307 */
{ 2, TD, sys_setns, "setns" }, /* 308 */
{ 3, 0, sys_getcpu, "getcpu" }, /* 309 */
{ 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 310 */
{ 6, 0, printargs, "process_vm_writev" }, /* 311 */

View File

@ -3456,4 +3456,30 @@ sys_getcpu(struct tcb *tcp)
return 0;
}
/*
 * Decoder for the process_vm_readv syscall.
 *
 * On entry only arg 1 (the pid) is printed.  The remaining arguments are
 * printed on exit: each iovec array goes through tprint_iov() when
 * syserror(tcp) is false, and is printed as a raw address and count
 * otherwise.  The flags (arg 6) are printed last.  Always returns 0.
 */
int
sys_process_vm_readv(struct tcb *tcp)
{
	if (entering(tcp)) {
		/* arg 1: pid */
		tprintf("%ld, ", tcp->u_arg[0]);
		return 0;
	}

	/* args 2,3: local iov,cnt */
	if (!syserror(tcp)) {
		tprint_iov(tcp, tcp->u_arg[2], tcp->u_arg[1], 1);
	} else {
		tprintf("%#lx, %lu",
			tcp->u_arg[1], tcp->u_arg[2]);
	}
	tprints(", ");

	/* args 4,5: remote iov,cnt */
	if (!syserror(tcp)) {
		tprint_iov(tcp, tcp->u_arg[4], tcp->u_arg[3], 0);
	} else {
		tprintf("%#lx, %lu", tcp->u_arg[3], tcp->u_arg[4]);
	}

	/* arg 6: flags */
	tprintf(", %lu", tcp->u_arg[5]);
	return 0;
}
#endif /* LINUX */

104
util.c
View File

@ -769,6 +769,39 @@ dumpstr(struct tcb *tcp, long addr, int len)
}
}
/*
 * Local shim for process_vm_readv(), needed because libc does not yet
 * provide it.  Once libc is updated, only the
 * "static bool process_vm_readv_not_supported" line should remain.
 */
#ifndef __NR_process_vm_readv
# if defined(I386)
#  define __NR_process_vm_readv 347
# elif defined(X86_64)
#  define __NR_process_vm_readv 310
# elif defined(POWERPC)
#  define __NR_process_vm_readv 351
# endif
#endif

#ifdef __NR_process_vm_readv
static bool process_vm_readv_not_supported = 0;

/* Issue the process_vm_readv syscall directly through syscall(). */
static ssize_t
process_vm_readv(pid_t pid,
		 const struct iovec *lvec, unsigned long liovcnt,
		 const struct iovec *rvec, unsigned long riovcnt,
		 unsigned long flags)
{
	return syscall(__NR_process_vm_readv,
		       (long)pid, lvec, liovcnt, rvec, riovcnt, flags);
}
#else
/* No syscall number is known here: start out flagged as unsupported,
 * and make any call evaluate to -1 with errno set to ENOSYS.
 */
static bool process_vm_readv_not_supported = 1;
# define process_vm_readv(...) (errno = ENOSYS, -1)
#endif
/* end of hack */
#define PAGMASK (~(PAGSIZ - 1))
/*
* move `len' bytes of data from process `pid'
@ -786,6 +819,29 @@ umoven(struct tcb *tcp, long addr, int len, char *laddr)
char x[sizeof(long)];
} u;
if (!process_vm_readv_not_supported) {
struct iovec local[1], remote[1];
int r;
local[0].iov_base = laddr;
remote[0].iov_base = (void*)addr;
local[0].iov_len = remote[0].iov_len = len;
r = process_vm_readv(pid,
local, 1,
remote, 1,
/*flags:*/ 0
);
if (r < 0) {
if (errno == ENOSYS)
process_vm_readv_not_supported = 1;
else /* strange... */
perror("process_vm_readv");
goto vm_readv_didnt_work;
}
return r;
}
vm_readv_didnt_work:
#if SUPPORTED_PERSONALITIES > 1
if (personality_wordsize[current_personality] < sizeof(addr))
addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
@ -925,6 +981,54 @@ umovestr(struct tcb *tcp, long addr, int len, char *laddr)
addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
#endif
if (!process_vm_readv_not_supported) {
struct iovec local[1], remote[1];
local[0].iov_base = laddr;
remote[0].iov_base = (void*)addr;
while (len > 0) {
int end_in_page;
int r;
int chunk_len;
/* Don't read kilobytes: most strings are short */
chunk_len = len;
if (chunk_len > 256)
chunk_len = 256;
/* Don't cross pages. I guess otherwise we can get EFAULT
* and fail to notice that terminating NUL lies
* in the existing (first) page.
* (I hope there aren't arches with pages < 4K)
*/
end_in_page = ((addr + chunk_len) & 4095);
r = chunk_len - end_in_page;
if (r > 0) /* if chunk_len > end_in_page */
chunk_len = r; /* chunk_len -= end_in_page */
local[0].iov_len = remote[0].iov_len = chunk_len;
r = process_vm_readv(pid,
local, 1,
remote, 1,
/*flags:*/ 0
);
if (r < 0) {
if (errno == ENOSYS)
process_vm_readv_not_supported = 1;
else /* strange... */
perror("process_vm_readv");
goto vm_readv_didnt_work;
}
if (memchr(local[0].iov_base, '\0', r))
return 1;
local[0].iov_base += r;
remote[0].iov_base += r;
len -= r;
}
return 0;
}
vm_readv_didnt_work:
started = 0;
if (addr & (sizeof(long) - 1)) {
/* addr not a multiple of sizeof(long) */