summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2012-01-28 01:46:33 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2012-01-28 01:46:33 +0100
commit3af224c5cd8a64a6af3f875549ff821e2b5cb211 (patch)
tree831870d336af5bdbe280348c934a56bf607713b6
parent000b601439d249a4afa2ceb6096850a702612d1e (diff)
downloadstrace-3af224c5cd8a64a6af3f875549ff821e2b5cb211.tar.gz
Use process_vm_readv instead of PTRACE_PEEKDATA to read data blocks
Currently, we use PTRACE_PEEKDATA to read things like filenames and data passed by I/O syscalls. PTRACE_PEEKDATA gets one word per syscall. This is VERY expensive. For example, in order to print fstat syscall, we need to perform more than twenty trips into kernel to fetch one struct stat! Kernel 3.2 got a new syscall, process_vm_readv(), which can be used to copy data blocks out of process' address space. This change uses it in umoven() and umovestr() functions if possible, with fallback to old method if process_vm_readv() fails. If it returns ENOSYS, we don't try to use it anymore, eliminating overhead of trying it on older kernels. Result of "time strace -oLOG ls -l /usr/lib >/dev/null": before patch: 0.372s After patch: 0.262s * util.c (process_vm_readv): Wrapper to call process_vm_readv syscall. (umoven): Use process_vm_readv for block reads of tracee memory. (umovestr): Likewise. * linux/syscall.h: Declare new function sys_process_vm_readv. * process.c (sys_process_vm_readv): Decoder for new syscall. * linux/i386/syscallent.h: Add process_vm_readv, process_vm_writev syscalls. * linux/x86_64/syscallent.h: Likewise. * linux/powerpc/syscallent.h: Likewise. Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--linux/i386/syscallent.h4
-rw-r--r--linux/powerpc/syscallent.h4
-rw-r--r--linux/syscall.h1
-rw-r--r--linux/x86_64/syscallent.h2
-rw-r--r--process.c26
-rw-r--r--util.c104
6 files changed, 137 insertions, 4 deletions
diff --git a/linux/i386/syscallent.h b/linux/i386/syscallent.h
index 43093028d..62597913a 100644
--- a/linux/i386/syscallent.h
+++ b/linux/i386/syscallent.h
@@ -377,8 +377,8 @@
{ 1, TD, sys_syncfs, "syncfs" }, /* 344 */
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 345 */
{ 2, TD, sys_setns, "setns" }, /* 346 */
- { 5, 0, printargs, "SYS_347" }, /* 347 */
- { 5, 0, printargs, "SYS_348" }, /* 348 */
+ { 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 347 */
+ { 6, 0, printargs, "process_vm_writev" }, /* 348 */
{ 5, 0, printargs, "SYS_349" }, /* 349 */
{ 5, 0, printargs, "SYS_350" }, /* 350 */
{ 5, 0, printargs, "SYS_351" }, /* 351 */
diff --git a/linux/powerpc/syscallent.h b/linux/powerpc/syscallent.h
index 6c569904b..697fce2eb 100644
--- a/linux/powerpc/syscallent.h
+++ b/linux/powerpc/syscallent.h
@@ -379,8 +379,8 @@
{ 1, TD, sys_syncfs, "syncfs" }, /* 348 */
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 349 */
{ 2, TD, sys_setns, "setns" }, /* 350 */
- { 5, 0, printargs, "SYS_351" }, /* 351 */
- { 5, 0, printargs, "SYS_352" }, /* 352 */
+ { 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 351 */
+ { 6, 0, printargs, "process_vm_writev" }, /* 352 */
{ 5, 0, printargs, "SYS_353" }, /* 353 */
{ 5, 0, printargs, "SYS_354" }, /* 354 */
{ 5, 0, printargs, "SYS_355" }, /* 355 */
diff --git a/linux/syscall.h b/linux/syscall.h
index 6087d42cc..fda85ed6c 100644
--- a/linux/syscall.h
+++ b/linux/syscall.h
@@ -178,6 +178,7 @@ int sys_pread64();
int sys_preadv();
int sys_pselect6();
int sys_ptrace();
+int sys_process_vm_readv();
int sys_putpmsg();
int sys_pwrite();
int sys_pwrite64();
diff --git a/linux/x86_64/syscallent.h b/linux/x86_64/syscallent.h
index 961ac0b5c..7b9397330 100644
--- a/linux/x86_64/syscallent.h
+++ b/linux/x86_64/syscallent.h
@@ -308,3 +308,5 @@
{ 4, TN, sys_sendmmsg, "sendmmsg" }, /* 307 */
{ 2, TD, sys_setns, "setns" }, /* 308 */
{ 3, 0, sys_getcpu, "getcpu" }, /* 309 */
+ { 6, 0, sys_process_vm_readv, "process_vm_readv" }, /* 310 */
+ { 6, 0, printargs, "process_vm_writev" }, /* 311 */
diff --git a/process.c b/process.c
index d93a19034..552e5a5bf 100644
--- a/process.c
+++ b/process.c
@@ -3456,4 +3456,30 @@ sys_getcpu(struct tcb *tcp)
return 0;
}
+int
+sys_process_vm_readv(struct tcb *tcp)
+{
+ if (entering(tcp)) {
+ /* arg 1: pid */
+ tprintf("%ld, ", tcp->u_arg[0]);
+ } else {
+ /* args 2,3: local iov,cnt */
+ if (syserror(tcp)) {
+ tprintf("%#lx, %lu",
+ tcp->u_arg[1], tcp->u_arg[2]);
+ } else {
+ tprint_iov(tcp, tcp->u_arg[2], tcp->u_arg[1], 1);
+ }
+ tprints(", ");
+ /* args 4,5: remote iov,cnt */
+ if (syserror(tcp)) {
+ tprintf("%#lx, %lu", tcp->u_arg[3], tcp->u_arg[4]);
+ } else {
+ tprint_iov(tcp, tcp->u_arg[4], tcp->u_arg[3], 0);
+ }
+ /* arg 6: flags */
+ tprintf(", %lu", tcp->u_arg[5]);
+ }
+ return 0;
+}
#endif /* LINUX */
diff --git a/util.c b/util.c
index d9ec1b3cf..85207cdf5 100644
--- a/util.c
+++ b/util.c
@@ -769,6 +769,39 @@ dumpstr(struct tcb *tcp, long addr, int len)
}
}
+
+/* Need to do this since process_vm_readv() is not yet available in libc.
+ * When libc is be updated, only "static bool process_vm_readv_not_supported"
+ * line should remain.
+ */
+#if !defined(__NR_process_vm_readv)
+# if defined(I386)
+# define __NR_process_vm_readv 347
+# elif defined(X86_64)
+# define __NR_process_vm_readv 310
+# elif defined(POWERPC)
+# define __NR_process_vm_readv 351
+# endif
+#endif
+
+#if defined(__NR_process_vm_readv)
+static bool process_vm_readv_not_supported = 0;
+static ssize_t process_vm_readv(pid_t pid,
+ const struct iovec *lvec,
+ unsigned long liovcnt,
+ const struct iovec *rvec,
+ unsigned long riovcnt,
+ unsigned long flags)
+{
+ return syscall(__NR_process_vm_readv, (long)pid, lvec, liovcnt, rvec, riovcnt, flags);
+}
+#else
+static bool process_vm_readv_not_supported = 1;
+# define process_vm_readv(...) (errno = ENOSYS, -1)
+#endif
+/* end of hack */
+
+
#define PAGMASK (~(PAGSIZ - 1))
/*
* move `len' bytes of data from process `pid'
@@ -786,6 +819,29 @@ umoven(struct tcb *tcp, long addr, int len, char *laddr)
char x[sizeof(long)];
} u;
+ if (!process_vm_readv_not_supported) {
+ struct iovec local[1], remote[1];
+ int r;
+
+ local[0].iov_base = laddr;
+ remote[0].iov_base = (void*)addr;
+ local[0].iov_len = remote[0].iov_len = len;
+ r = process_vm_readv(pid,
+ local, 1,
+ remote, 1,
+ /*flags:*/ 0
+ );
+ if (r < 0) {
+ if (errno == ENOSYS)
+ process_vm_readv_not_supported = 1;
+ else /* strange... */
+ perror("process_vm_readv");
+ goto vm_readv_didnt_work;
+ }
+ return r;
+ }
+ vm_readv_didnt_work:
+
#if SUPPORTED_PERSONALITIES > 1
if (personality_wordsize[current_personality] < sizeof(addr))
addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
@@ -925,6 +981,54 @@ umovestr(struct tcb *tcp, long addr, int len, char *laddr)
addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
#endif
+ if (!process_vm_readv_not_supported) {
+ struct iovec local[1], remote[1];
+
+ local[0].iov_base = laddr;
+ remote[0].iov_base = (void*)addr;
+
+ while (len > 0) {
+ int end_in_page;
+ int r;
+ int chunk_len;
+
+ /* Don't read kilobytes: most strings are short */
+ chunk_len = len;
+ if (chunk_len > 256)
+ chunk_len = 256;
+ /* Don't cross pages. I guess otherwise we can get EFAULT
+ * and fail to notice that terminating NUL lies
+ * in the existing (first) page.
+ * (I hope there aren't arches with pages < 4K)
+ */
+ end_in_page = ((addr + chunk_len) & 4095);
+ r = chunk_len - end_in_page;
+ if (r > 0) /* if chunk_len > end_in_page */
+ chunk_len = r; /* chunk_len -= end_in_page */
+
+ local[0].iov_len = remote[0].iov_len = chunk_len;
+ r = process_vm_readv(pid,
+ local, 1,
+ remote, 1,
+ /*flags:*/ 0
+ );
+ if (r < 0) {
+ if (errno == ENOSYS)
+ process_vm_readv_not_supported = 1;
+ else /* strange... */
+ perror("process_vm_readv");
+ goto vm_readv_didnt_work;
+ }
+ if (memchr(local[0].iov_base, '\0', r))
+ return 1;
+ local[0].iov_base += r;
+ remote[0].iov_base += r;
+ len -= r;
+ }
+ return 0;
+ }
+ vm_readv_didnt_work:
+
started = 0;
if (addr & (sizeof(long) - 1)) {
/* addr not a multiple of sizeof(long) */