Philippe Waroquiers
2018-11-03 20:19:29 UTC
Running the gdb testsuite under Valgrind started to fail after 100+ tests,
due to out of memory caused by lingering processes.
The lingering processes are caused by the combination
of a limitation in Valgrind signal handling when using PTRACE_TRACEME
and a (minor) bug in GDB.
The Valgrind limitation is : when a process is ptraced and raises
a signal, Valgrind will replace the raised signal by SIGSTOP as other
signals are masked by Valgrind when executing a system call.
Removing this limitation seems far to be trivial, valgrind signal
handling is very complex.
Due to this valgrind limitation, GDB linux_ptrace_test_ret_to_nx gets
a SIGSTOP signal instead of the expected SIGTRAP or SIGSEGV.
In such a case, linux_ptrace_test_ret_to_nx does an early return, but
does not kill the child (running under valgrind), child stays in a STOP-ped
state.
These lingering processes then eat the available system memory,
till launching a new process starts to fail.
This patch fixes the GDB minor bug by killing the child in case
linux_ptrace_test_ret_to_nx does an early return.
nat/linux-ptrace.c has 3 different logics to kill a child process.
So, this patch factorizes killing a child in the function kill_child.
The 3 different logics are:
* linux_ptrace_test_ret_to_nx is calling both kill (child, SIGKILL)
and ptrace (PTRACE_KILL, child, ...), and then is calling once
waitpid.
* linux_check_ptrace_features is calling ptrace (PTRACE_KILL, child, ...)
+ my_waitpid in a loop, as long as the waitpid status was WIFSTOPPED.
* linux_test_for_tracefork is calling once ptrace (PTRACE_KILL, child, ...)
+ my_waitpid.
The linux ptrace documentation indicates that PTRACE_KILL is deprecated,
and tells to not use it, as it might return success but not kill the tracee.
The documentation indicates to send SIGKILL directly.
I suspect that linux_ptrace_test_ret_to_nx calls both kill and ptrace just
to be sure ...
I suspect that linux_check_ptrace_features calls ptrace in a loop
to bypass the PTRACE_KILL limitation.
And it looks like linux_test_for_tracefork does not handle the PTRACE_KILL
limitation.
Also, 2 of the 3 logics are calling my_waitpid, which seems better,
as this is protecting the waitpid syscall against EINTR.
So, the logic in kill_child is just using kill (child, SIGKILL)
+ my_waitpid, and then does a few verifications to see everything worked
accordingly to the plan.
Tested on Debian/x86_64.
2018-11-03 Philippe Waroquiers <***@skynet.be>
* nat/linux-ptrace.c (kill_child): New function.
(linux_ptrace_test_ret_to_nx): Use kill_child instead of local code.
Add a call to kill_child in case of early return after fork.
(linux_check_ptrace_features): Use kill_child instead of local code.
(linux_test_for_tracefork): Likewise.
---
gdb/nat/linux-ptrace.c | 77 ++++++++++++++++++++++--------------------
1 file changed, 40 insertions(+), 37 deletions(-)
diff --git a/gdb/nat/linux-ptrace.c b/gdb/nat/linux-ptrace.c
index 1f21ef03a3..49a1d011bd 100644
--- a/gdb/nat/linux-ptrace.c
+++ b/gdb/nat/linux-ptrace.c
@@ -79,6 +79,39 @@ EXTERN_C void linux_ptrace_test_ret_to_nx_instr (void);
#endif /* defined __i386__ || defined __x86_64__ */
+/* Kill CHILD. WHO is used to report warnings. */
+
+static void
+kill_child (pid_t child, const char *who)
+{
+ pid_t got_pid;
+ int kill_status;
+
+ if (kill (child, SIGKILL) != 0)
+ {
+ warning (_("%s: failed to kill child pid %ld %s\n"),
+ who, (long) child, safe_strerror (errno));
+ return;
+ }
+
+ errno = 0;
+ got_pid = my_waitpid (child, &kill_status, 0);
+ if (got_pid != child)
+ {
+ warning (_("%s: "
+ "kill waitpid returned %ld: %s"),
+ who, (long) got_pid, safe_strerror (errno));
+ return;
+ }
+ if (!WIFSIGNALED (kill_status))
+ {
+ warning (_("%s: "
+ "kill status %d is not WIFSIGNALED!"),
+ who, kill_status);
+ return;
+ }
+}
+
/* Test broken off-trunk Linux kernel patchset for NX support on i386. It was
removed in Fedora kernel 88fa1f0332d188795ed73d7ac2b1564e11a0b4cd.
@@ -91,7 +124,7 @@ linux_ptrace_test_ret_to_nx (void)
pid_t child, got_pid;
gdb_byte *return_address, *pc;
long l;
- int status, kill_status;
+ int status;
elf_gregset_t regs;
return_address
@@ -169,6 +202,7 @@ linux_ptrace_test_ret_to_nx (void)
{
warning (_("linux_ptrace_test_ret_to_nx: status %d is not WIFSTOPPED!"),
status);
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
return;
}
@@ -178,6 +212,7 @@ linux_ptrace_test_ret_to_nx (void)
warning (_("linux_ptrace_test_ret_to_nx: "
"WSTOPSIG %d is neither SIGTRAP nor SIGSEGV!"),
(int) WSTOPSIG (status));
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
return;
}
@@ -195,26 +230,7 @@ linux_ptrace_test_ret_to_nx (void)
# error "!__i386__ && !__x86_64__"
#endif
- kill (child, SIGKILL);
- ptrace (PTRACE_KILL, child, (PTRACE_TYPE_ARG3) NULL,
- (PTRACE_TYPE_ARG4) NULL);
-
- errno = 0;
- got_pid = waitpid (child, &kill_status, 0);
- if (got_pid != child)
- {
- warning (_("linux_ptrace_test_ret_to_nx: "
- "PTRACE_KILL waitpid returned %ld: %s"),
- (long) got_pid, safe_strerror (errno));
- return;
- }
- if (!WIFSIGNALED (kill_status))
- {
- warning (_("linux_ptrace_test_ret_to_nx: "
- "PTRACE_KILL status %d is not WIFSIGNALED!"),
- status);
- return;
- }
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
/* + 1 is there as x86* stops after the 'int3' instruction. */
if (WSTOPSIG (status) == SIGTRAP && pc == return_address + 1)
@@ -352,16 +368,8 @@ linux_check_ptrace_features (void)
linux_test_for_exitkill (child_pid);
- /* Clean things up and kill any pending children. */
- do
- {
- ret = ptrace (PTRACE_KILL, child_pid, (PTRACE_TYPE_ARG3) 0,
- (PTRACE_TYPE_ARG4) 0);
- if (ret != 0)
- warning (_("linux_check_ptrace_features: failed to kill child"));
- my_waitpid (child_pid, &status, 0);
- }
- while (WIFSTOPPED (status));
+ /* Kill child_pid. */
+ kill_child (child_pid, "linux_check_ptrace_features");
}
/* Determine if PTRACE_O_TRACESYSGOOD can be used to catch
@@ -444,12 +452,7 @@ linux_test_for_tracefork (int child_pid)
/* Do some cleanup and kill the grandchild. */
my_waitpid (second_pid, &second_status, 0);
- ret = ptrace (PTRACE_KILL, second_pid, (PTRACE_TYPE_ARG3) 0,
- (PTRACE_TYPE_ARG4) 0);
- if (ret != 0)
- warning (_("linux_test_for_tracefork: "
- "failed to kill second child"));
- my_waitpid (second_pid, &status, 0);
+ kill_child (second_pid, "linux_test_for_tracefork");
}
}
else
due to out of memory caused by lingering processes.
The lingering processes are caused by the combination
of a limitation in Valgrind signal handling when using PTRACE_TRACEME
and a (minor) bug in GDB.
The Valgrind limitation is : when a process is ptraced and raises
a signal, Valgrind will replace the raised signal by SIGSTOP as other
signals are masked by Valgrind when executing a system call.
Removing this limitation seems far to be trivial, valgrind signal
handling is very complex.
Due to this valgrind limitation, GDB linux_ptrace_test_ret_to_nx gets
a SIGSTOP signal instead of the expected SIGTRAP or SIGSEGV.
In such a case, linux_ptrace_test_ret_to_nx does an early return, but
does not kill the child (running under valgrind), child stays in a STOP-ped
state.
These lingering processes then eat the available system memory,
till launching a new process starts to fail.
This patch fixes the GDB minor bug by killing the child in case
linux_ptrace_test_ret_to_nx does an early return.
nat/linux-ptrace.c has 3 different logics to kill a child process.
So, this patch factorizes killing a child in the function kill_child.
The 3 different logics are:
* linux_ptrace_test_ret_to_nx is calling both kill (child, SIGKILL)
and ptrace (PTRACE_KILL, child, ...), and then is calling once
waitpid.
* linux_check_ptrace_features is calling ptrace (PTRACE_KILL, child, ...)
+ my_waitpid in a loop, as long as the waitpid status was WIFSTOPPED.
* linux_test_for_tracefork is calling once ptrace (PTRACE_KILL, child, ...)
+ my_waitpid.
The linux ptrace documentation indicates that PTRACE_KILL is deprecated,
and tells to not use it, as it might return success but not kill the tracee.
The documentation indicates to send SIGKILL directly.
I suspect that linux_ptrace_test_ret_to_nx calls both kill and ptrace just
to be sure ...
I suspect that linux_check_ptrace_features calls ptrace in a loop
to bypass the PTRACE_KILL limitation.
And it looks like linux_test_for_tracefork does not handle the PTRACE_KILL
limitation.
Also, 2 of the 3 logics are calling my_waitpid, which seems better,
as this is protecting the waitpid syscall against EINTR.
So, the logic in kill_child is just using kill (child, SIGKILL)
+ my_waitpid, and then does a few verifications to see everything worked
accordingly to the plan.
Tested on Debian/x86_64.
2018-11-03 Philippe Waroquiers <***@skynet.be>
* nat/linux-ptrace.c (kill_child): New function.
(linux_ptrace_test_ret_to_nx): Use kill_child instead of local code.
Add a call to kill_child in case of early return after fork.
(linux_check_ptrace_features): Use kill_child instead of local code.
(linux_test_for_tracefork): Likewise.
---
gdb/nat/linux-ptrace.c | 77 ++++++++++++++++++++++--------------------
1 file changed, 40 insertions(+), 37 deletions(-)
diff --git a/gdb/nat/linux-ptrace.c b/gdb/nat/linux-ptrace.c
index 1f21ef03a3..49a1d011bd 100644
--- a/gdb/nat/linux-ptrace.c
+++ b/gdb/nat/linux-ptrace.c
@@ -79,6 +79,39 @@ EXTERN_C void linux_ptrace_test_ret_to_nx_instr (void);
#endif /* defined __i386__ || defined __x86_64__ */
+/* Kill CHILD. WHO is used to report warnings. */
+
+static void
+kill_child (pid_t child, const char *who)
+{
+ pid_t got_pid;
+ int kill_status;
+
+ if (kill (child, SIGKILL) != 0)
+ {
+ warning (_("%s: failed to kill child pid %ld %s\n"),
+ who, (long) child, safe_strerror (errno));
+ return;
+ }
+
+ errno = 0;
+ got_pid = my_waitpid (child, &kill_status, 0);
+ if (got_pid != child)
+ {
+ warning (_("%s: "
+ "kill waitpid returned %ld: %s"),
+ who, (long) got_pid, safe_strerror (errno));
+ return;
+ }
+ if (!WIFSIGNALED (kill_status))
+ {
+ warning (_("%s: "
+ "kill status %d is not WIFSIGNALED!"),
+ who, kill_status);
+ return;
+ }
+}
+
/* Test broken off-trunk Linux kernel patchset for NX support on i386. It was
removed in Fedora kernel 88fa1f0332d188795ed73d7ac2b1564e11a0b4cd.
@@ -91,7 +124,7 @@ linux_ptrace_test_ret_to_nx (void)
pid_t child, got_pid;
gdb_byte *return_address, *pc;
long l;
- int status, kill_status;
+ int status;
elf_gregset_t regs;
return_address
@@ -169,6 +202,7 @@ linux_ptrace_test_ret_to_nx (void)
{
warning (_("linux_ptrace_test_ret_to_nx: status %d is not WIFSTOPPED!"),
status);
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
return;
}
@@ -178,6 +212,7 @@ linux_ptrace_test_ret_to_nx (void)
warning (_("linux_ptrace_test_ret_to_nx: "
"WSTOPSIG %d is neither SIGTRAP nor SIGSEGV!"),
(int) WSTOPSIG (status));
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
return;
}
@@ -195,26 +230,7 @@ linux_ptrace_test_ret_to_nx (void)
# error "!__i386__ && !__x86_64__"
#endif
- kill (child, SIGKILL);
- ptrace (PTRACE_KILL, child, (PTRACE_TYPE_ARG3) NULL,
- (PTRACE_TYPE_ARG4) NULL);
-
- errno = 0;
- got_pid = waitpid (child, &kill_status, 0);
- if (got_pid != child)
- {
- warning (_("linux_ptrace_test_ret_to_nx: "
- "PTRACE_KILL waitpid returned %ld: %s"),
- (long) got_pid, safe_strerror (errno));
- return;
- }
- if (!WIFSIGNALED (kill_status))
- {
- warning (_("linux_ptrace_test_ret_to_nx: "
- "PTRACE_KILL status %d is not WIFSIGNALED!"),
- status);
- return;
- }
+ kill_child (child, "linux_ptrace_test_ret_to_nx");
/* + 1 is there as x86* stops after the 'int3' instruction. */
if (WSTOPSIG (status) == SIGTRAP && pc == return_address + 1)
@@ -352,16 +368,8 @@ linux_check_ptrace_features (void)
linux_test_for_exitkill (child_pid);
- /* Clean things up and kill any pending children. */
- do
- {
- ret = ptrace (PTRACE_KILL, child_pid, (PTRACE_TYPE_ARG3) 0,
- (PTRACE_TYPE_ARG4) 0);
- if (ret != 0)
- warning (_("linux_check_ptrace_features: failed to kill child"));
- my_waitpid (child_pid, &status, 0);
- }
- while (WIFSTOPPED (status));
+ /* Kill child_pid. */
+ kill_child (child_pid, "linux_check_ptrace_features");
}
/* Determine if PTRACE_O_TRACESYSGOOD can be used to catch
@@ -444,12 +452,7 @@ linux_test_for_tracefork (int child_pid)
/* Do some cleanup and kill the grandchild. */
my_waitpid (second_pid, &second_status, 0);
- ret = ptrace (PTRACE_KILL, second_pid, (PTRACE_TYPE_ARG3) 0,
- (PTRACE_TYPE_ARG4) 0);
- if (ret != 0)
- warning (_("linux_test_for_tracefork: "
- "failed to kill second child"));
- my_waitpid (second_pid, &status, 0);
+ kill_child (second_pid, "linux_test_for_tracefork");
}
}
else
--
2.19.1
2.19.1