Alan Hayward
2018-11-20 11:56:12 UTC
[Note: I hope to eventually isolate the exact kernel/hardware issue and
raise a bug elsewhere against that.]
On some heavily loaded AArch64 boxes, GDB will sometimes hang forever when
the inferior creates a thread. This hang happens inside the kernel during
the ptrace call to set hardware watchpoints or hardware breakpoints.
Currently, GDB will always set hw wp/bp at the start of each thread even if
there are none set in the process.
This patch works around the issue by avoiding setting hw wp/bp if there
are none set for the process.
On an effected machine, this fix drastically reduces the racy nature of the
gdb.threads test set. I ran the entire gdb test suite across all processors
for 100 iterations, then ran the results through the racy tests script.
Without the patch, 58 .exp files in gdb.threads were marked as racy. After
the patch this reduced to the same ~14 tests as the non effected boxes.
Clearly GDB will still be subject to hangs on an effect box if hw wp/bp's are
used prior to creating inferior threads on a heavily loaded system.
To enable this in gdbserver, the sequence in gdbserver add_lwp() is switched
to the same as gdb order as gdb, to ensure the thread is registered before
calling new_thread(). This allows aarch64_linux_new_thread() to read the
ptid.
gdb/ChangeLog:
2018-11-20 Alan Hayward <***@arm.com>
* nat/aarch64-linux-hw-point.c
(aarch64_linux_any_set_debug_regs_state): New function.
* nat/aarch64-linux-hw-point.h
(aarch64_linux_any_set_debug_regs_state): New declaration.
* nat/aarch64-linux.c (aarch64_linux_new_thread): Check if any
BPs or WPs are set.
gdb/gdbserver/ChangeLog:
2018-11-20 Alan Hayward <***@arm.com>
* linux-low.c (add_lwp): Switch ordering.
---
gdb/gdbserver/linux-low.c | 4 ++--
gdb/nat/aarch64-linux-hw-point.c | 23 +++++++++++++++++++++++
gdb/nat/aarch64-linux-hw-point.h | 6 ++++++
gdb/nat/aarch64-linux.c | 15 ++++++++++-----
4 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 701f3e863c..1c336efade 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -951,11 +951,11 @@ add_lwp (ptid_t ptid)
lwp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+ lwp->thread = add_thread (ptid, lwp);
+
if (the_low_target.new_thread != NULL)
the_low_target.new_thread (lwp);
- lwp->thread = add_thread (ptid, lwp);
-
return lwp;
}
diff --git a/gdb/nat/aarch64-linux-hw-point.c b/gdb/nat/aarch64-linux-hw-point.c
index 18b5af2d49..7f96fa2b7a 100644
--- a/gdb/nat/aarch64-linux-hw-point.c
+++ b/gdb/nat/aarch64-linux-hw-point.c
@@ -717,6 +717,29 @@ aarch64_linux_set_debug_regs (struct aarch64_debug_reg_state *state,
}
}
+/* See nat/aarch64-linux-hw-point.h. */
+
+bool
+aarch64_linux_any_set_debug_regs_state (struct aarch64_debug_reg_state *state,
+ bool watchpoint)
+{
+ int i, count;
+ const CORE_ADDR *addr;
+ const unsigned int *ctrl;
+
+ count = watchpoint ? aarch64_num_wp_regs : aarch64_num_bp_regs;
+ addr = watchpoint ? state->dr_addr_wp : state->dr_addr_bp;
+ ctrl = watchpoint ? state->dr_ctrl_wp : state->dr_ctrl_bp;
+ if (count == 0)
+ return false;
+
+ for (i = 0; i < count; i++)
+ if (addr[i] != 0 || ctrl[i] != 0)
+ return true;
+
+ return false;
+}
+
/* Print the values of the cached breakpoint/watchpoint registers. */
void
diff --git a/gdb/nat/aarch64-linux-hw-point.h b/gdb/nat/aarch64-linux-hw-point.h
index 1940b06a89..37107a888c 100644
--- a/gdb/nat/aarch64-linux-hw-point.h
+++ b/gdb/nat/aarch64-linux-hw-point.h
@@ -182,6 +182,12 @@ int aarch64_handle_watchpoint (enum target_hw_bp_type type, CORE_ADDR addr,
void aarch64_linux_set_debug_regs (struct aarch64_debug_reg_state *state,
int tid, int watchpoint);
+/* Return TRUE if there are any hardware breakpoints. If WATCHPOINT is TRUE,
+ check hardware watchpoints instead. */
+bool
+aarch64_linux_any_set_debug_regs_state (struct aarch64_debug_reg_state *state,
+ bool watchpoint);
+
void aarch64_show_debug_reg_state (struct aarch64_debug_reg_state *state,
const char *func, CORE_ADDR addr,
int len, enum target_hw_bp_type type);
diff --git a/gdb/nat/aarch64-linux.c b/gdb/nat/aarch64-linux.c
index 0f2c8011ea..48c59f6288 100644
--- a/gdb/nat/aarch64-linux.c
+++ b/gdb/nat/aarch64-linux.c
@@ -73,13 +73,18 @@ aarch64_linux_prepare_to_resume (struct lwp_info *lwp)
void
aarch64_linux_new_thread (struct lwp_info *lwp)
{
+ ptid_t ptid = ptid_of_lwp (lwp);
+ struct aarch64_debug_reg_state *state
+ = aarch64_get_debug_reg_state (ptid.pid ());
struct arch_lwp_info *info = XNEW (struct arch_lwp_info);
- /* Mark that all the hardware breakpoint/watchpoint register pairs
- for this thread need to be initialized (with data from
- aarch_process_info.debug_reg_state). */
- DR_MARK_ALL_CHANGED (info->dr_changed_bp, aarch64_num_bp_regs);
- DR_MARK_ALL_CHANGED (info->dr_changed_wp, aarch64_num_wp_regs);
+ /* If there are hardware breakpoints/watchpoints in the process then mark that
+ all the hardware breakpoint/watchpoint register pairs for this thread need
+ to be initialized (with data from aarch_process_info.debug_reg_state). */
+ if (aarch64_linux_any_set_debug_regs_state (state, false))
+ DR_MARK_ALL_CHANGED (info->dr_changed_bp, aarch64_num_bp_regs);
+ if (aarch64_linux_any_set_debug_regs_state (state, true))
+ DR_MARK_ALL_CHANGED (info->dr_changed_wp, aarch64_num_wp_regs);
lwp_set_arch_private_info (lwp, info);
}
raise a bug elsewhere against that.]
On some heavily loaded AArch64 boxes, GDB will sometimes hang forever when
the inferior creates a thread. This hang happens inside the kernel during
the ptrace call to set hardware watchpoints or hardware breakpoints.
Currently, GDB will always set hw wp/bp at the start of each thread even if
there are none set in the process.
This patch works around the issue by avoiding setting hw wp/bp if there
are none set for the process.
On an effected machine, this fix drastically reduces the racy nature of the
gdb.threads test set. I ran the entire gdb test suite across all processors
for 100 iterations, then ran the results through the racy tests script.
Without the patch, 58 .exp files in gdb.threads were marked as racy. After
the patch this reduced to the same ~14 tests as the non effected boxes.
Clearly GDB will still be subject to hangs on an effect box if hw wp/bp's are
used prior to creating inferior threads on a heavily loaded system.
To enable this in gdbserver, the sequence in gdbserver add_lwp() is switched
to the same as gdb order as gdb, to ensure the thread is registered before
calling new_thread(). This allows aarch64_linux_new_thread() to read the
ptid.
gdb/ChangeLog:
2018-11-20 Alan Hayward <***@arm.com>
* nat/aarch64-linux-hw-point.c
(aarch64_linux_any_set_debug_regs_state): New function.
* nat/aarch64-linux-hw-point.h
(aarch64_linux_any_set_debug_regs_state): New declaration.
* nat/aarch64-linux.c (aarch64_linux_new_thread): Check if any
BPs or WPs are set.
gdb/gdbserver/ChangeLog:
2018-11-20 Alan Hayward <***@arm.com>
* linux-low.c (add_lwp): Switch ordering.
---
gdb/gdbserver/linux-low.c | 4 ++--
gdb/nat/aarch64-linux-hw-point.c | 23 +++++++++++++++++++++++
gdb/nat/aarch64-linux-hw-point.h | 6 ++++++
gdb/nat/aarch64-linux.c | 15 ++++++++++-----
4 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 701f3e863c..1c336efade 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -951,11 +951,11 @@ add_lwp (ptid_t ptid)
lwp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+ lwp->thread = add_thread (ptid, lwp);
+
if (the_low_target.new_thread != NULL)
the_low_target.new_thread (lwp);
- lwp->thread = add_thread (ptid, lwp);
-
return lwp;
}
diff --git a/gdb/nat/aarch64-linux-hw-point.c b/gdb/nat/aarch64-linux-hw-point.c
index 18b5af2d49..7f96fa2b7a 100644
--- a/gdb/nat/aarch64-linux-hw-point.c
+++ b/gdb/nat/aarch64-linux-hw-point.c
@@ -717,6 +717,29 @@ aarch64_linux_set_debug_regs (struct aarch64_debug_reg_state *state,
}
}
+/* See nat/aarch64-linux-hw-point.h. */
+
+bool
+aarch64_linux_any_set_debug_regs_state (struct aarch64_debug_reg_state *state,
+ bool watchpoint)
+{
+ int i, count;
+ const CORE_ADDR *addr;
+ const unsigned int *ctrl;
+
+ count = watchpoint ? aarch64_num_wp_regs : aarch64_num_bp_regs;
+ addr = watchpoint ? state->dr_addr_wp : state->dr_addr_bp;
+ ctrl = watchpoint ? state->dr_ctrl_wp : state->dr_ctrl_bp;
+ if (count == 0)
+ return false;
+
+ for (i = 0; i < count; i++)
+ if (addr[i] != 0 || ctrl[i] != 0)
+ return true;
+
+ return false;
+}
+
/* Print the values of the cached breakpoint/watchpoint registers. */
void
diff --git a/gdb/nat/aarch64-linux-hw-point.h b/gdb/nat/aarch64-linux-hw-point.h
index 1940b06a89..37107a888c 100644
--- a/gdb/nat/aarch64-linux-hw-point.h
+++ b/gdb/nat/aarch64-linux-hw-point.h
@@ -182,6 +182,12 @@ int aarch64_handle_watchpoint (enum target_hw_bp_type type, CORE_ADDR addr,
void aarch64_linux_set_debug_regs (struct aarch64_debug_reg_state *state,
int tid, int watchpoint);
+/* Return TRUE if there are any hardware breakpoints. If WATCHPOINT is TRUE,
+ check hardware watchpoints instead. */
+bool
+aarch64_linux_any_set_debug_regs_state (struct aarch64_debug_reg_state *state,
+ bool watchpoint);
+
void aarch64_show_debug_reg_state (struct aarch64_debug_reg_state *state,
const char *func, CORE_ADDR addr,
int len, enum target_hw_bp_type type);
diff --git a/gdb/nat/aarch64-linux.c b/gdb/nat/aarch64-linux.c
index 0f2c8011ea..48c59f6288 100644
--- a/gdb/nat/aarch64-linux.c
+++ b/gdb/nat/aarch64-linux.c
@@ -73,13 +73,18 @@ aarch64_linux_prepare_to_resume (struct lwp_info *lwp)
void
aarch64_linux_new_thread (struct lwp_info *lwp)
{
+ ptid_t ptid = ptid_of_lwp (lwp);
+ struct aarch64_debug_reg_state *state
+ = aarch64_get_debug_reg_state (ptid.pid ());
struct arch_lwp_info *info = XNEW (struct arch_lwp_info);
- /* Mark that all the hardware breakpoint/watchpoint register pairs
- for this thread need to be initialized (with data from
- aarch_process_info.debug_reg_state). */
- DR_MARK_ALL_CHANGED (info->dr_changed_bp, aarch64_num_bp_regs);
- DR_MARK_ALL_CHANGED (info->dr_changed_wp, aarch64_num_wp_regs);
+ /* If there are hardware breakpoints/watchpoints in the process then mark that
+ all the hardware breakpoint/watchpoint register pairs for this thread need
+ to be initialized (with data from aarch_process_info.debug_reg_state). */
+ if (aarch64_linux_any_set_debug_regs_state (state, false))
+ DR_MARK_ALL_CHANGED (info->dr_changed_bp, aarch64_num_bp_regs);
+ if (aarch64_linux_any_set_debug_regs_state (state, true))
+ DR_MARK_ALL_CHANGED (info->dr_changed_wp, aarch64_num_wp_regs);
lwp_set_arch_private_info (lwp, info);
}
--
2.17.2 (Apple Git-113)
2.17.2 (Apple Git-113)