diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h
index 1f9b3fcf777493..eeca6fc472be37 100644
--- a/Include/internal/pycore_qsbr.h
+++ b/Include/internal/pycore_qsbr.h
@@ -83,8 +83,9 @@ struct _qsbr_shared {
     // Minimum observed read sequence of all QSBR thread states
     uint64_t rd_seq;
 
-    // Array of QSBR thread states.
+    // Array of QSBR thread states (aligned to 64 bytes).
     struct _qsbr_pad *array;
+    void *array_raw;  // raw allocation pointer (for free)
     Py_ssize_t size;
 
     // Freelist of unused _qsbr_thread_states (protected by mutex)
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index 64b90710b8e664..eb2b0c84acdc7c 100644
--- a/Include/internal/pycore_tstate.h
+++ b/Include/internal/pycore_tstate.h
@@ -102,6 +102,12 @@ typedef struct _PyThreadStateImpl {
 #if _Py_TIER2
     struct _PyJitTracerState *jit_tracer_state;
 #endif
+
+#ifdef Py_GIL_DISABLED
+    // gh-144438: Add padding to ensure that the fields above don't share a
+    // cache line with other allocations.
+    char __padding[64];
+#endif
 } _PyThreadStateImpl;
 
 #ifdef __cplusplus
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst
new file mode 100644
index 00000000000000..1b19bbc7972d62
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-06-21-45-52.gh-issue-144438.GI_uB1LR.rst
@@ -0,0 +1,2 @@
+Align the QSBR thread state array to a 64-byte cache line boundary to
+avoid false sharing in the free-threaded build.
diff --git a/Python/qsbr.c b/Python/qsbr.c
index 6bf5b75f346690..e9d935bfb40d84 100644
--- a/Python/qsbr.c
+++ b/Python/qsbr.c
@@ -85,22 +85,35 @@ grow_thread_array(struct _qsbr_shared *shared)
         new_size = MIN_ARRAY_SIZE;
     }
 
-    struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
-    if (array == NULL) {
+    // Overallocate by 63 bytes so we can align to a 64-byte boundary.
+    // This avoids potential false sharing between the first entry and other
+    // allocations.
+    const size_t alignment = 64;
+    const size_t entry_size = sizeof(struct _qsbr_pad);
+    // PyMem_RawCalloc(1, n) cannot detect overflow in a size computed by the
+    // caller, so reject sizes that would wrap around before multiplying.
+    if ((size_t)new_size > (SIZE_MAX - (alignment - 1)) / entry_size) {
+        return -1;
+    }
+    size_t alloc_size = (size_t)new_size * entry_size + alignment - 1;
+    void *raw = PyMem_RawCalloc(1, alloc_size);
+    if (raw == NULL) {
         return -1;
     }
+    struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment);
 
-    struct _qsbr_pad *old = shared->array;
-    if (old != NULL) {
+    void *old_raw = shared->array_raw;
+    if (shared->array != NULL) {
         memcpy(array, shared->array, shared->size * sizeof(*array));
     }
 
     shared->array = array;
+    shared->array_raw = raw;
     shared->size = new_size;
     shared->freelist = NULL;
     initialize_new_array(shared);
 
-    PyMem_RawFree(old);
+    PyMem_RawFree(old_raw);
     return 0;
 }
 
@@ -257,8 +270,9 @@ void
 _Py_qsbr_fini(PyInterpreterState *interp)
 {
     struct _qsbr_shared *shared = &interp->qsbr;
-    PyMem_RawFree(shared->array);
+    PyMem_RawFree(shared->array_raw);
     shared->array = NULL;
+    shared->array_raw = NULL;
     shared->size = 0;
     shared->freelist = NULL;
 }