From f4bc8eef792898d1b2e9b2e522925278db75d6e1 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 18 Feb 2026 16:16:10 +0100 Subject: [PATCH 1/2] Add ref to connection in relation wo it doesn't get closed prematurely --- .../pyconnection/pyconnection.hpp | 2 + .../include/duckdb_python/pyrelation.hpp | 7 ++ src/duckdb_py/pyconnection.cpp | 46 ++++++++----- src/duckdb_py/pyrelation.cpp | 68 ++++++++++++------- tests/fast/api/test_cursor.py | 20 ++++++ tests/fast/arrow/test_polars.py | 13 ++++ tests/fast/relational_api/test_rapi_close.py | 7 +- 7 files changed, 118 insertions(+), 45 deletions(-) diff --git a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp index 8117eda9..dd7c9d2e 100644 --- a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp +++ b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp @@ -355,6 +355,8 @@ struct DuckDBPyConnection : public enable_shared_from_this { static unique_ptr CompletePendingQuery(PendingQueryResult &pending_query); private: + unique_ptr CreateRelation(shared_ptr rel); + unique_ptr CreateRelation(shared_ptr result); PathLike GetPathLike(const py::object &object); ScalarFunction CreateScalarUDF(const string &name, const py::function &udf, const py::object ¶meters, const shared_ptr &return_type, bool vectorized, diff --git a/src/duckdb_py/include/duckdb_python/pyrelation.hpp b/src/duckdb_py/include/duckdb_python/pyrelation.hpp index b1975e7f..50f39b5f 100644 --- a/src/duckdb_py/include/duckdb_python/pyrelation.hpp +++ b/src/duckdb_py/include/duckdb_python/pyrelation.hpp @@ -262,6 +262,10 @@ struct DuckDBPyRelation { bool ContainsColumnByName(const string &name) const; + void SetConnectionOwner(py::object owner); + unique_ptr DeriveRelation(shared_ptr new_rel); + unique_ptr DeriveRelation(shared_ptr result); + private: string ToStringInternal(const BoxRendererConfig &config, bool invalidate_cache = false); string GenerateExpressionList(const string &function_name, const string &aggregated_columns, @@ -284,6 +288,9 @@ struct DuckDBPyRelation { unique_ptr ExecuteInternal(bool stream_result = false); private: + //! Prevents GC of the parent DuckDBPyConnection. + //! Declared first so it is destroyed last (reverse declaration order). + py::object connection_owner; //! Whether the relation has been executed at least once bool executed; shared_ptr rel; diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index e998eb4e..a4a01021 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -83,6 +83,20 @@ DuckDBPyConnection::~DuckDBPyConnection() { } } +unique_ptr DuckDBPyConnection::CreateRelation(shared_ptr rel) { + auto py_rel = make_uniq(std::move(rel)); + py::gil_scoped_acquire gil; + py_rel->SetConnectionOwner(py::cast(shared_from_this())); + return py_rel; +} + +unique_ptr DuckDBPyConnection::CreateRelation(shared_ptr result) { + auto py_rel = make_uniq(std::move(result)); + py::gil_scoped_acquire gil; + py_rel->SetConnectionOwner(py::cast(shared_from_this())); + return py_rel; +} + void DuckDBPyConnection::DetectEnvironment() { // Get the formatted Python version py::module_ sys = py::module_::import("sys"); @@ -513,8 +527,8 @@ shared_ptr DuckDBPyConnection::ExecuteMany(const py::object } // Set the internal 'result' object if (query_result) { - auto py_result = make_uniq(std::move(query_result)); - con.SetResult(make_uniq(std::move(py_result))); + auto py_result = make_shared_ptr(std::move(query_result)); + con.SetResult(CreateRelation(std::move(py_result))); } return shared_from_this(); @@ -713,8 +727,8 @@ shared_ptr DuckDBPyConnection::Execute(const py::object &que // Set the internal 'result' object if (res) { - auto py_result = make_uniq(std::move(res)); - con.SetResult(make_uniq(std::move(py_result))); + auto py_result = make_shared_ptr(std::move(res)); + con.SetResult(CreateRelation(std::move(py_result))); } return shared_from_this(); } @@ -982,7 +996,7 @@ unique_ptr DuckDBPyConnection::ReadJSON( if (file_like_object_wrapper) { read_json_relation->AddExternalDependency(std::move(file_like_object_wrapper)); } - return make_uniq(std::move(read_json_relation)); + return CreateRelation(std::move(read_json_relation)); } PathLike DuckDBPyConnection::GetPathLike(const py::object &object) { @@ -1553,7 +1567,7 @@ unique_ptr DuckDBPyConnection::ReadCSV(const py::object &name_ read_csv.AddExternalDependency(std::move(file_like_object_wrapper)); } - return make_uniq(read_csv_p->Alias(read_csv.alias)); + return CreateRelation(read_csv_p->Alias(read_csv.alias)); } void DuckDBPyConnection::ExecuteImmediately(vector> statements) { @@ -1639,7 +1653,7 @@ unique_ptr DuckDBPyConnection::RunQuery(const py::object &quer relation = make_shared_ptr(connection.context, materialized_result.TakeCollection(), res->names, alias); } - return make_uniq(std::move(relation)); + return CreateRelation(std::move(relation)); } unique_ptr DuckDBPyConnection::Table(const string &tname) { @@ -1649,8 +1663,7 @@ unique_ptr DuckDBPyConnection::Table(const string &tname) { qualified_name.schema = DEFAULT_SCHEMA; } try { - return make_uniq( - connection.Table(qualified_name.catalog, qualified_name.schema, qualified_name.name)); + return CreateRelation(connection.Table(qualified_name.catalog, qualified_name.schema, qualified_name.name)); } catch (const CatalogException &) { // CatalogException will be of the type '... is not a table' // Not a table in the database, make a query relation that can perform replacement scans @@ -1716,7 +1729,7 @@ unique_ptr DuckDBPyConnection::Values(const py::args &args) { py::handle first_arg = args[0]; if (arg_count == 1 && py::isinstance(first_arg)) { vector> values {DuckDBPyConnection::TransformPythonParamList(first_arg)}; - return make_uniq(connection.Values(values)); + return CreateRelation(connection.Values(values)); } else { vector>> expressions; if (py::isinstance(first_arg)) { @@ -1725,13 +1738,13 @@ unique_ptr DuckDBPyConnection::Values(const py::args &args) { auto values = ValueListFromExpressions(args); expressions.push_back(std::move(values)); } - return make_uniq(connection.Values(std::move(expressions))); + return CreateRelation(connection.Values(std::move(expressions))); } } unique_ptr DuckDBPyConnection::View(const string &vname) { auto &connection = con.GetConnection(); - return make_uniq(connection.View(vname)); + return CreateRelation(connection.View(vname)); } unique_ptr DuckDBPyConnection::TableFunction(const string &fname, py::object params) { @@ -1743,8 +1756,7 @@ unique_ptr DuckDBPyConnection::TableFunction(const string &fna throw InvalidInputException("'params' has to be a list of parameters"); } - return make_uniq( - connection.TableFunction(fname, DuckDBPyConnection::TransformPythonParamList(params))); + return CreateRelation(connection.TableFunction(fname, DuckDBPyConnection::TransformPythonParamList(params))); } unique_ptr DuckDBPyConnection::FromDF(const PandasDataFrame &value) { @@ -1757,7 +1769,7 @@ unique_ptr DuckDBPyConnection::FromDF(const PandasDataFrame &v auto tableref = PythonReplacementScan::ReplacementObject(value, name, *connection.context); D_ASSERT(tableref); auto rel = make_shared_ptr(connection.context, std::move(tableref), name); - return make_uniq(std::move(rel)); + return CreateRelation(std::move(rel)); } unique_ptr DuckDBPyConnection::FromParquetInternal(Value &&file_param, bool binary_as_string, @@ -1782,7 +1794,7 @@ unique_ptr DuckDBPyConnection::FromParquetInternal(Value &&fil } D_ASSERT(py::gil_check()); py::gil_scoped_release gil; - return make_uniq(connection.TableFunction("parquet_scan", params, named_parameters)->Alias(name)); + return CreateRelation(connection.TableFunction("parquet_scan", params, named_parameters)->Alias(name)); } unique_ptr DuckDBPyConnection::FromParquet(const string &file_glob, bool binary_as_string, @@ -1818,7 +1830,7 @@ unique_ptr DuckDBPyConnection::FromArrow(py::object &arrow_obj auto tableref = PythonReplacementScan::ReplacementObject(arrow_object, name, *connection.context, true); D_ASSERT(tableref); auto rel = make_shared_ptr(connection.context, std::move(tableref), name); - return make_uniq(std::move(rel)); + return CreateRelation(std::move(rel)); } unordered_set DuckDBPyConnection::GetTableNames(const string &query, bool qualified) { diff --git a/src/duckdb_py/pyrelation.cpp b/src/duckdb_py/pyrelation.cpp index d4322adf..1a711562 100644 --- a/src/duckdb_py/pyrelation.cpp +++ b/src/duckdb_py/pyrelation.cpp @@ -76,7 +76,7 @@ DuckDBPyRelation::DuckDBPyRelation(shared_ptr result_p) : rel(nu } unique_ptr DuckDBPyRelation::ProjectFromExpression(const string &expression) { - auto projected_relation = make_uniq(rel->Project(expression)); + auto projected_relation = DeriveRelation(rel->Project(expression)); for (auto &dep : this->rel->external_dependencies) { projected_relation->rel->AddExternalDependency(dep); } @@ -108,9 +108,9 @@ unique_ptr DuckDBPyRelation::Project(const py::args &args, con vector empty_aliases; if (groups.empty()) { // No groups provided - return make_uniq(rel->Project(std::move(expressions), empty_aliases)); + return DeriveRelation(rel->Project(std::move(expressions), empty_aliases)); } - return make_uniq(rel->Aggregate(std::move(expressions), groups)); + return DeriveRelation(rel->Aggregate(std::move(expressions), groups)); } } @@ -180,7 +180,7 @@ unique_ptr DuckDBPyRelation::EmptyResult(const shared_ptr DuckDBPyRelation::SetAlias(const string &expr) { - return make_uniq(rel->Alias(expr)); + return DeriveRelation(rel->Alias(expr)); } py::str DuckDBPyRelation::GetAlias() { @@ -197,19 +197,19 @@ unique_ptr DuckDBPyRelation::Filter(const py::object &expr) { throw InvalidInputException("Please provide either a string or a DuckDBPyExpression object to 'filter'"); } auto expr_p = expression->GetExpression().Copy(); - return make_uniq(rel->Filter(std::move(expr_p))); + return DeriveRelation(rel->Filter(std::move(expr_p))); } unique_ptr DuckDBPyRelation::FilterFromExpression(const string &expr) { - return make_uniq(rel->Filter(expr)); + return DeriveRelation(rel->Filter(expr)); } unique_ptr DuckDBPyRelation::Limit(int64_t n, int64_t offset) { - return make_uniq(rel->Limit(n, offset)); + return DeriveRelation(rel->Limit(n, offset)); } unique_ptr DuckDBPyRelation::Order(const string &expr) { - return make_uniq(rel->Order(expr)); + return DeriveRelation(rel->Order(expr)); } unique_ptr DuckDBPyRelation::Sort(const py::args &args) { @@ -228,7 +228,7 @@ unique_ptr DuckDBPyRelation::Sort(const py::args &args) { if (order_nodes.empty()) { throw InvalidInputException("Please provide at least one expression to sort on"); } - return make_uniq(rel->Order(std::move(order_nodes))); + return DeriveRelation(rel->Order(std::move(order_nodes))); } vector> GetExpressions(ClientContext &context, const py::object &expr) { @@ -259,9 +259,9 @@ unique_ptr DuckDBPyRelation::Aggregate(const py::object &expr, AssertRelation(); auto expressions = GetExpressions(*rel->context->GetContext(), expr); if (!groups.empty()) { - return make_uniq(rel->Aggregate(std::move(expressions), groups)); + return DeriveRelation(rel->Aggregate(std::move(expressions), groups)); } - return make_uniq(rel->Aggregate(std::move(expressions))); + return DeriveRelation(rel->Aggregate(std::move(expressions))); } void DuckDBPyRelation::AssertResult() const { @@ -354,7 +354,7 @@ unique_ptr DuckDBPyRelation::Describe() { DescribeAggregateInfo("stddev", true), DescribeAggregateInfo("min"), DescribeAggregateInfo("max"), DescribeAggregateInfo("median", true)}; auto expressions = CreateExpressionList(columns, aggregates); - return make_uniq(rel->Aggregate(expressions)); + return DeriveRelation(rel->Aggregate(expressions)); } string DuckDBPyRelation::ToSQL() { @@ -456,7 +456,7 @@ DuckDBPyRelation::GenericWindowFunction(const string &function_name, const strin const string &projected_columns) { auto expr = GenerateExpressionList(function_name, aggr_columns, "", function_parameters, ignore_nulls, projected_columns, window_spec); - return make_uniq(rel->Project(expr)); + return DeriveRelation(rel->Project(expr)); } unique_ptr DuckDBPyRelation::ApplyAggOrWin(const string &function_name, const string &agg_columns, @@ -722,7 +722,7 @@ py::tuple DuckDBPyRelation::Shape() { } unique_ptr DuckDBPyRelation::Unique(const string &std_columns) { - return make_uniq(rel->Project(std_columns)->Distinct()); + return DeriveRelation(rel->Project(std_columns)->Distinct()); } /* General-purpose window functions */ @@ -796,7 +796,7 @@ unique_ptr DuckDBPyRelation::NthValue(const string &column, co } unique_ptr DuckDBPyRelation::Distinct() { - return make_uniq(rel->Distinct()); + return DeriveRelation(rel->Distinct()); } duckdb::pyarrow::RecordBatchReader DuckDBPyRelation::FetchRecordBatchReader(idx_t rows_per_batch) { @@ -1064,6 +1064,22 @@ bool DuckDBPyRelation::ContainsColumnByName(const string &name) const { [&](const string &item) { return StringUtil::CIEquals(name, item); }) != names.end(); } +void DuckDBPyRelation::SetConnectionOwner(py::object owner) { + connection_owner = std::move(owner); +} + +unique_ptr DuckDBPyRelation::DeriveRelation(shared_ptr new_rel) { + auto result = make_uniq(std::move(new_rel)); + result->connection_owner = connection_owner; + return result; +} + +unique_ptr DuckDBPyRelation::DeriveRelation(shared_ptr result_p) { + auto result = make_uniq(std::move(result_p)); + result->connection_owner = connection_owner; + return result; +} + static bool ContainsStructFieldByName(LogicalType &type, const string &name) { if (type.id() != LogicalTypeId::STRUCT) { return false; @@ -1104,19 +1120,19 @@ unique_ptr DuckDBPyRelation::GetAttribute(const string &name) expressions.push_back(std::move(make_uniq(column_names))); vector aliases; aliases.push_back(name); - return make_uniq(rel->Project(std::move(expressions), aliases)); + return DeriveRelation(rel->Project(std::move(expressions), aliases)); } unique_ptr DuckDBPyRelation::Union(DuckDBPyRelation *other) { - return make_uniq(rel->Union(other->rel)); + return DeriveRelation(rel->Union(other->rel)); } unique_ptr DuckDBPyRelation::Except(DuckDBPyRelation *other) { - return make_uniq(rel->Except(other->rel)); + return DeriveRelation(rel->Except(other->rel)); } unique_ptr DuckDBPyRelation::Intersect(DuckDBPyRelation *other) { - return make_uniq(rel->Intersect(other->rel)); + return DeriveRelation(rel->Intersect(other->rel)); } namespace { @@ -1177,7 +1193,7 @@ unique_ptr DuckDBPyRelation::Join(DuckDBPyRelation *other, con } if (py::isinstance(condition)) { auto condition_string = std::string(py::cast(condition)); - return make_uniq(rel->Join(other->rel, condition_string, join_type)); + return DeriveRelation(rel->Join(other->rel, condition_string, join_type)); } vector using_list; if (py::is_list_like(condition)) { @@ -1193,7 +1209,7 @@ unique_ptr DuckDBPyRelation::Join(DuckDBPyRelation *other, con throw InvalidInputException("Please provide at least one string in the condition to create a USING clause"); } auto join_relation = make_shared_ptr(rel, other->rel, std::move(using_list), join_type); - return make_uniq(std::move(join_relation)); + return DeriveRelation(std::move(join_relation)); } shared_ptr condition_expr; if (!py::try_cast(condition, condition_expr)) { @@ -1202,11 +1218,11 @@ unique_ptr DuckDBPyRelation::Join(DuckDBPyRelation *other, con } vector> conditions; conditions.push_back(condition_expr->GetExpression().Copy()); - return make_uniq(rel->Join(other->rel, std::move(conditions), join_type)); + return DeriveRelation(rel->Join(other->rel, std::move(conditions), join_type)); } unique_ptr DuckDBPyRelation::Cross(DuckDBPyRelation *other) { - return make_uniq(rel->CrossProduct(other->rel)); + return DeriveRelation(rel->CrossProduct(other->rel)); } static Value NestedDictToStruct(const py::object &dictionary) { @@ -1502,7 +1518,7 @@ void DuckDBPyRelation::ToCSV(const string &filename, const py::object &sep, cons // should this return a rel with the new view? unique_ptr DuckDBPyRelation::CreateView(const string &view_name, bool replace) { rel->CreateView(view_name, replace); - return make_uniq(rel); + return DeriveRelation(rel); } static bool IsDescribeStatement(SQLStatement &statement) { @@ -1530,7 +1546,7 @@ unique_ptr DuckDBPyRelation::Query(const string &view_name, co auto select_statement = unique_ptr_cast(std::move(parser.statements[0])); auto query_relation = make_shared_ptr(rel->context->GetContext(), std::move(select_statement), sql_query, "query_relation"); - return make_uniq(std::move(query_relation)); + return DeriveRelation(std::move(query_relation)); } else if (IsDescribeStatement(statement)) { auto query = PragmaShow(view_name); return Query(view_name, query); @@ -1630,7 +1646,7 @@ unique_ptr DuckDBPyRelation::Map(py::function fun, Optional params; params.emplace_back(Value::POINTER(CastPointerToValue(fun.ptr()))); params.emplace_back(Value::POINTER(CastPointerToValue(schema.ptr()))); - auto relation = make_uniq(rel->TableFunction("python_map_function", params)); + auto relation = DeriveRelation(rel->TableFunction("python_map_function", params)); auto rel_dependency = make_uniq(); rel_dependency->AddDependency("map", PythonDependencyItem::Create(std::move(fun))); rel_dependency->AddDependency("schema", PythonDependencyItem::Create(std::move(schema))); diff --git a/tests/fast/api/test_cursor.py b/tests/fast/api/test_cursor.py index f0d7d332..044e736d 100644 --- a/tests/fast/api/test_cursor.py +++ b/tests/fast/api/test_cursor.py @@ -114,3 +114,23 @@ def test_cursor_used_after_close(self): cursor.close() with pytest.raises(duckdb.ConnectionException): cursor.execute("select [1,2,3,4]") + + def test_cursor_relapi_chaining(self): + """Cursor should stay alive while a relation derived from it exists (GH #315).""" + con = duckdb.connect(":memory:") + # Exact repro from the issue + res = con.cursor().sql("SELECT 1 AS foo").fetchall() + assert res == [(1,)] + + def test_cursor_relapi_chaining_filter(self): + """Derived relations should also keep the cursor alive.""" + con = duckdb.connect(":memory:") + res = con.cursor().sql("SELECT 1 AS foo").filter("foo = 1").fetchall() + assert res == [(1,)] + + def test_cursor_relapi_chaining_table(self): + """Other connection methods returning relations should keep cursor alive.""" + con = duckdb.connect(":memory:") + con.execute("CREATE TABLE tbl AS SELECT 42 AS i") + res = con.cursor().table("tbl").fetchall() + assert res == [(42,)] diff --git a/tests/fast/arrow/test_polars.py b/tests/fast/arrow/test_polars.py index a8ca488d..d1b0daa9 100644 --- a/tests/fast/arrow/test_polars.py +++ b/tests/fast/arrow/test_polars.py @@ -769,3 +769,16 @@ def test_explicit_cast_not_pushed_down(self): # pl.col("a").cast(pl.Int64) produces a Strict Cast node expr = pl.col("a").cast(pl.Int64) > 5 invalid_filter(expr) + + def test_polars_lazy_cursor_lifetime(self): + """Cursor should stay alive while a lazy polars frame derived from it exists (GH #161).""" + con = duckdb.connect(":memory:") + + def get_lazy_frame(con): + cur = con.cursor() + return cur.sql("SELECT 1 AS foo, 2 AS bar").pl(lazy=True) + + lf = get_lazy_frame(con) + # Cursor went out of scope, but the lazy frame should keep it alive + result = lf.collect() + assert result.to_dicts() == [{"foo": 1, "bar": 2}] diff --git a/tests/fast/relational_api/test_rapi_close.py b/tests/fast/relational_api/test_rapi_close.py index a5ed16cf..f8233aa2 100644 --- a/tests/fast/relational_api/test_rapi_close.py +++ b/tests/fast/relational_api/test_rapi_close.py @@ -1,3 +1,5 @@ +from decimal import Decimal + import pytest import duckdb @@ -182,5 +184,6 @@ def test_del_conn(self, duckdb_cursor): con.execute("INSERT INTO items VALUES ('jeans', 20.0, 1), ('hammer', 42.2, 2)") rel = con.table("items") del con - with pytest.raises(duckdb.ConnectionException, match="Connection has already been closed"): - print(rel) + # Relation keeps the connection alive via connection_owner + res = rel.fetchall() + assert res == [("jeans", Decimal("20.00"), 1), ("hammer", Decimal("42.20"), 2)] From e5814fde17add96529cbe94bb013318b78cad548 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 18 Feb 2026 22:18:22 +0100 Subject: [PATCH 2/2] prevent ref cycle --- src/duckdb_py/pyconnection.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index a4a01021..6883ba45 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -527,8 +527,9 @@ shared_ptr DuckDBPyConnection::ExecuteMany(const py::object } // Set the internal 'result' object if (query_result) { - auto py_result = make_shared_ptr(std::move(query_result)); - con.SetResult(CreateRelation(std::move(py_result))); + // Don't use CreateRelation here — the result is stored inside the connection, + // so setting connection_owner would create a ref cycle (connection → result → connection). + con.SetResult(make_uniq(make_shared_ptr(std::move(query_result)))); } return shared_from_this(); @@ -727,8 +728,9 @@ shared_ptr DuckDBPyConnection::Execute(const py::object &que // Set the internal 'result' object if (res) { - auto py_result = make_shared_ptr(std::move(res)); - con.SetResult(CreateRelation(std::move(py_result))); + // Don't use CreateRelation here — the result is stored inside the connection, + // so setting connection_owner would create a ref cycle (connection → result → connection). + con.SetResult(make_uniq(make_shared_ptr(std::move(res)))); } return shared_from_this(); }