From f41329d2a900d9bb22f167ad93f15ce3921980f4 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 19 Feb 2026 10:19:55 +0100 Subject: [PATCH 1/4] Fix numeric conversion logic --- src/duckdb_py/native/python_conversion.cpp | 82 +++++++++++++---- tests/fast/test_type_conversion.py | 102 +++++++++++++++++++++ 2 files changed, 164 insertions(+), 20 deletions(-) create mode 100644 tests/fast/test_type_conversion.py diff --git a/src/duckdb_py/native/python_conversion.cpp b/src/duckdb_py/native/python_conversion.cpp index 485a7881..c1274446 100644 --- a/src/duckdb_py/native/python_conversion.cpp +++ b/src/duckdb_py/native/python_conversion.cpp @@ -92,7 +92,7 @@ Value TransformDictionaryToStruct(const PyDictionary &dict, const LogicalType &t child_list_t struct_values; for (idx_t i = 0; i < dict.len; i++) { auto &key = struct_target ? StructType::GetChildName(target_type, i) : struct_keys[i]; - auto value_index = key_mapping[key]; + auto value_index = struct_target ? key_mapping[key] : i; auto &child_type = struct_target ? StructType::GetChildType(target_type, i) : LogicalType::UNKNOWN; auto val = TransformPythonValue(dict.values.attr("__getitem__")(value_index), child_type); struct_values.emplace_back(make_pair(std::move(key), std::move(val))); @@ -240,6 +240,49 @@ bool TryTransformPythonIntegerToDouble(Value &res, py::handle ele) { return true; } +// Converts a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value by decomposing it into upper +// and lower 64-bit components. Tries HUGEINT first; falls back to UHUGEINT for large positive values. +static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &target_type) { + auto ptr = ele.ptr(); + + // Extract lower 64 bits (two's complement, works for negative values too) + uint64_t lower = PyLong_AsUnsignedLongLongMask(ptr); + if (lower == static_cast(-1) && PyErr_Occurred()) { + PyErr_Clear(); + throw InvalidInputException("Failed to convert Python integer to 128-bit integer: %s", + std::string(py::str(ele))); + } + + // Extract upper bits by right-shifting by 64 + py::int_ shift_amount(64); + py::object upper_obj = py::reinterpret_steal(PyNumber_Rshift(ptr, shift_amount.ptr())); + + // Try signed 128-bit (hugeint) first + int overflow; + int64_t upper_signed = PyLong_AsLongLongAndOverflow(upper_obj.ptr(), &overflow); + if (overflow == 0 && !(upper_signed == -1 && PyErr_Occurred())) { + auto val = Value::HUGEINT(hugeint_t {upper_signed, lower}); + if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::HUGEINT) { + return val; + } + return val.DefaultCastAs(target_type); + } + PyErr_Clear(); + + // Try unsigned 128-bit (uhugeint) + uint64_t upper_unsigned = PyLong_AsUnsignedLongLong(upper_obj.ptr()); + if (PyErr_Occurred()) { + PyErr_Clear(); + throw InvalidInputException("Python integer too large for 128-bit integer type: %s", std::string(py::str(ele))); + } + + auto val = Value::UHUGEINT(uhugeint_t {upper_unsigned, lower}); + if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::UHUGEINT) { + return val; + } + return val.DefaultCastAs(target_type); +} + void TransformPythonUnsigned(uint64_t value, Value &res) { if (value > (uint64_t)std::numeric_limits::max()) { res = Value::UBIGINT(value); @@ -263,7 +306,6 @@ bool TrySniffPythonNumeric(Value &res, int64_t value) { return true; } -// TODO: add support for HUGEINT bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type) { auto ptr = ele.ptr(); @@ -275,9 +317,7 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64", std::string(pybind11::str(ele)))); } - auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::HUGEINT : target_type; - auto numeric_string = std::string(py::str(ele)); - res = Value(numeric_string).DefaultCastAs(cast_as); + res = TransformPythonLongToHugeInt(ele, target_type); return true; } else if (overflow == 1) { if (target_type.InternalType() == PhysicalType::INT64) { @@ -287,18 +327,18 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta uint64_t unsigned_value = PyLong_AsUnsignedLongLong(ptr); if (PyErr_Occurred()) { PyErr_Clear(); - return TryTransformPythonIntegerToDouble(res, ele); - } else { - TransformPythonUnsigned(unsigned_value, res); + res = TransformPythonLongToHugeInt(ele, target_type); + return true; } + TransformPythonUnsigned(unsigned_value, res); PyErr_Clear(); return true; - } else if (value == -1 && PyErr_Occurred()) { + } + if (value == -1 && PyErr_Occurred()) { return false; } // The value is int64_t or smaller - switch (target_type.id()) { case LogicalTypeId::UNKNOWN: return TrySniffPythonNumeric(res, value); @@ -476,13 +516,17 @@ struct PythonValueConversion { target_type.ToString()); } default: - throw ConversionException("Could not convert 'float' to type %s", target_type.ToString()); + result = Value::DOUBLE(val).DefaultCastAs(target_type); + break; } } static void HandleLongAsDouble(Value &result, const LogicalType &target_type, double val) { auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type; result = Value::DOUBLE(val).DefaultCastAs(cast_as); } + static void HandleLongOverflow(Value &result, const LogicalType &target_type, py::handle ele) { + result = TransformPythonLongToHugeInt(ele, target_type); + } static void HandleUnsignedBigint(Value &result, const LogicalType &target_type, uint64_t val) { auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::UBIGINT : target_type; result = Value::UBIGINT(val).DefaultCastAs(cast_as); @@ -648,14 +692,17 @@ struct PythonVectorConversion { break; } default: - throw TypeMismatchException( - LogicalType::DOUBLE, result.GetType(), - "Python Conversion Failure: Expected a value of type %s, but got a value of type double"); + FallbackValueConversion(result, result_offset, Value::DOUBLE(val).DefaultCastAs(result.GetType())); + break; } } static void HandleLongAsDouble(Vector &result, const idx_t &result_offset, double val) { FallbackValueConversion(result, result_offset, Value::DOUBLE(val)); } + static void HandleLongOverflow(Vector &result, const idx_t &result_offset, py::handle ele) { + Value result_val = TransformPythonLongToHugeInt(ele, result.GetType()); + FallbackValueConversion(result, result_offset, std::move(result_val)); + } static void HandleUnsignedBigint(Vector &result, const idx_t &result_offset, uint64_t value) { // this code path is only called for values in the range of [INT64_MAX...UINT64_MAX] switch (result.GetType().id()) { @@ -966,12 +1013,7 @@ void TransformPythonObjectInternal(py::handle ele, A &result, const B ¶m, bo conversion_target); } } - double number = PyLong_AsDouble(ele.ptr()); - if (number == -1.0 && PyErr_Occurred()) { - PyErr_Clear(); - throw InvalidInputException("An error occurred attempting to convert a python integer"); - } - OP::HandleLongAsDouble(result, param, number); + OP::HandleLongOverflow(result, param, ele); } else if (value == -1 && PyErr_Occurred()) { throw InvalidInputException("An error occurred attempting to convert a python integer"); } else { diff --git a/tests/fast/test_type_conversion.py b/tests/fast/test_type_conversion.py new file mode 100644 index 00000000..29475daf --- /dev/null +++ b/tests/fast/test_type_conversion.py @@ -0,0 +1,102 @@ +"""Regression tests for Python-to-DuckDB type conversion bugs. + +Issue #115: Float conversion error with UNION containing float +Issue #171: Dictionary key case sensitivity not respected for parameter bindings +Issue #330: Integers >64-bit lose precision via double conversion +""" + +import pytest + +import duckdb +from duckdb.sqltypes import BIGINT, DOUBLE, HUGEINT, UHUGEINT, VARCHAR, DuckDBPyType + + +class TestIssue115FloatToUnion: + """HandleDouble should use DefaultCastAs for unknown target types like UNION.""" + + def test_udf_float_to_union_type(self): + conn = duckdb.connect() + conn.create_function( + "return_float", + lambda: 1.5, + return_type=duckdb.union_type({"u1": VARCHAR, "u2": BIGINT, "u3": DOUBLE}), + ) + result = conn.sql("SELECT return_float()").fetchone()[0] + assert result == 1.5 + + def test_udf_dict_with_float_in_union_struct(self): + """Original repro from issue #115.""" + conn = duckdb.connect() + + arr = [{"a": 1, "b": 1.2}, {"a": 3, "b": 2.4}] + + def test(): + return arr + + return_type = DuckDBPyType(list[dict[str, int | float]]) + conn.create_function("test", test, return_type=return_type) + result = conn.sql("SELECT test()").fetchone()[0] + assert len(result) == 2 + assert result[0]["b"] == pytest.approx(1.2) + assert result[1]["b"] == pytest.approx(2.4) + + +class TestIssue171DictKeyCaseSensitivity: + """Dict keys differing only by case must preserve their individual values.""" + + def test_case_sensitive_dict_keys(self): + result = duckdb.execute("SELECT ?", [{"Key": "first", "key": "second"}]).fetchone()[0] + assert result["Key"] == "first" + assert result["key"] == "second" + + def test_case_sensitive_dict_keys_three_variants(self): + result = duckdb.execute("SELECT ?", [{"abc": 1, "ABC": 2, "Abc": 3}]).fetchone()[0] + assert result["abc"] == 1 + assert result["ABC"] == 2 + assert result["Abc"] == 3 + + +class TestIssue330LargeIntegerPrecision: + """Integers >64-bit must not lose precision via double conversion.""" + + # --- Parameter binding path (TryTransformPythonNumeric) --- + + def test_param_hugeint_large(self): + """Value with >52 significant bits must not lose precision.""" + value = (2**128 - 1) // 15 * 7 # 0x77777777777777777777777777777777 + result = duckdb.execute("SELECT ?::HUGEINT", [value]).fetchone()[0] + assert result == value + + def test_param_uhugeint_max(self): + """2**128-1 must not overflow when cast to UHUGEINT.""" + value = 2**128 - 1 + result = duckdb.execute("SELECT ?::UHUGEINT", [value]).fetchone()[0] + assert result == value + + def test_param_auto_sniff(self): + """2**64 without explicit cast should sniff as HUGEINT, not lose precision.""" + value = 2**64 + result = duckdb.execute("SELECT ?", [value]).fetchone()[0] + assert result == value + + def test_param_negative_hugeint_no_regression(self): + """Negative overflow path (already correct) must not regress.""" + value = -(2**64) + result = duckdb.execute("SELECT ?::HUGEINT", [value]).fetchone()[0] + assert result == value + + # --- UDF return path (TransformPythonObjectInternal template) --- + + def test_udf_return_large_hugeint(self): + value = (2**128 - 1) // 15 * 7 + conn = duckdb.connect() + conn.create_function("big_hugeint", lambda: value, return_type=HUGEINT) + result = conn.sql("SELECT big_hugeint()").fetchone()[0] + assert result == value + + def test_udf_return_large_uhugeint(self): + value = 2**128 - 1 + conn = duckdb.connect() + conn.create_function("big_uhugeint", lambda: value, return_type=UHUGEINT) + result = conn.sql("SELECT big_uhugeint()").fetchone()[0] + assert result == value From 47a1728caffe4b7d1132b07ee4bbdb820b365115 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 19 Feb 2026 10:58:17 +0100 Subject: [PATCH 2/4] Special case casting for UNIONs --- src/duckdb_py/native/python_conversion.cpp | 52 +++++++++++++++++----- tests/fast/test_type_conversion.py | 40 +++++++++++++++-- 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/src/duckdb_py/native/python_conversion.cpp b/src/duckdb_py/native/python_conversion.cpp index c1274446..8ce4a5a2 100644 --- a/src/duckdb_py/native/python_conversion.cpp +++ b/src/duckdb_py/native/python_conversion.cpp @@ -13,6 +13,36 @@ namespace duckdb { +// Like DefaultCastAs, but handles UNION targets by finding the first compatible member. DefaultCastAs raises a +// Conversion Error when multiple UNION members have the same type (e.g. UNION(u1 DOUBLE, u2 DOUBLE)), so for UNION +// targets we resolve the member ourselves. +static Value CastToTarget(Value val, const LogicalType &target_type) { + if (target_type.id() != LogicalTypeId::UNION) { + return val.DefaultCastAs(target_type); + } + + auto member_count = UnionType::GetMemberCount(target_type); + auto &source_type = val.type(); + + // First pass: if there's an exact type match we use that + for (idx_t i = 0; i < member_count; i++) { + if (UnionType::GetMemberType(target_type, i) == source_type) { + return Value::UNION(UnionType::CopyMemberTypes(target_type), NumericCast(i), std::move(val)); + } + } + + // Second pass: if there's a type we can implicitly cast to, we do that + for (idx_t i = 0; i < member_count; i++) { + auto member_type = UnionType::GetMemberType(target_type, i); + Value candidate = val; + if (candidate.DefaultTryCastAs(member_type)) { + return Value::UNION(UnionType::CopyMemberTypes(target_type), NumericCast(i), std::move(candidate)); + } + } + throw ConversionException("Could not convert value of type %s to %s", source_type.ToString(), + target_type.ToString()); +} + static Value EmptyMapValue() { auto map_type = LogicalType::MAP(LogicalType::SQLNULL, LogicalType::SQLNULL); return Value::MAP(ListType::GetChildType(map_type), vector()); @@ -265,7 +295,7 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::HUGEINT) { return val; } - return val.DefaultCastAs(target_type); + return CastToTarget(std::move(val), target_type); } PyErr_Clear(); @@ -280,7 +310,7 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::UHUGEINT) { return val; } - return val.DefaultCastAs(target_type); + return CastToTarget(std::move(val), target_type); } void TransformPythonUnsigned(uint64_t value, Value &res) { @@ -410,7 +440,7 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta if (!TrySniffPythonNumeric(res, value)) { return false; } - res = res.DefaultCastAs(target_type, true); + res = CastToTarget(std::move(res), target_type); return true; } } @@ -516,20 +546,20 @@ struct PythonValueConversion { target_type.ToString()); } default: - result = Value::DOUBLE(val).DefaultCastAs(target_type); + result = CastToTarget(Value::DOUBLE(val), target_type); break; } } static void HandleLongAsDouble(Value &result, const LogicalType &target_type, double val) { auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type; - result = Value::DOUBLE(val).DefaultCastAs(cast_as); + result = CastToTarget(Value::DOUBLE(val), cast_as); } static void HandleLongOverflow(Value &result, const LogicalType &target_type, py::handle ele) { result = TransformPythonLongToHugeInt(ele, target_type); } static void HandleUnsignedBigint(Value &result, const LogicalType &target_type, uint64_t val) { auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::UBIGINT : target_type; - result = Value::UBIGINT(val).DefaultCastAs(cast_as); + result = CastToTarget(Value::UBIGINT(val), cast_as); } static void HandleBigint(Value &res, const LogicalType &target_type, int64_t value) { switch (target_type.id()) { @@ -545,7 +575,7 @@ struct PythonValueConversion { break; } default: - res = Value::BIGINT(value).DefaultCastAs(target_type); + res = CastToTarget(Value::BIGINT(value), target_type); break; } } @@ -555,7 +585,7 @@ struct PythonValueConversion { (target_type.id() == LogicalTypeId::VARCHAR && !target_type.HasAlias())) { result = Value(value); } else { - result = Value(value).DefaultCastAs(target_type); + result = CastToTarget(Value(value), target_type); } } @@ -692,7 +722,7 @@ struct PythonVectorConversion { break; } default: - FallbackValueConversion(result, result_offset, Value::DOUBLE(val).DefaultCastAs(result.GetType())); + FallbackValueConversion(result, result_offset, CastToTarget(Value::DOUBLE(val), result.GetType())); break; } } @@ -716,7 +746,7 @@ struct PythonVectorConversion { FlatVector::GetData(result)[result_offset] = value; break; default: - FallbackValueConversion(result, result_offset, Value::UBIGINT(value)); + FallbackValueConversion(result, result_offset, CastToTarget(Value::UBIGINT(value), result.GetType())); break; } } @@ -787,7 +817,7 @@ struct PythonVectorConversion { break; } default: - FallbackValueConversion(result, result_offset, Value::BIGINT(value)); + FallbackValueConversion(result, result_offset, CastToTarget(Value::BIGINT(value), result.GetType())); break; } } diff --git a/tests/fast/test_type_conversion.py b/tests/fast/test_type_conversion.py index 29475daf..9bc2e6d2 100644 --- a/tests/fast/test_type_conversion.py +++ b/tests/fast/test_type_conversion.py @@ -5,10 +5,11 @@ Issue #330: Integers >64-bit lose precision via double conversion """ +import numpy as np import pytest import duckdb -from duckdb.sqltypes import BIGINT, DOUBLE, HUGEINT, UHUGEINT, VARCHAR, DuckDBPyType +from duckdb.sqltypes import BIGINT, DOUBLE, FLOAT, HUGEINT, UHUGEINT, VARCHAR, DuckDBPyType class TestIssue115FloatToUnion: @@ -24,8 +25,19 @@ def test_udf_float_to_union_type(self): result = conn.sql("SELECT return_float()").fetchone()[0] assert result == 1.5 + def test_udf_float_to_ambiguous_union_type(self): + """UNION with duplicate DOUBLE members (from np.float64 and float) must not raise ambiguity error.""" + conn = duckdb.connect() + conn.create_function( + "return_float", + lambda: 1.5, + return_type=duckdb.union_type({"u1": VARCHAR, "u2": BIGINT, "u3": DOUBLE, "u4": FLOAT, "u5": DOUBLE}), + ) + result = conn.sql("SELECT return_float()").fetchone()[0] + assert result == 1.5 + def test_udf_dict_with_float_in_union_struct(self): - """Original repro from issue #115.""" + """Original repro from issue #115 with ambiguous UNION members.""" conn = duckdb.connect() arr = [{"a": 1, "b": 1.2}, {"a": 3, "b": 2.4}] @@ -33,13 +45,35 @@ def test_udf_dict_with_float_in_union_struct(self): def test(): return arr - return_type = DuckDBPyType(list[dict[str, int | float]]) + return_type = DuckDBPyType(list[dict[str, str | int | np.float64 | np.float32 | float]]) conn.create_function("test", test, return_type=return_type) result = conn.sql("SELECT test()").fetchone()[0] assert len(result) == 2 assert result[0]["b"] == pytest.approx(1.2) assert result[1]["b"] == pytest.approx(2.4) + def test_udf_int_to_ambiguous_union_type(self): + """HandleBigint default branch: int into UNION with duplicate BIGINT members.""" + conn = duckdb.connect() + conn.create_function( + "return_int", + lambda: 42, + return_type=duckdb.union_type({"u1": VARCHAR, "u2": BIGINT, "u3": BIGINT}), + ) + result = conn.sql("SELECT return_int()").fetchone()[0] + assert result == 42 + + def test_udf_string_to_ambiguous_union_type(self): + """HandleString default branch: str into UNION with duplicate VARCHAR members.""" + conn = duckdb.connect() + conn.create_function( + "return_str", + lambda: "hello", + return_type=duckdb.union_type({"u1": VARCHAR, "u2": BIGINT, "u3": VARCHAR}), + ) + result = conn.sql("SELECT return_str()").fetchone()[0] + assert result == "hello" + class TestIssue171DictKeyCaseSensitivity: """Dict keys differing only by case must preserve their individual values.""" From 6262e0b539b326a5e816866de1f019f2e90e7431 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 19 Feb 2026 12:25:52 +0100 Subject: [PATCH 3/4] Fallback to varchar if int is too large --- src/duckdb_py/native/python_conversion.cpp | 39 ++++++++++++++-------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/duckdb_py/native/python_conversion.cpp b/src/duckdb_py/native/python_conversion.cpp index 8ce4a5a2..724cab09 100644 --- a/src/duckdb_py/native/python_conversion.cpp +++ b/src/duckdb_py/native/python_conversion.cpp @@ -270,17 +270,17 @@ bool TryTransformPythonIntegerToDouble(Value &res, py::handle ele) { return true; } -// Converts a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value by decomposing it into upper -// and lower 64-bit components. Tries HUGEINT first; falls back to UHUGEINT for large positive values. -static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &target_type) { +// Tries to convert a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value +// by decomposing it into upper and lower 64-bit components. Tries HUGEINT first; falls back to +// UHUGEINT for large positive values. Returns false if the value doesn't fit in 128 bits. +static bool TryTransformPythonLongToHugeInt(py::handle ele, const LogicalType &target_type, Value &result) { auto ptr = ele.ptr(); // Extract lower 64 bits (two's complement, works for negative values too) uint64_t lower = PyLong_AsUnsignedLongLongMask(ptr); if (lower == static_cast(-1) && PyErr_Occurred()) { PyErr_Clear(); - throw InvalidInputException("Failed to convert Python integer to 128-bit integer: %s", - std::string(py::str(ele))); + return false; } // Extract upper bits by right-shifting by 64 @@ -293,9 +293,11 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar if (overflow == 0 && !(upper_signed == -1 && PyErr_Occurred())) { auto val = Value::HUGEINT(hugeint_t {upper_signed, lower}); if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::HUGEINT) { - return val; + result = val; + } else { + result = CastToTarget(std::move(val), target_type); } - return CastToTarget(std::move(val), target_type); + return true; } PyErr_Clear(); @@ -303,14 +305,25 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar uint64_t upper_unsigned = PyLong_AsUnsignedLongLong(upper_obj.ptr()); if (PyErr_Occurred()) { PyErr_Clear(); - throw InvalidInputException("Python integer too large for 128-bit integer type: %s", std::string(py::str(ele))); + return false; } auto val = Value::UHUGEINT(uhugeint_t {upper_unsigned, lower}); if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::UHUGEINT) { - return val; + result = val; + } else { + result = CastToTarget(std::move(val), target_type); + } + return true; +} + +// Throwing wrapper for contexts that require a result (e.g. prepared statement parameters). +static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &target_type) { + Value result; + if (!TryTransformPythonLongToHugeInt(ele, target_type, result)) { + throw InvalidInputException("Python integer too large for 128-bit integer type: %s", std::string(py::str(ele))); } - return CastToTarget(std::move(val), target_type); + return result; } void TransformPythonUnsigned(uint64_t value, Value &res) { @@ -347,8 +360,7 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64", std::string(pybind11::str(ele)))); } - res = TransformPythonLongToHugeInt(ele, target_type); - return true; + return TryTransformPythonLongToHugeInt(ele, target_type, res); } else if (overflow == 1) { if (target_type.InternalType() == PhysicalType::INT64) { throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64", @@ -357,8 +369,7 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta uint64_t unsigned_value = PyLong_AsUnsignedLongLong(ptr); if (PyErr_Occurred()) { PyErr_Clear(); - res = TransformPythonLongToHugeInt(ele, target_type); - return true; + return TryTransformPythonLongToHugeInt(ele, target_type, res); } TransformPythonUnsigned(unsigned_value, res); PyErr_Clear(); From 16621e84f71d7f4cd08177a496c72e8124a6f305 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 19 Feb 2026 14:01:11 +0100 Subject: [PATCH 4/4] Simplify related code paths --- .../duckdb_python/python_conversion.hpp | 2 +- src/duckdb_py/native/python_conversion.cpp | 183 ++++-------------- src/duckdb_py/pandas/analyzer.cpp | 7 +- 3 files changed, 40 insertions(+), 152 deletions(-) diff --git a/src/duckdb_py/include/duckdb_python/python_conversion.hpp b/src/duckdb_py/include/duckdb_python/python_conversion.hpp index d3bfadba..bad518ef 100644 --- a/src/duckdb_py/include/duckdb_python/python_conversion.hpp +++ b/src/duckdb_py/include/duckdb_python/python_conversion.hpp @@ -45,7 +45,7 @@ enum class PythonObjectType { PythonObjectType GetPythonObjectType(py::handle &ele); -bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type = LogicalType::UNKNOWN); +LogicalType SniffPythonIntegerType(py::handle ele); bool DictionaryHasMapFormat(const PyDictionary &dict); void TransformPythonObject(py::handle ele, Vector &vector, idx_t result_offset, bool nan_as_null = true); Value TransformPythonValue(py::handle ele, const LogicalType &target_type = LogicalType::UNKNOWN, diff --git a/src/duckdb_py/native/python_conversion.cpp b/src/duckdb_py/native/python_conversion.cpp index 724cab09..a56ea73f 100644 --- a/src/duckdb_py/native/python_conversion.cpp +++ b/src/duckdb_py/native/python_conversion.cpp @@ -260,16 +260,6 @@ Value TransformTupleToStruct(py::handle ele, const LogicalType &target_type = Lo return result; } -bool TryTransformPythonIntegerToDouble(Value &res, py::handle ele) { - double number = PyLong_AsDouble(ele.ptr()); - if (number == -1.0 && PyErr_Occurred()) { - PyErr_Clear(); - return false; - } - res = Value::DOUBLE(number); - return true; -} - // Tries to convert a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value // by decomposing it into upper and lower 64-bit components. Tries HUGEINT first; falls back to // UHUGEINT for large positive values. Returns false if the value doesn't fit in 128 bits. @@ -326,135 +316,52 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar return result; } -void TransformPythonUnsigned(uint64_t value, Value &res) { - if (value > (uint64_t)std::numeric_limits::max()) { - res = Value::UBIGINT(value); - } else if (value > (int64_t)std::numeric_limits::max()) { - res = Value::UINTEGER(value); - } else if (value > (int64_t)std::numeric_limits::max()) { - res = Value::USMALLINT(value); - } else { - res = Value::UTINYINT(value); - } -} - -bool TrySniffPythonNumeric(Value &res, int64_t value) { +// Picks the tightest DuckDB integer type (>=INT32) for an int64 value when no target type is specified. +static Value SniffIntegerValue(int64_t value) { if (value < (int64_t)std::numeric_limits::min() || value > (int64_t)std::numeric_limits::max()) { - res = Value::BIGINT(value); - } else { - // To match default duckdb behavior, numeric values without a specified type should not become a smaller type - // than INT32 - res = Value::INTEGER(value); + return Value::BIGINT(value); } - return true; + return Value::INTEGER(value); } -bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type) { +// Sniffs the tightest DuckDB integer type for a Python integer. +// Progressively widens: int64 → uint64 → hugeint → uhugeint. +// Returns SQLNULL if the value doesn't fit in any DuckDB integer type (> 128-bit). +LogicalType SniffPythonIntegerType(py::handle ele) { auto ptr = ele.ptr(); + // Step 1: Try int64 int overflow; - int64_t value = PyLong_AsLongLongAndOverflow(ptr, &overflow); - if (overflow == -1) { - PyErr_Clear(); - if (target_type.id() == LogicalTypeId::BIGINT) { - throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64", - std::string(pybind11::str(ele)))); - } - return TryTransformPythonLongToHugeInt(ele, target_type, res); - } else if (overflow == 1) { - if (target_type.InternalType() == PhysicalType::INT64) { - throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64", - std::string(pybind11::str(ele)))); - } - uint64_t unsigned_value = PyLong_AsUnsignedLongLong(ptr); - if (PyErr_Occurred()) { - PyErr_Clear(); - return TryTransformPythonLongToHugeInt(ele, target_type, res); - } - TransformPythonUnsigned(unsigned_value, res); - PyErr_Clear(); - return true; - } - if (value == -1 && PyErr_Occurred()) { - return false; - } + const int64_t value = PyLong_AsLongLongAndOverflow(ptr, &overflow); - // The value is int64_t or smaller - switch (target_type.id()) { - case LogicalTypeId::UNKNOWN: - return TrySniffPythonNumeric(res, value); - case LogicalTypeId::HUGEINT: { - res = Value::HUGEINT(value); - return true; - } - case LogicalTypeId::UHUGEINT: { - if (value < 0) { - return false; - } - res = Value::UHUGEINT(value); - return true; - } - case LogicalTypeId::BIGINT: { - res = Value::BIGINT(value); - return true; - } - case LogicalTypeId::INTEGER: { - if (value < NumericLimits::Minimum() || value > NumericLimits::Maximum()) { - return false; - } - res = Value::INTEGER(value); - return true; - } - case LogicalTypeId::SMALLINT: { - if (value < NumericLimits::Minimum() || value > NumericLimits::Maximum()) { - return false; - } - res = Value::SMALLINT(value); - return true; - } - case LogicalTypeId::TINYINT: { - if (value < NumericLimits::Minimum() || value > NumericLimits::Maximum()) { - return false; - } - res = Value::TINYINT(value); - return true; - } - case LogicalTypeId::UBIGINT: { - if (value < 0) { - return false; - } - res = Value::UBIGINT(value); - return true; - } - case LogicalTypeId::UINTEGER: { - if (value < 0 || value > (int64_t)NumericLimits::Maximum()) { - return false; + if (overflow == 0) { + if (value == -1 && PyErr_Occurred()) { + PyErr_Clear(); + return LogicalType::SQLNULL; } - res = Value::UINTEGER(value); - return true; - } - case LogicalTypeId::USMALLINT: { - if (value < 0 || value > (int64_t)NumericLimits::Maximum()) { - return false; + if (value < static_cast(std::numeric_limits::min()) || + value > static_cast(std::numeric_limits::max())) { + return LogicalType::BIGINT; } - res = Value::USMALLINT(value); - return true; + return LogicalType::INTEGER; } - case LogicalTypeId::UTINYINT: { - if (value < 0 || value > (int64_t)NumericLimits::Maximum()) { - return false; - } - res = Value::UTINYINT(value); - return true; - } - default: { - if (!TrySniffPythonNumeric(res, value)) { - return false; + PyErr_Clear(); + + // Step 2: For positive overflow, try uint64 + if (overflow == 1) { + (void)PyLong_AsUnsignedLongLong(ptr); + if (!PyErr_Occurred()) { + return LogicalType::UBIGINT; } - res = CastToTarget(std::move(res), target_type); - return true; + PyErr_Clear(); } + + // Step 3: Try 128-bit (hugeint/uhugeint) + Value res; + if (!TryTransformPythonLongToHugeInt(ele, LogicalType::UNKNOWN, res)) { + return LogicalType::SQLNULL; } + return res.type(); } Value TransformDictionary(const PyDictionary &dict) { @@ -561,10 +468,6 @@ struct PythonValueConversion { break; } } - static void HandleLongAsDouble(Value &result, const LogicalType &target_type, double val) { - auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type; - result = CastToTarget(Value::DOUBLE(val), cast_as); - } static void HandleLongOverflow(Value &result, const LogicalType &target_type, py::handle ele) { result = TransformPythonLongToHugeInt(ele, target_type); } @@ -573,21 +476,10 @@ struct PythonValueConversion { result = CastToTarget(Value::UBIGINT(val), cast_as); } static void HandleBigint(Value &res, const LogicalType &target_type, int64_t value) { - switch (target_type.id()) { - case LogicalTypeId::UNKNOWN: { - if (value < (int64_t)std::numeric_limits::min() || - value > (int64_t)std::numeric_limits::max()) { - res = Value::BIGINT(value); - } else { - // To match default duckdb behavior, numeric values without a specified type should not become a smaller - // type than INT32 - res = Value::INTEGER(value); - } - break; - } - default: - res = CastToTarget(Value::BIGINT(value), target_type); - break; + if (target_type.id() == LogicalTypeId::UNKNOWN) { + res = SniffIntegerValue(value); + } else { + res = CastToTarget(SniffIntegerValue(value), target_type); } } @@ -737,9 +629,6 @@ struct PythonVectorConversion { break; } } - static void HandleLongAsDouble(Vector &result, const idx_t &result_offset, double val) { - FallbackValueConversion(result, result_offset, Value::DOUBLE(val)); - } static void HandleLongOverflow(Vector &result, const idx_t &result_offset, py::handle ele) { Value result_val = TransformPythonLongToHugeInt(ele, result.GetType()); FallbackValueConversion(result, result_offset, std::move(result_val)); diff --git a/src/duckdb_py/pandas/analyzer.cpp b/src/duckdb_py/pandas/analyzer.cpp index ee264524..a91bff51 100644 --- a/src/duckdb_py/pandas/analyzer.cpp +++ b/src/duckdb_py/pandas/analyzer.cpp @@ -363,12 +363,11 @@ LogicalType PandasAnalyzer::GetItemType(py::object ele, bool &can_convert) { case PythonObjectType::Bool: return LogicalType::BOOLEAN; case PythonObjectType::Integer: { - Value integer; - if (!TryTransformPythonNumeric(integer, ele)) { + auto type = SniffPythonIntegerType(ele); + if (type.id() == LogicalTypeId::SQLNULL) { can_convert = false; - return LogicalType::SQLNULL; } - return integer.type(); + return type; } case PythonObjectType::Float: if (std::isnan(PyFloat_AsDouble(ele.ptr()))) {