From 5482837846da1560d8fde1beb059f73b7e77fefb Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Thu, 5 Feb 2026 11:28:50 +0100 Subject: [PATCH 1/2] fix: Use right file prefixes on windows --- dataframely/_storage/__init__.py | 2 ++ dataframely/_storage/_fsspec.py | 14 ++++++++++++++ dataframely/_storage/parquet.py | 11 ++--------- dataframely/testing/storage.py | 17 ++--------------- 4 files changed, 20 insertions(+), 24 deletions(-) create mode 100644 dataframely/_storage/_fsspec.py diff --git a/dataframely/_storage/__init__.py b/dataframely/_storage/__init__.py index 780ec79..297aa02 100644 --- a/dataframely/_storage/__init__.py +++ b/dataframely/_storage/__init__.py @@ -2,7 +2,9 @@ # SPDX-License-Identifier: BSD-3-Clause from ._base import StorageBackend +from ._fsspec import get_file_prefix __all__ = [ "StorageBackend", + "get_file_prefix" ] diff --git a/dataframely/_storage/_fsspec.py b/dataframely/_storage/_fsspec.py new file mode 100644 index 0000000..d3c872e --- /dev/null +++ b/dataframely/_storage/_fsspec.py @@ -0,0 +1,14 @@ +from fsspec import AbstractFileSystem + +def get_file_prefix(fs: AbstractFileSystem) -> str: + match fs.protocol: + case "file": + return "" + case str(): + return f"{fs.protocol}://" + case ["file", *_]: + return "" + case [proto, *_]: + return f"{proto}://" + case _: + raise ValueError(f"Unexpected fs.protocol: {fs.protocol}") \ No newline at end of file diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py index fc8ba46..6f2256b 100644 --- a/dataframely/_storage/parquet.py +++ b/dataframely/_storage/parquet.py @@ -4,6 +4,7 @@ from collections.abc import Iterable from typing import Any +from dataframely._storage import get_file_prefix import polars as pl from fsspec import AbstractFileSystem, url_to_fs @@ -155,15 +156,7 @@ def _collection_from_parquet( if is_file: collection_types.append(_read_serialized_collection(source_path)) else: - prefix = ( - "" - if fs.protocol == "file" - else ( - f"{fs.protocol}://" - if isinstance(fs.protocol, str) - else f"{fs.protocol[0]}://" - ) - ) + prefix = get_file_prefix(fs) for file in fs.glob(fs.sep.join([source_path, "**", "*.parquet"])): collection_types.append( _read_serialized_collection(f"{prefix}{file}") diff --git a/dataframely/testing/storage.py b/dataframely/testing/storage.py index aa571a5..9981513 100644 --- a/dataframely/testing/storage.py +++ b/dataframely/testing/storage.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from typing import Any, Literal, TypeVar, overload +from dataframely._storage import get_file_prefix import polars as pl from fsspec import AbstractFileSystem, url_to_fs @@ -190,21 +191,7 @@ def set_metadata(self, path: str, metadata: dict[str, Any]) -> None: metadata.""" def _prefix_path(self, path: str, fs: AbstractFileSystem) -> str: - return f"{self._get_prefix(fs)}{path}" - - @staticmethod - def _get_prefix(fs: AbstractFileSystem) -> str: - match fs.protocol: - case "file": - return "" - case str(): - return f"{fs.protocol}://" - case ["file", *_]: - return "" - case [proto, *_]: - return f"{proto}://" - case _: - raise ValueError(f"Unexpected fs.protocol: {fs.protocol}") + return f"{get_file_prefix(fs)}{path}" class ParquetCollectionStorageTester(CollectionStorageTester): From ced96e52ae0b362bca588309452ce4496de2a24e Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Thu, 5 Feb 2026 11:37:27 +0100 Subject: [PATCH 2/2] precommit --- dataframely/_storage/__init__.py | 5 +---- dataframely/_storage/_fsspec.py | 6 +++++- dataframely/_storage/parquet.py | 3 ++- dataframely/testing/storage.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dataframely/_storage/__init__.py b/dataframely/_storage/__init__.py index 297aa02..25528dd 100644 --- a/dataframely/_storage/__init__.py +++ b/dataframely/_storage/__init__.py @@ -4,7 +4,4 @@ from ._base import StorageBackend from ._fsspec import get_file_prefix -__all__ = [ - "StorageBackend", - "get_file_prefix" -] +__all__ = ["StorageBackend", "get_file_prefix"] diff --git a/dataframely/_storage/_fsspec.py b/dataframely/_storage/_fsspec.py index d3c872e..24ad780 100644 --- a/dataframely/_storage/_fsspec.py +++ b/dataframely/_storage/_fsspec.py @@ -1,5 +1,9 @@ +# Copyright (c) QuantCo 2025-2026 +# SPDX-License-Identifier: BSD-3-Clause + from fsspec import AbstractFileSystem + def get_file_prefix(fs: AbstractFileSystem) -> str: match fs.protocol: case "file": @@ -11,4 +15,4 @@ def get_file_prefix(fs: AbstractFileSystem) -> str: case [proto, *_]: return f"{proto}://" case _: - raise ValueError(f"Unexpected fs.protocol: {fs.protocol}") \ No newline at end of file + raise ValueError(f"Unexpected fs.protocol: {fs.protocol}") diff --git a/dataframely/_storage/parquet.py b/dataframely/_storage/parquet.py index 6f2256b..bc870fb 100644 --- a/dataframely/_storage/parquet.py +++ b/dataframely/_storage/parquet.py @@ -4,10 +4,11 @@ from collections.abc import Iterable from typing import Any -from dataframely._storage import get_file_prefix import polars as pl from fsspec import AbstractFileSystem, url_to_fs +from dataframely._storage import get_file_prefix + from ._base import ( SerializedCollection, SerializedRules, diff --git a/dataframely/testing/storage.py b/dataframely/testing/storage.py index 9981513..e09c247 100644 --- a/dataframely/testing/storage.py +++ b/dataframely/testing/storage.py @@ -4,13 +4,13 @@ from abc import ABC, abstractmethod from typing import Any, Literal, TypeVar, overload -from dataframely._storage import get_file_prefix import polars as pl from fsspec import AbstractFileSystem, url_to_fs import dataframely as dy from dataframely import FailureInfo, Validation from dataframely._compat import deltalake +from dataframely._storage import get_file_prefix from dataframely._storage.delta import _to_delta_table # ----------------------------------- Schema -------------------------------------------