From 8cb40d1c54a19a561bbd7bcced4c7ed9f1bfb51a Mon Sep 17 00:00:00 2001 From: Michael Coady Date: Wed, 28 Jan 2026 12:42:57 +0000 Subject: [PATCH 1/4] fix broken `cabal build` in Windows --- dataframe.cabal | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dataframe.cabal b/dataframe.cabal index a47adea6..4c231092 100644 --- a/dataframe.cabal +++ b/dataframe.cabal @@ -157,8 +157,9 @@ executable dataframe directory >= 1.3.0.0 && < 2, filepath >= 1.4 && < 2, process >= 1.6 && < 2, - time >= 1.12 && < 2, - unix >= 2 && < 3 + time >= 1.12 && < 2 + if !os(windows) + build-depends: unix >= 2 && < 3 hs-source-dirs: app default-language: Haskell2010 ghc-options: -rtsopts -threaded -with-rtsopts=-N From 838c46e5a08e6f1dcf0a1547f78597825ced6459 Mon Sep 17 00:00:00 2001 From: Michael Coady Date: Wed, 28 Jan 2026 15:27:56 +0000 Subject: [PATCH 2/4] spin out `decodeSeparated` from `readSeparated` --- src/DataFrame/IO/CSV.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/DataFrame/IO/CSV.hs b/src/DataFrame/IO/CSV.hs index 23cca806..c4bcbd50 100644 --- a/src/DataFrame/IO/CSV.hs +++ b/src/DataFrame/IO/CSV.hs @@ -237,10 +237,14 @@ ghci> D.readSeparated (D.defaultReadOptions { columnSeparator = ';' }) ".\/data\ @ -} readSeparated :: ReadOptions -> FilePath -> IO DataFrame -readSeparated !opts !path = do - let sep = columnSeparator opts +readSeparated opts !path = do let stripUtf8Bom bs = fromMaybe bs (BL.stripPrefix "\xEF\xBB\xBF" bs) csvData <- stripUtf8Bom <$> BL.readFile path + decodeSeparated opts csvData + +decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame +decodeSeparated !opts csvData = do + let sep = columnSeparator opts let decodeOpts = Csv.defaultDecodeOptions{Csv.decDelimiter = fromIntegral (ord sep)} let stream = CsvStream.decodeWith decodeOpts Csv.NoHeader csvData From dbfcc85c9247d157dcfaf24966cc29ac65314123 Mon Sep 17 00:00:00 2001 From: Michael Coady Date: Sat, 14 Feb 2026 20:33:52 +0000 Subject: [PATCH 3/4] fix: index out of bounds when calling `aggregate` and `distinct` on a dataframe with no rows --- src/DataFrame/Operations/Aggregation.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs index 650d2159..34a53529 100644 --- a/src/DataFrame/Operations/Aggregation.hs +++ b/src/DataFrame/Operations/Aggregation.hs @@ -254,7 +254,9 @@ computeRowHashes indices df = runST $ do All ungrouped columns will be dropped. -} aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame -aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) = +aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) + | VU.null valueIndices = df + | otherwise = let df' = selectIndices @@ -281,6 +283,8 @@ selectIndices xs df = -- | Filter out all non-unique values in a dataframe. distinct :: DataFrame -> DataFrame -distinct df = selectIndices (VU.map (indices VU.!) (VU.init os)) df +distinct df + | nRows df == 0 = df + | otherwise = selectIndices (VU.map (indices VU.!) (VU.init os)) df where (Grouped _ _ indices os) = groupBy (columnNames df) df From 9ba86e2fbadf0e7e8db4153201ece63d789ce985 Mon Sep 17 00:00:00 2001 From: Michael Coady Date: Sat, 14 Feb 2026 21:18:52 +0000 Subject: [PATCH 4/4] fix: formatting --- src/DataFrame/Operations/Aggregation.hs | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs index 34a53529..f510a193 100644 --- a/src/DataFrame/Operations/Aggregation.hs +++ b/src/DataFrame/Operations/Aggregation.hs @@ -257,22 +257,22 @@ aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) | VU.null valueIndices = df | otherwise = - let - df' = - selectIndices - (VU.map (valueIndices VU.!) (VU.init offsets)) - (select groupingColumns df) + let + df' = + selectIndices + (VU.map (valueIndices VU.!) (VU.init offsets)) + (select groupingColumns df) - f (name, Wrap (expr :: Expr a)) d = - let - value = case interpretAggregation @a gdf expr of - Left e -> throw e - Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr) - Right (Aggregated (TColumn col)) -> col - in - insertColumn name value d - in - fold f aggs df' + f (name, Wrap (expr :: Expr a)) d = + let + value = case interpretAggregation @a gdf expr of + Left e -> throw e + Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr) + Right (Aggregated (TColumn col)) -> col + in + insertColumn name value d + in + fold f aggs df' selectIndices :: VU.Vector Int -> DataFrame -> DataFrame selectIndices xs df =