From f241957e8f1cf8cd535c9ccfb8f8348a769a1a41 Mon Sep 17 00:00:00 2001
From: Michael Coady <mcoady@gmail.com>
Date: Sat, 14 Feb 2026 22:43:10 +0000
Subject: [PATCH 1/5] fix out of bounds exception when calling `aggregate` and
 `distinct` on a dataframe with no rows; split out `decodeSeparated` from
 `readSeparated`

---
 src/DataFrame/IO/CSV.hs                 |  8 +++--
 src/DataFrame/Operations/Aggregation.hs | 43 +++++++++++++++----------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/DataFrame/IO/CSV.hs b/src/DataFrame/IO/CSV.hs
index 23cca806..c4bcbd50 100644
--- a/src/DataFrame/IO/CSV.hs
+++ b/src/DataFrame/IO/CSV.hs
@@ -237,10 +237,14 @@ ghci> D.readSeparated (D.defaultReadOptions { columnSeparator = ';' }) ".\/data\
 @
 -}
 readSeparated :: ReadOptions -> FilePath -> IO DataFrame
-readSeparated !opts !path = do
-    let sep = columnSeparator opts
+readSeparated opts !path = do
     let stripUtf8Bom bs = fromMaybe bs (BL.stripPrefix "\xEF\xBB\xBF" bs)
     csvData <- stripUtf8Bom <$> BL.readFile path
+    decodeSeparated opts csvData
+
+decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame
+decodeSeparated !opts csvData = do
+    let sep = columnSeparator opts
     let decodeOpts = Csv.defaultDecodeOptions{Csv.decDelimiter = fromIntegral (ord sep)}
     let stream = CsvStream.decodeWith decodeOpts Csv.NoHeader csvData
 
diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs
index 68e32161..37a68500 100644
--- a/src/DataFrame/Operations/Aggregation.hs
+++ b/src/DataFrame/Operations/Aggregation.hs
@@ -254,23 +254,30 @@ computeRowHashes indices df = runST $ do
 All ungrouped columns will be dropped.
 -}
 aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
-aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) =
-    let
-        df' =
-            selectIndices
-                (VU.map (valueIndices VU.!) (VU.init offsets))
-                (select groupingColumns df)
+aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets)
+    | VU.null valueIndices = 
+        let
+            df' = exclude (M.keys (columnIndices df) L.\\ groupingColumns) df
+            f (name, UExpr (expr :: Expr a)) = insert name ([] :: [a])
+        in
+            fold f aggs df'
+    | otherwise =
+        let
+            df' =
+                selectIndices
+                    (VU.map (valueIndices VU.!) (VU.init offsets))
+                    (select groupingColumns df)
 
-        f (name, UExpr (expr :: Expr a)) d =
-            let
-                value = case interpretAggregation @a gdf expr of
-                    Left e -> throw e
-                    Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
-                    Right (Aggregated (TColumn col)) -> col
-             in
-                insertColumn name value d
-     in
-        fold f aggs df'
+            f (name, UExpr (expr :: Expr a)) d =
+                let
+                    value = case interpretAggregation @a gdf expr of
+                        Left e -> throw e
+                        Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
+                        Right (Aggregated (TColumn col)) -> col
+                in
+                    insertColumn name value d
+        in
+            fold f aggs df'
 
 selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
 selectIndices xs df =
@@ -281,6 +288,8 @@ selectIndices xs df =
 
 -- | Filter out all non-unique values in a dataframe.
 distinct :: DataFrame -> DataFrame
-distinct df = selectIndices (VU.map (indices VU.!) (VU.init os)) df
+distinct df
+    | nRows df == 0 = df 
+    | otherwise = selectIndices (VU.map (indices VU.!) (VU.init os)) df
   where
     (Grouped _ _ indices os) = groupBy (columnNames df) df

From f3e18c7741d2cfec0324054aa2333f54aa3ba865 Mon Sep 17 00:00:00 2001
From: Michael Coady <mcoady@gmail.com>
Date: Sat, 14 Feb 2026 23:46:19 +0000
Subject: [PATCH 2/5] apply ghc-9.12.2 suggestions in `aggregate`: add a type
 signature for the local `f` and replace the unused `expr` binding with `_`

---
 src/DataFrame/Operations/Aggregation.hs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs
index 37a68500..7fb214c4 100644
--- a/src/DataFrame/Operations/Aggregation.hs
+++ b/src/DataFrame/Operations/Aggregation.hs
@@ -255,10 +255,12 @@ All ungrouped columns will be dropped.
 -}
 aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
 aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets)
-    | VU.null valueIndices = 
+    | VU.null valueIndices =
         let
             df' = exclude (M.keys (columnIndices df) L.\\ groupingColumns) df
-            f (name, UExpr (expr :: Expr a)) = insert name ([] :: [a])
+            
+            f :: NamedExpr -> DataFrame -> DataFrame
+            f (name, UExpr (_ :: Expr a)) = insert name ([] :: [a])
         in
             fold f aggs df'
     | otherwise =
@@ -289,7 +291,7 @@ selectIndices xs df =
 -- | Filter out all non-unique values in a dataframe.
 distinct :: DataFrame -> DataFrame
 distinct df
-    | nRows df == 0 = df 
+    | nRows df == 0 = df
     | otherwise = selectIndices (VU.map (indices VU.!) (VU.init os)) df
   where
     (Grouped _ _ indices os) = groupBy (columnNames df) df

From b6af77444689e1c125315d95b54bf38f4968c13f Mon Sep 17 00:00:00 2001
From: Michael Coady <mcoady@gmail.com>
Date: Sun, 15 Feb 2026 00:04:14 +0000
Subject: [PATCH 3/5] formatting

---
 src/DataFrame/Operations/Aggregation.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs
index 7fb214c4..5259fad5 100644
--- a/src/DataFrame/Operations/Aggregation.hs
+++ b/src/DataFrame/Operations/Aggregation.hs
@@ -258,10 +258,10 @@ aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets)
     | VU.null valueIndices =
         let
             df' = exclude (M.keys (columnIndices df) L.\\ groupingColumns) df
-            
+
             f :: NamedExpr -> DataFrame -> DataFrame
             f (name, UExpr (_ :: Expr a)) = insert name ([] :: [a])
-        in
+         in
             fold f aggs df'
     | otherwise =
         let
@@ -276,9 +276,9 @@ aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets)
                         Left e -> throw e
                         Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
                         Right (Aggregated (TColumn col)) -> col
-                in
+                 in
                     insertColumn name value d
-        in
+         in
             fold f aggs df'
 
 selectIndices :: VU.Vector Int -> DataFrame -> DataFrame

From 634313c27718625b4f69a5f6a753d0495485d9e2 Mon Sep 17 00:00:00 2001
From: Michael Coady <mcoady@gmail.com>
Date: Mon, 16 Feb 2026 10:13:24 +0000
Subject: [PATCH 4/5] simplify handling of `aggregate` and `distinct` on no
 rows by returning an empty grouping from `groupBy`; add test case
 `aggregationOnNoRows`

---
 src/DataFrame/Operations/Aggregation.hs | 51 +++++++++++--------------
 tests/Operations/Aggregations.hs        | 21 ++++++++++
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs
index 5259fad5..c6381b5a 100644
--- a/src/DataFrame/Operations/Aggregation.hs
+++ b/src/DataFrame/Operations/Aggregation.hs
@@ -53,6 +53,12 @@ groupBy names df
                 (T.pack $ show $ names L.\\ columnNames df)
                 "groupBy"
                 (columnNames df)
+    | nRows df == 0 =
+        Grouped
+            df
+            names
+            VU.empty
+            (VU.fromList [0])
     | otherwise =
         Grouped
             df
@@ -254,32 +260,23 @@ computeRowHashes indices df = runST $ do
 All ungrouped columns will be dropped.
 -}
 aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
-aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets)
-    | VU.null valueIndices =
-        let
-            df' = exclude (M.keys (columnIndices df) L.\\ groupingColumns) df
-
-            f :: NamedExpr -> DataFrame -> DataFrame
-            f (name, UExpr (_ :: Expr a)) = insert name ([] :: [a])
-         in
-            fold f aggs df'
-    | otherwise =
-        let
-            df' =
-                selectIndices
-                    (VU.map (valueIndices VU.!) (VU.init offsets))
-                    (select groupingColumns df)
+aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) =
+    let
+        df' =
+            selectIndices
+                (VU.map (valueIndices VU.!) (VU.init offsets))
+                (select groupingColumns df)
 
-            f (name, UExpr (expr :: Expr a)) d =
-                let
-                    value = case interpretAggregation @a gdf expr of
-                        Left e -> throw e
-                        Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
-                        Right (Aggregated (TColumn col)) -> col
-                 in
-                    insertColumn name value d
-         in
-            fold f aggs df'
+        f (name, UExpr (expr :: Expr a)) d =
+            let
+                value = case interpretAggregation @a gdf expr of
+                    Left e -> throw e
+                    Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
+                    Right (Aggregated (TColumn col)) -> col
+                in
+                insertColumn name value d
+        in
+        fold f aggs df'
 
 selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
 selectIndices xs df =
@@ -290,8 +287,6 @@ selectIndices xs df =
 
 -- | Filter out all non-unique values in a dataframe.
 distinct :: DataFrame -> DataFrame
-distinct df
-    | nRows df == 0 = df
-    | otherwise = selectIndices (VU.map (indices VU.!) (VU.init os)) df
+distinct df = selectIndices (VU.map (indices VU.!) (VU.init os)) df
   where
     (Grouped _ _ indices os) = groupBy (columnNames df) df
diff --git a/tests/Operations/Aggregations.hs b/tests/Operations/Aggregations.hs
index 6e9a2df0..d43b0ae8 100644
--- a/tests/Operations/Aggregations.hs
+++ b/tests/Operations/Aggregations.hs
@@ -133,6 +133,24 @@ reduceAggregationOfUnaggregatedBinaryOp =
             )
         )
 
+aggregationOnNoRows :: Test
+aggregationOnNoRows =
+    TestCase
+        ( assertEqual
+            "Aggregation on DataFrame with no rows"
+            ( D.fromNamedColumns
+                [ ("test1", DI.fromList ([] :: [Int]))
+                , ("sum(test2)", DI.fromList ([] :: [Int]))
+                ]
+            )
+            ( testData
+                & D.drop 12
+                & D.groupBy ["test1"]
+                & D.aggregate
+                    [F.sum (F.col @Int "test2") `as` "sum(test2)"]
+            )
+        )
+
 tests :: [Test]
 tests =
     [ TestLabel "foldAggregation" foldAggregation
@@ -149,4 +167,7 @@ tests =
     , TestLabel
         "reduceAggregationOfUnaggregatedBinaryOp"
         reduceAggregationOfUnaggregatedBinaryOp
+    , TestLabel
+        "aggregationOnNoRows"
+        aggregationOnNoRows
     ]

From 59cc9f9cc831456ba0c52d3140c690852384d028 Mon Sep 17 00:00:00 2001
From: Michael Coady <mcoady@gmail.com>
Date: Mon, 16 Feb 2026 10:20:11 +0000
Subject: [PATCH 5/5] formatting

---
 src/DataFrame/Operations/Aggregation.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs
index c6381b5a..6d9fdb2d 100644
--- a/src/DataFrame/Operations/Aggregation.hs
+++ b/src/DataFrame/Operations/Aggregation.hs
@@ -273,9 +273,9 @@ aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) =
                     Left e -> throw e
                     Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr)
                     Right (Aggregated (TColumn col)) -> col
-                in
+             in
                 insertColumn name value d
-        in
+     in
         fold f aggs df'
 
 selectIndices :: VU.Vector Int -> DataFrame -> DataFrame