-- Patch summary (free text ahead of the first diff header; not part of any hunk).
--
-- NOTE(review): in this copy the line breaks of the patch appear to have been
-- collapsed, so each hunk body sits on the same physical line as its header.
-- The original line structure has to be restored before the patch can be applied.
--
-- src/DataFrame/IO/CSV.hs
--   * readSeparated now only reads the file, strips a leading UTF-8 byte order
--     mark, and hands the bytes to the new decodeSeparated.
--   * decodeSeparated (ReadOptions -> BL.ByteString -> IO DataFrame) receives the
--     separator lookup (columnSeparator opts) and the bang pattern on opts that
--     used to live in readSeparated; the decodeOpts / stream lines are unchanged
--     context and now belong to decodeSeparated.
--   * NOTE(review): decodeSeparated is added without a Haddock comment, and the
--     hunk does not show whether it is exported - confirm.
--
-- src/DataFrame/Operations/Aggregation.hs
--   * aggregate gains a guard: when valueIndices is empty it returns the grouped
--     frame's underlying df as is; the otherwise branch is the previous body,
--     re-indented one level.
--   * NOTE(review): that early return hands back df itself, not the selection of
--     grouping columns plus aggregate columns, while the Haddock context line just
--     above says all ungrouped columns will be dropped - confirm this schema
--     difference for empty input is intended.
--   * distinct gains a guard: when nRows df == 0 it returns df unchanged;
--     otherwise it evaluates the same selectIndices expression as before.
--   * NOTE(review): presumably both guards exist to keep VU.init (on offsets / os)
--     away from an empty vector - verify against groupBy; the two guards also test
--     emptiness differently (VU.null valueIndices vs nRows df == 0).
diff --git a/src/DataFrame/IO/CSV.hs b/src/DataFrame/IO/CSV.hs index 23cca806..c4bcbd50 100644 --- a/src/DataFrame/IO/CSV.hs +++ b/src/DataFrame/IO/CSV.hs @@ -237,10 +237,14 @@ ghci> D.readSeparated (D.defaultReadOptions { columnSeparator = ';' }) ".\/data\ @ -} readSeparated :: ReadOptions -> FilePath -> IO DataFrame -readSeparated !opts !path = do - let sep = columnSeparator opts +readSeparated opts !path = do let stripUtf8Bom bs = fromMaybe bs (BL.stripPrefix "\xEF\xBB\xBF" bs) csvData <- stripUtf8Bom <$> BL.readFile path + decodeSeparated opts csvData + +decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame +decodeSeparated !opts csvData = do + let sep = columnSeparator opts let decodeOpts = Csv.defaultDecodeOptions{Csv.decDelimiter = fromIntegral (ord sep)} let stream = CsvStream.decodeWith decodeOpts Csv.NoHeader csvData diff --git a/src/DataFrame/Operations/Aggregation.hs b/src/DataFrame/Operations/Aggregation.hs index 650d2159..f510a193 100644 --- a/src/DataFrame/Operations/Aggregation.hs +++ b/src/DataFrame/Operations/Aggregation.hs @@ -254,23 +254,25 @@ computeRowHashes indices df = runST $ do All ungrouped columns will be dropped. -} aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame -aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) = - let - df' = - selectIndices - (VU.map (valueIndices VU.!) (VU.init offsets)) - (select groupingColumns df) +aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) + | VU.null valueIndices = df + | otherwise = + let + df' = + selectIndices + (VU.map (valueIndices VU.!) 
(VU.init offsets)) + (select groupingColumns df) - f (name, Wrap (expr :: Expr a)) d = - let - value = case interpretAggregation @a gdf expr of - Left e -> throw e - Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr) - Right (Aggregated (TColumn col)) -> col - in - insertColumn name value d - in - fold f aggs df' + f (name, Wrap (expr :: Expr a)) d = + let + value = case interpretAggregation @a gdf expr of + Left e -> throw e + Right (UnAggregated _) -> throw $ UnaggregatedException (T.pack $ show expr) + Right (Aggregated (TColumn col)) -> col + in + insertColumn name value d + in + fold f aggs df' selectIndices :: VU.Vector Int -> DataFrame -> DataFrame selectIndices xs df = @@ -281,6 +283,8 @@ selectIndices xs df = -- | Filter out all non-unique values in a dataframe. distinct :: DataFrame -> DataFrame -distinct df = selectIndices (VU.map (indices VU.!) (VU.init os)) df +distinct df + | nRows df == 0 = df + | otherwise = selectIndices (VU.map (indices VU.!) (VU.init os)) df where (Grouped _ _ indices os) = groupBy (columnNames df) df