Skip to content

Commit a9986b6

Browse files
committed
Introduce row for partially parsed in columnInfo
1 parent 4686a33 commit a9986b6

File tree

1 file changed

+21
-8
lines changed
  • src/Data/DataFrame/Operations

1 file changed

+21
-8
lines changed

src/Data/DataFrame/Operations/Core.hs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import Data.DataFrame.Internal.Column ( Column(..), toColumn', toColumn, columnL
2323
import Data.DataFrame.Internal.DataFrame (DataFrame(..), getColumn, null, empty)
2424
import Data.DataFrame.Internal.Parsing (isNullish)
2525
import Data.DataFrame.Internal.Types (Columnable)
26+
import Data.Either
2627
import Data.Function (on, (&))
2728
import Data.Maybe
2829
import Data.Type.Equality (type (:~:)(Refl), TestEquality(..))
@@ -152,27 +153,30 @@ columnInfo :: DataFrame -> DataFrame
152153
-- Summarise the columns of a frame as a new frame. Each populated column
-- contributes one entry to every output column: its name, its non-null
-- count, its null count, its number of partially parsed values and its
-- element type. Entries are sorted by ascending non-null count before the
-- output columns are inserted.
columnInfo df = empty & insertColumn' "Column Name" (Just $ toColumn (map fst' rows))
                      & insertColumn' "# Non-null Values" (Just $ toColumn (map snd' rows))
                      & insertColumn' "# Null Values" (Just $ toColumn (map thd' rows))
                      & insertColumn' "# Partially parsed" (Just $ toColumn (map fth' rows))
                      & insertColumn' "Type" (Just $ toColumn (map ffth' rows))
    where
        -- (name, non-null count, null count, partially parsed count, type)
        rows = L.sortBy (compare `on` snd') (V.ifoldl' go [] (columns df)) :: [(T.Text, Int, Int, Int, T.Text)]
        -- Inverse of columnIndices: slot position -> column name.
        indexMap = M.fromList (map (\(a, b) -> (b, a)) $ M.toList (columnIndices df))
        -- NOTE(review): M.! errors on a missing key; this assumes every
        -- populated slot has an entry in columnIndices -- TODO confirm.
        columnName i = indexMap M.! i
        -- Empty slots contribute nothing to the summary.
        go acc _ Nothing = acc
        go acc i (Just col@(BoxedColumn (_ :: V.Vector a))) = let
                cname = columnName i
                countNulls = nulls col
                countPartial = partiallyParsed col
                columnType = T.pack $ show $ typeRep @a
            in (cname, columnLength col - countNulls, countNulls, countPartial, columnType) : acc
        go acc i (Just col@(UnboxedColumn _)) = let
                cname = columnName i
                columnType = T.pack $ columnTypeString col
                -- Unboxed columns are reported with zero nulls and zero
                -- partially parsed values: Maybe has no Unbox instance,
                -- so such a column cannot hold a null.
            in (cname, columnLength col, 0, 0, columnType) : acc
        fst' (!x, _, _, _, _) = x
        snd' (_, !x, _, _, _) = x
        thd' (_, _, !x, _, _) = x
        fth' (_, _, _, !x, _) = x
        ffth' (_, _, _, _, !x) = x
176180

177181

178182
nulls :: Column -> Int
@@ -187,6 +191,15 @@ nulls (BoxedColumn (xs :: V.Vector a)) = case testEquality (typeRep @a) (typeRep
187191
_ -> 0
188192
nulls _ = 0
189193

194+
-- | Count the 'Left' entries of a column.
--
-- Only a boxed column whose element type is an application of 'Either'
-- is inspected; every other column yields zero.
partiallyParsed :: Column -> Int
partiallyParsed column = case column of
    BoxedColumn (values :: V.Vector a)
        -- Peel two type applications off the element type and check that
        -- the remaining head constructor is 'Either'.
        | App (App outer _) _ <- typeRep @a
        , Just HRefl <- eqTypeRep outer (typeRep @Either) ->
            VG.length (VG.filter isLeft values)
    _ -> 0
202+
190203
-- | Build a 'DataFrame' from @(name, column)@ pairs by inserting each
-- pair, left to right, into an initially 'empty' frame.
fromList :: [(T.Text, Column)] -> DataFrame
fromList = L.foldl' addColumn empty
    where
        addColumn frame (label, col) = insertColumn' label (Just col) frame
192205

0 commit comments

Comments
 (0)