@@ -23,6 +23,7 @@ import Data.DataFrame.Internal.Column ( Column(..), toColumn', toColumn, columnL
2323import Data.DataFrame.Internal.DataFrame (DataFrame (.. ), getColumn , null , empty )
2424import Data.DataFrame.Internal.Parsing (isNullish )
2525import Data.DataFrame.Internal.Types (Columnable )
26+ import Data.Either
2627import Data.Function (on , (&) )
2728import Data.Maybe
2829import Data.Type.Equality (type (:~: )(Refl ), TestEquality (.. ))
@@ -152,27 +153,30 @@ columnInfo :: DataFrame -> DataFrame
-- Build a summary frame with one row per populated column of the input.
-- Each row holds the column's name, its count of non-null values, its count
-- of null values, its count of 'Left' values (as reported by
-- 'partiallyParsed') and a textual rendering of its element type.
-- Rows are ordered by ascending non-null count.
columnInfo df = empty & insertColumn' "Column Name" (Just $ toColumn (map fst' rows))
                      & insertColumn' "# Non-null Values" (Just $ toColumn (map snd' rows))
                      & insertColumn' "# Null Values" (Just $ toColumn (map thd' rows))
                      & insertColumn' "# Partially parsed" (Just $ toColumn (map fth' rows))
                      & insertColumn' "Type" (Just $ toColumn (map ffth' rows))
    where
        -- (name, non-null count, null count, partially-parsed count, type)
        rows = L.sortBy (compare `on` snd') (V.ifoldl' go [] (columns df)) :: [(T.Text, Int, Int, Int, T.Text)]
        -- columnIndices is keyed by name; invert it so that the positional
        -- index handed out by the fold can be turned back into a name.
        indexMap = M.fromList (map (\(a, b) -> (b, a)) $ M.toList (columnIndices df))
        -- NOTE(review): M.! is partial; this assumes every populated slot
        -- has an entry in columnIndices -- confirm that invariant holds.
        columnName i = indexMap M.! i
        -- Empty slots contribute no row.
        go acc _ Nothing = acc
        -- The pattern signature is only needed to bring the element type
        -- into scope for typeRep; the vector itself is not inspected here.
        go acc i (Just col@(BoxedColumn (_ :: V.Vector a))) = let
                cname = columnName i
                countNulls = nulls col
                countPartial = partiallyParsed col
                columnType = T.pack $ show $ typeRep @a
            in (cname, columnLength col - countNulls, countNulls, countPartial, columnType) : acc
        go acc i (Just col@(UnboxedColumn _)) = let
                cname = columnName i
                columnType = T.pack $ columnTypeString col
                -- Unboxed columns cannot have nulls since Maybe
                -- is not an instance of Unbox a; the null and
                -- partially-parsed counts are both reported as 0.
            in (cname, columnLength col, 0, 0, columnType) : acc
        -- Strict projections for the five-field row tuple.
        fst' (!x, _, _, _, _) = x
        snd' (_, !x, _, _, _) = x
        thd' (_, _, !x, _, _) = x
        fth' (_, _, _, !x, _) = x
        ffth' (_, _, _, _, !x) = x
176180
177181
178182nulls :: Column -> Int
@@ -187,6 +191,15 @@ nulls (BoxedColumn (xs :: V.Vector a)) = case testEquality (typeRep @a) (typeRep
187191 _ -> 0
188192nulls _ = 0
189193
-- | Count the 'Left' entries of a boxed column whose element type is an
-- application of 'Either'. Every other column reports 0.
partiallyParsed :: Column -> Int
partiallyParsed (BoxedColumn (xs :: V.Vector a)) =
    case typeRep @a of
        -- A two-argument type constructor application shows up as two nested
        -- App nodes; only the head constructor matters for this check.
        App (App tycon _) _ -> case eqTypeRep tycon (typeRep @Either) of
            -- Matching HRefl proves the element type is an Either, which is
            -- what lets isLeft be applied to the elements of xs.
            Just HRefl -> VG.length $ VG.filter isLeft xs
            Nothing -> 0
        _ -> 0
partiallyParsed _ = 0
202+
-- | Build a 'DataFrame' from @(name, column)@ pairs by inserting each pair,
-- in list order, into an initially 'empty' frame.
fromList :: [(T.Text, Column)] -> DataFrame
fromList = L.foldl' addColumn empty
  where
    -- One fold step: insert a single named column into the frame so far.
    addColumn frame (label, col) = insertColumn' label (Just col) frame
192205
0 commit comments