Skip to content

Commit

Permalink
Merge pull request #93 from ambiata/topic/counting
Browse files Browse the repository at this point in the history
Don't try to infer field form if we haven't seen enough rows
  • Loading branch information
olorin committed Mar 29, 2016
2 parents 497db83 + b7f7605 commit d21b678
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/Warden/Data/TextCounts.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module Warden.Data.TextCounts (
, combineUniqueTextCounts
, emptyUniqueTextCount
, hashText
, renderTextFreeformThreshold
, renderUniqueTextCount
, updateUniqueTextCount
) where
Expand Down Expand Up @@ -56,6 +57,9 @@ newtype TextFreeformThreshold =
unTextFreeformThreshold :: Int
} deriving (Eq, Show)

renderTextFreeformThreshold :: TextFreeformThreshold -> Text
renderTextFreeformThreshold = renderIntegral . unTextFreeformThreshold

data TextCounts =
TextCounts !(V.Vector UniqueTextCount)
| NoTextCounts
Expand Down
8 changes: 8 additions & 0 deletions src/Warden/Error.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import Warden.Data.Field
import Warden.Data.Param
import Warden.Data.Row
import Warden.Data.Schema
import Warden.Data.TextCounts
import Warden.Data.View

data WardenError =
Expand Down Expand Up @@ -149,6 +150,7 @@ data InferenceError =
| NoTextCountError
| NoTextCountForField Int
| CompatibleFieldsGTRowCount RowCount [CompatibleEntries]
| InsufficientRowsForFormInference RowCount TextFreeformThreshold
deriving (Eq, Show)

renderInferenceError :: InferenceError -> Text
Expand Down Expand Up @@ -178,6 +180,12 @@ renderInferenceError = ("inference error: " <>) . render'
, " : "
, T.intercalate ", " (renderCompatibleEntries <$> cs)
]
render' (InsufficientRowsForFormInference rc fft) = T.concat [
"cannot infer form - total row count "
, renderRowCount rc
, " smaller than check freeform threshold "
, renderTextFreeformThreshold fft
]

data ValidationFailure =
ViewMarkerMismatch Text Text Text
Expand Down
6 changes: 6 additions & 0 deletions src/Warden/Inference.hs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,15 @@ textCountSum vms =

inferForms :: NonEmpty ViewMarker -> Either InferenceError TextCountSummary
inferForms vms =
let totalRowCount = totalViewRows vms
fft = vmFFT $ NE.head vms in do
when ((unRowCount totalRowCount) < (fromIntegral $ unTextFreeformThreshold fft)) $
Left $ InsufficientRowsForFormInference totalRowCount fft
case textCountSum vms of
NoTextCounts -> Left NoTextCountError
TextCounts cs -> fmap TextCountSummary $ V.mapM (uncurry summarizeTextCount) $ V.indexed cs
where
vmFFT = checkFreeformThreshold . vmCheckParams . vmMetadata

summarizeTextCount :: Int -> UniqueTextCount -> Either InferenceError FieldForm
summarizeTextCount _ LooksFreeform =
Expand Down
10 changes: 10 additions & 0 deletions test/Test/IO/Warden/Commands/Check/Unit.hs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ prop_check_BadRows =
expected (Left _) = False
expected (Right rs) = elem (RowCheckResult ViewRowCounts (CheckFailed ((RowCheckFailure (HasBadRows (RowCount 1))) :| []))) rs

prop_check_FieldCountObservationMismatch :: Property
prop_check_FieldCountObservationMismatch =
checkUnitTest
(View "test/data/commands/check/schema-field-type")
(commandUnitCheckParams { checkSchemaFile = Just (SchemaFile "test/data/commands/check/schema-field-type.json") } )
expected
where
expected (Left _) = False
expected (Right rs) = elem (RowCheckResult ViewRowCounts (CheckFailed ((SchemaCheckFailure (FieldCountObservationMismatch (FieldCount 2) (FieldCount 1))) :| []))) rs

return []
tests :: IO Bool
tests = $forAllProperties $ quickCheckWithResult (stdArgs { maxSuccess = 1 })
2 changes: 1 addition & 1 deletion test/Test/Warden/Arbitrary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ instance Arbitrary UniqueTextCount where
instance Arbitrary TextCounts where
arbitrary = oneof [
pure NoTextCounts
, (TextCounts <$> arbitrary)
, ((TextCounts . V.fromList . NE.toList) <$> arbitrary)
]

instance Arbitrary SVParseState where
Expand Down
16 changes: 15 additions & 1 deletion test/Test/Warden/Inference.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Test.Warden.Inference where
import Control.Lens (view)

import Data.List (take, repeat)
import Data.List.NonEmpty (NonEmpty)
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Semigroup ((<>))
import qualified Data.Set as S
Expand Down Expand Up @@ -147,6 +147,20 @@ prop_fieldCandidates_boolean fmr = forAll booleanHistogramPair $ \(rc, h) -> cas
Right cands ->
S.member BooleanField cands === True

prop_totalViewRows :: NonEmpty ViewMarker -> Property
prop_totalViewRows vms =
let trs = totalViewRows vms
bads = filter (not . (>=) trs) . NE.toList $ fmap (view totalRows . vmViewCounts . vmMetadata) vms in
bads === []

prop_inferForms_insufficient_rows :: ViewMarker -> Property
prop_inferForms_insufficient_rows vm = forAll (TextFreeformThreshold <$> choose (2, 2000)) $ \fft -> forAll ((RowCount . fromIntegral) <$> choose (0, (unTextFreeformThreshold fft) - 1)) $ \trs ->
-- More lenses maybe?
let vm' = vm { vmMetadata = ((vmMetadata vm) { vmViewCounts = ((vmViewCounts (vmMetadata vm)) { _totalRows = trs })})}
vm'' = vm' { vmMetadata = ((vmMetadata vm') { vmCheckParams = ((vmCheckParams (vmMetadata vm')) { checkFreeformThreshold = fft })})}
vms = pure vm'' in
isLeft (inferForms vms) === True

return []
tests :: IO Bool
tests = $quickCheckAll
17 changes: 17 additions & 0 deletions test/data/commands/check/schema-field-type.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"version": "v1",
"fields": [
{
"field-form": {
"form-type": "freeform"
},
"field-type": "integral-field"
},
{
"field-form": {
"form-type": "freeform"
},
"field-type": "integral-field"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1,2
3,foo
"bar",96

0 comments on commit d21b678

Please sign in to comment.