Skip to content

Commit

Permalink
Optional specification of instance name in CustomSQL analyzer metric. (
Browse files Browse the repository at this point in the history
…awslabs#569)

Co-authored-by: Tyler Mcdaniel <[email protected]>
  • Loading branch information
2 people authored and eycho-am committed Oct 9, 2024
1 parent b474a37 commit 6d8d6c6
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
14 changes: 9 additions & 5 deletions src/main/scala/com/amazon/deequ/analyzers/CustomSql.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ case class CustomSqlState(stateOrError: Either[Double, String]) extends DoubleVa
override def metricValue(): Double = state
}

case class CustomSql(expression: String) extends Analyzer[CustomSqlState, DoubleMetric] {
case class CustomSql(expression: String, disambiguator: String = "*") extends Analyzer[CustomSqlState, DoubleMetric] {
/**
* Compute the state (sufficient statistics) from the data
*
Expand Down Expand Up @@ -76,15 +76,19 @@ case class CustomSql(expression: String) extends Analyzer[CustomSqlState, Double
state match {
// The returned state may
case Some(theState) => theState.stateOrError match {
case Left(value) => DoubleMetric(Entity.Dataset, "CustomSQL", "*", Success(value))
case Right(error) => DoubleMetric(Entity.Dataset, "CustomSQL", "*", Failure(new RuntimeException(error)))
case Left(value) => DoubleMetric(Entity.Dataset, "CustomSQL", disambiguator,
Success(value))
case Right(error) => DoubleMetric(Entity.Dataset, "CustomSQL", disambiguator,
Failure(new RuntimeException(error)))
}
case None =>
DoubleMetric(Entity.Dataset, "CustomSQL", "*", Failure(new RuntimeException("CustomSql Failed To Run")))
DoubleMetric(Entity.Dataset, "CustomSQL", disambiguator,
Failure(new RuntimeException("CustomSql Failed To Run")))
}
}

override private[deequ] def toFailureMetric(failure: Exception) = {
DoubleMetric(Entity.Dataset, "CustomSQL", "*", Failure(new RuntimeException("CustomSql Failed To Run")))
DoubleMetric(Entity.Dataset, "CustomSQL", disambiguator,
Failure(new RuntimeException("CustomSql Failed To Run")))
}
}
19 changes: 18 additions & 1 deletion src/test/scala/com/amazon/deequ/analyzers/CustomSqlTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* use this file except in compliance with the License. A copy of the License
* is located at
*
* http://aws.amazon.com/apache2.0/
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
Expand All @@ -17,6 +17,7 @@ package com.amazon.deequ.analyzers

import com.amazon.deequ.SparkContextSpec
import com.amazon.deequ.metrics.DoubleMetric
import com.amazon.deequ.metrics.Entity
import com.amazon.deequ.utils.FixtureSupport
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
Expand Down Expand Up @@ -84,5 +85,21 @@ class CustomSqlTest extends AnyWordSpec with Matchers with SparkContextSpec with
case Failure(exception) => exception.getMessage should include("foo")
}
}

"apply metric disambiguation string to returned metric" in withSparkSession { session =>
val data = getDfWithStringColumns(session)
data.createOrReplaceTempView("primary")

val disambiguator = "statement1"
val sql = CustomSql("SELECT COUNT(*) FROM primary WHERE `Address Line 2` IS NOT NULL", disambiguator)
val state = sql.computeStateFrom(data)
val metric: DoubleMetric = sql.computeMetricFrom(state)

metric.value.isSuccess shouldBe true
metric.value.get shouldBe 6.0
metric.name shouldBe "CustomSQL"
metric.entity shouldBe Entity.Dataset
metric.instance shouldBe "statement1"
}
}
}

0 comments on commit 6d8d6c6

Please sign in to comment.