add docs for format: sql unit testing

dbt-labs · Apr 13, 2024 · 813c322 · 813c322
1 parent 63f080f
commit 813c322
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 15 deletions.
diff --git a/website/docs/docs/build/unit-tests.md b/website/docs/docs/build/unit-tests.md
@@ -117,9 +117,9 @@ unit_tests:
 ```
 </file>
 
-The previous example defines the mock data using the inline `dict` format, but you can also use `csv` either inline or in a separate fixture file. 
+The previous example defines the mock data using the inline `dict` format, but you can also use `csv` or `sql` either inline or in a separate fixture file. 
 
-You only have to define the mock data for the columns you care about. This enables you to write succinct and _specific_ unit tests.
+When using the `dict` or `csv` format, you only have to define the mock data for the columns you care about. This enables you to write succinct and _specific_ unit tests.
 
 :::note
 

diff --git a/website/docs/reference/resource-properties/data-formats.md b/website/docs/reference/resource-properties/data-formats.md
@@ -3,12 +3,13 @@ title: "Supported data formats for unit tests"
 sidebar_label: "Data formats"
 ---
 
-Currently, mock data for unit testing in dbt supports two formats:
+Currently, mock data for unit testing in dbt supports three formats:
 
 - `dict` (default): Inline dictionary values.
 - `csv`: Inline CSV values or a CSV file.
+- `sql`: Inline SQL query or a SQL file. Note: For this format you must supply mock data for _all columns_.
 
-We will support more in the future, so watch our [upgrade guides](/docs/dbt-versions/core-upgrade) and this page for updates.
+## dict
 
 The `dict` data format is the default if no `format` is defined.
 
@@ -28,6 +29,8 @@ unit_tests:
 
 ```
 
+## csv
+
 When using the `csv` format, you can use either an inline CSV string for `rows`:
 
 ```yml
@@ -49,6 +52,40 @@ Or, you can provide the name of a CSV file in the `tests/fixtures` directory (or
 
 ```yml
 
+unit_tests:
+  - name: test_my_model
+    model: my_model
+    given:
+      - input: ref('my_model_a')
+        format: csv
+        fixture: my_model_a_fixture
+
+```
+
+## sql
+
+This format provides more flexibility for the types of data you can unit test, but when using `format: sql` you must supply mock data for _all columns_.
+
+When using the `sql` format, you can use either an inline SQL query for `rows`:
+
+```yml
+
+unit_tests:
+  - name: test_my_model
+    model: my_model
+    given:
+      - input: ref('my_model_a')
+        format: sql
+        rows: |
+          select 1 as id, 'gerda' as name, null as loaded_at union all
+          select 2 as id, 'michelle' as name, null as loaded_at
+
+```
+
+Or, you can provide the name of a SQL file in the `tests/fixtures` directory (or the configured `test-paths` location) of your project for `fixture`: 
+
+```yml
+
 unit_tests:
   - name: test_my_model
     model: my_model

diff --git a/website/docs/reference/resource-properties/unit-tests.md b/website/docs/reference/resource-properties/unit-tests.md
@@ -17,6 +17,7 @@ To run only your unit tests, use the command:
 - We currently only support adding unit tests to models in your _current_ project.
 - If your model has multiple versions, by default the unit test will run on *all* versions of your model. Read [unit testing versioned models](#unit-testing-versioned-models) for more information.
 - Unit tests must be defined in a YML file in your `models/` directory.
+- If you want to unit test a model that depends on an ephemeral model, you must use `format: sql` for that input.
 
 <file name='dbt_project.yml'>
 
@@ -33,22 +34,20 @@ unit_tests:
       tags: <string> | [<string>]
     given:
       - input: <ref_or_source_call> # optional for seeds
-        format: dict | csv
-        # if format csv, either define dictionary of rows or name of fixture
-        rows:
-          - {dictionary}
-        fixture: <fixture-name>
+        format: dict | csv | sql
+        # either define rows inline or name of fixture
+        rows: {dictionary} | <string>
+        fixture: <fixture-name> # sql or csv 
       - input: ... # declare additional inputs
     expect:
-      format: dict | csv
-      # if format csv, either define dictionary of rows or name of fixture
-      rows: 
-        - {dictionary}
-      fixture: <fixture-name>
+      format: dict | csv | sql
+      # either define rows inline or name of fixture
+      rows: {dictionary} | <string>
+      fixture: <fixture-name> # sql or csv 
     overrides: # optional: configuration for the dbt execution environment
       macros:
         is_incremental: true | false
-        dbt_utils.current_timestamp: str
+        dbt_utils.current_timestamp: <string>
         # ... any other jinja function from https://docs.getdbt.com/reference/dbt-jinja-functions
         # ... any other context property
       vars: {dictionary}
@@ -109,3 +108,26 @@ unit_tests:
       fixture: valid_email_address_fixture_output
 
 ```
+
+```yml
+
+unit_tests:
+  - name: test_is_valid_email_address # this is the unique name of the test
+    model: dim_customers # name of the model I'm unit testing
+    given: # the mock data for your inputs
+      - input: ref('stg_customers')
+        rows:
+         - {email: cool@example.com,     email_top_level_domain: example.com}
+         - {email: cool@unknown.com,     email_top_level_domain: unknown.com}
+         - {email: badgmail.com,         email_top_level_domain: gmail.com}
+         - {email: missingdot@gmailcom,  email_top_level_domain: gmail.com}
+      - input: ref('top_level_email_domains')
+        format: sql
+        rows: |
+          select 'example.com' as tld union all
+          select 'gmail.com' as tld
+    expect: # the expected output given the inputs above
+      format: sql
+      fixture: valid_email_address_fixture_output
+
+```