From 5552ab2168261c693062baf9ecbd7d057f1828a9 Mon Sep 17 00:00:00 2001 From: Padraig Alton Date: Fri, 9 Feb 2024 17:05:50 +0000 Subject: [PATCH 1/4] Add support for ephemeral models --- CHANGELOG.md | 4 ++++ dbt2looker/models.py | 5 +++-- dbt2looker/parser.py | 10 +++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9943468..676d862 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Recent and upcoming changes to dbt2looker +## Unreleased +### Added +- support ephemeral models (#57) + ## 0.11.0 ### Added - support label and hidden fields (#49) diff --git a/dbt2looker/models.py b/dbt2looker/models.py index 3430eaf..9961182 100644 --- a/dbt2looker/models.py +++ b/dbt2looker/models.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Union, Dict, List, Optional +from typing import Any, Union, Dict, List, Optional try: from typing import Literal except ImportError: @@ -144,6 +144,7 @@ class DbtModelColumn(BaseModel): class DbtNode(BaseModel): unique_id: str resource_type: str + config: Dict[str, Any] class Dbt2LookerExploreJoin(BaseModel): @@ -224,4 +225,4 @@ def case_insensitive_column_names(cls, v: Dict[str, DbtCatalogNodeColumn]): class DbtCatalog(BaseModel): - nodes: Dict[str, DbtCatalogNode] \ No newline at end of file + nodes: Dict[str, DbtCatalogNode] diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py index ed310f3..03045c9 100644 --- a/dbt2looker/parser.py +++ b/dbt2looker/parser.py @@ -31,21 +31,21 @@ def tags_match(query_tag: str, model: models.DbtModel) -> bool: def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]: manifest = models.DbtManifest(**raw_manifest) - all_models: List[models.DbtModel] = [ + materialized_models: List[models.DbtModel] = [ node for node in manifest.nodes.values() - if node.resource_type == 'model' + if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral' ] # Empty model files have many missing parameters - for model in all_models: + for model in materialized_models: if not hasattr(model, 'name'): logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id) raise SystemExit('Failed') if tag is None: - return all_models - return [model for model in all_models if tags_match(tag, model)] + return materialized_models + return [model for model in materialized_models if tags_match(tag, model)] def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]): From 8aad35994af892777a60194ec7a88053c25c57bc Mon Sep 17 00:00:00 2001 From: Padraig Alton Date: Fri, 9 Feb 2024 17:14:27 +0000 Subject: [PATCH 2/4] Reorder parse_models logic to ignore non-selected empty model files --- CHANGELOG.md | 3 +++ dbt2looker/parser.py | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 676d862..7066f58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ Recent and upcoming changes to dbt2looker ### Added - support ephemeral models (#57) +### Changed +- only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57) + ## 0.11.0 ### Added - support label and hidden fields (#49) diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py index 03045c9..f49477f 100644 --- a/dbt2looker/parser.py +++ b/dbt2looker/parser.py @@ -37,15 +37,18 @@ def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]: if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral' ] + if tag is None: + selected_models = materialized_models + else: + selected_models = [model for model in materialized_models if tags_match(tag, model)] + # Empty model files have many missing parameters - for model in materialized_models: + for model in selected_models: if not hasattr(model, 'name'): logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id) raise SystemExit('Failed') - if tag is None: - return materialized_models - return [model for model in materialized_models if tags_match(tag, model)] + return selected_models def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]): From 8c5a2c31ca3e5312eba64177b9144868c9215038 Mon Sep 17 00:00:00 2001 From: Padraig Alton Date: Fri, 19 Jan 2024 13:28:11 +0000 Subject: [PATCH 3/4] Added helpful warnings for manifest/catalog discrepancy --- CHANGELOG.md | 2 ++ dbt2looker/parser.py | 45 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7066f58..9740204 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ Recent and upcoming changes to dbt2looker ## Unreleased ### Added - support ephemeral models (#57) +- warnings if there is a discrepancy between manifest and catalog (#5) +- more descriptive error message when a column's data type can't be inferred due to not being in the catalog ### Changed - only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57) diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py index f49477f..9e29d5b 100644 --- a/dbt2looker/parser.py +++ b/dbt2looker/parser.py @@ -57,6 +57,33 @@ def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel logging.debug('Model %s has no typed columns, no dimensions will be generated. %s', model.unique_id, model) +def compare_model_vs_node_columns(model: models.DbtModel, node: models.DbtCatalogNode): + model_columns = set(model.columns.keys()) # as defined in YML config + catalogued_columns = set(node.columns.keys()) # as defined in SQL + + # if the YML and SQL columns exactly match, return early + if not model_columns.symmetric_difference(catalogued_columns): + return + + if model_columns.issubset(catalogued_columns): + for undocumented_column in sorted(catalogued_columns.difference(model_columns)): + logging.warning( + f'Column {model.unique_id}.{undocumented_column} has not been documented in YML, ' + 'but is present in the catalog. You should add it to your YML config, ' + 'or (if it is not required) remove it from the model SQL file, run the model, ' + 'and run `dbt docs generate` again') + # after warning the user, return early + return + + # otherwise, there are columns defined in YML that don't match what's defined in SQL + for missing_column in sorted(model_columns.difference(catalogued_columns)): + logging.warning( + f'Column {model.unique_id}.{missing_column} documented in YML, ' + 'but is not defined in the DBT catalog. Check the model SQL file ' + 'and ensure you have run the model and `dbt docs generate`') + return # final return explicitly included for clarity + + def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None): catalog_nodes = parse_catalog_nodes(raw_catalog) dbt_models = parse_models(raw_manifest, tag=tag) @@ -77,6 +104,11 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] logging.warning( f'Model {model.unique_id} not found in catalog. No looker view will be generated. ' f'Check if model has materialized in {adapter_type} at {model.relation_name}') + else: + # we know that the model is included in the catalog - extract it + corresponding_catalog_node = catalog_nodes[model.unique_id] + # issue warnings if the catalog columns (defined via SQL) don't match what's documented in YML + compare_model_vs_node_columns(model, corresponding_catalog_node) # Update dbt models with data types from catalog dbt_typed_models = [ @@ -95,7 +127,18 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] return dbt_typed_models +class ColumnNotInCatalogError(Exception): + def __init__(self, model_id: str, column_name: str): + super().__init__( + f'Column {column_name} not found in catalog for model {model_id}, ' + 'cannot find a data type for Looker. Is the column selected in the model SQL file, ' + 'and have you run the model since adding the column to it?') + + def get_column_type_from_catalog(catalog_nodes: Dict[str, models.DbtCatalogNode], model_id: str, column_name: str): node = catalog_nodes.get(model_id) column = None if node is None else node.columns.get(column_name) - return None if column is None else column.type + if column: + return column.type + # otherwise this will fail later when we try to map the data type to a Looker type + raise ColumnNotInCatalogError(model_id, column_name) From 51dbdfb66ea5760bae07acce0efe7a3698a0276c Mon Sep 17 00:00:00 2001 From: Padraig Alton Date: Mon, 7 Oct 2024 11:40:14 +0100 Subject: [PATCH 4/4] Migrate to pydantic v2 (#97) --- CHANGELOG.md | 3 +++ dbt2looker/models.py | 37 +++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9740204..9ea00b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ Recent and upcoming changes to dbt2looker ### Changed - only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57) +### Fixed +- now supports `pydantic` v2 (#97) + ## 0.11.0 ### Added - support label and hidden fields (#49) diff --git a/dbt2looker/models.py b/dbt2looker/models.py index 9961182..a0d4746 100644 --- a/dbt2looker/models.py +++ b/dbt2looker/models.py @@ -4,13 +4,14 @@ from typing import Literal except ImportError: from typing_extensions import Literal -from pydantic import BaseModel, Field, PydanticValueError, validator +from pydantic import BaseModel, Field, validator # dbt2looker utility types -class UnsupportedDbtAdapterError(PydanticValueError): - code = 'unsupported_dbt_adapter' - msg_template = '{wrong_value} is not a supported dbt adapter' +class UnsupportedDbtAdapterError(ValueError): + def __init__(self, wrong_value: str): + msg = f'{wrong_value} is not a supported dbt adapter' + super().__init__(msg) class SupportedDbtAdapters(str, Enum): @@ -82,12 +83,12 @@ class LookerHiddenType(str, Enum): class Dbt2LookerMeasure(BaseModel): type: LookerMeasureType filters: Optional[List[Dict[str, str]]] = [] - description: Optional[str] - sql: Optional[str] - value_format_name: Optional[LookerValueFormatName] - group_label: Optional[str] - label: Optional[str] - hidden: Optional[LookerHiddenType] + description: Optional[str] = None + sql: Optional[str] = None + value_format_name: Optional[LookerValueFormatName] = None + group_label: Optional[str] = None + label: Optional[str] = None + hidden: Optional[LookerHiddenType] = None @validator('filters') def filters_are_singular_dicts(cls, v: List[Dict[str, str]]): @@ -100,10 +101,10 @@ def filters_are_singular_dicts(cls, v: List[Dict[str, str]]): class Dbt2LookerDimension(BaseModel): enabled: Optional[bool] = True - name: Optional[str] - sql: Optional[str] - description: Optional[str] - value_format_name: Optional[LookerValueFormatName] + name: Optional[str] = None + sql: Optional[str] = None + description: Optional[str] = None + value_format_name: Optional[LookerValueFormatName] = None class Dbt2LookerMeta(BaseModel): @@ -137,7 +138,7 @@ class DbtModelColumnMeta(Dbt2LookerMeta): class DbtModelColumn(BaseModel): name: str description: str - data_type: Optional[str] + data_type: Optional[str] = None meta: DbtModelColumnMeta @@ -201,13 +202,13 @@ class DbtCatalogNodeMetadata(BaseModel): type: str db_schema: str = Field(..., alias='schema') name: str - comment: Optional[str] - owner: Optional[str] + comment: Optional[str] = None + owner: Optional[str] = None class DbtCatalogNodeColumn(BaseModel): type: str - comment: Optional[str] + comment: Optional[str] = None index: int name: str