Migrate to Pydantic v2 #98

Open
wants to merge 4 commits into base: main
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,18 @@

Recent and upcoming changes to dbt2looker

+## Unreleased
+### Added
+- support ephemeral models (#57)
+- warnings if there is a discrepancy between manifest and catalog (#5)
+- more descriptive error message when a column's data type can't be inferred because the column is missing from the catalog
+
+### Changed
+- only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57)
+
+### Fixed
+- now supports `pydantic` v2 (#97)
+
## 0.11.0
### Added
- support label and hidden fields (#49)
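The `pydantic` v2 fix above is what drives the `= None` defaults added throughout models.py below: in pydantic v2, `Optional[...]` no longer implies a default of None, so an optional field without an explicit default becomes required. A minimal sketch of that behaviour (illustrative only, separate from the diff):

from typing import Optional
from pydantic import BaseModel, ValidationError

class Example(BaseModel):
    description: Optional[str] = None  # optional, keeps the v1-style behaviour via an explicit default
    sql: Optional[str]                 # required in pydantic v2, despite being Optional

try:
    Example()
except ValidationError as exc:
    print(exc)  # reports `sql` as a missing required field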
42 changes: 22 additions & 20 deletions dbt2looker/models.py
@@ -1,16 +1,17 @@
from enum import Enum
-from typing import Union, Dict, List, Optional
+from typing import Any, Union, Dict, List, Optional
try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal
-from pydantic import BaseModel, Field, PydanticValueError, validator
+from pydantic import BaseModel, Field, validator


# dbt2looker utility types
-class UnsupportedDbtAdapterError(PydanticValueError):
-    code = 'unsupported_dbt_adapter'
-    msg_template = '{wrong_value} is not a supported dbt adapter'
+class UnsupportedDbtAdapterError(ValueError):
+    def __init__(self, wrong_value: str):
+        msg = f'{wrong_value} is not a supported dbt adapter'
+        super().__init__(msg)


class SupportedDbtAdapters(str, Enum):
@@ -82,12 +83,12 @@ class LookerHiddenType(str, Enum):
class Dbt2LookerMeasure(BaseModel):
    type: LookerMeasureType
    filters: Optional[List[Dict[str, str]]] = []
-    description: Optional[str]
-    sql: Optional[str]
-    value_format_name: Optional[LookerValueFormatName]
-    group_label: Optional[str]
-    label: Optional[str]
-    hidden: Optional[LookerHiddenType]
+    description: Optional[str] = None
+    sql: Optional[str] = None
+    value_format_name: Optional[LookerValueFormatName] = None
+    group_label: Optional[str] = None
+    label: Optional[str] = None
+    hidden: Optional[LookerHiddenType] = None

    @validator('filters')
    def filters_are_singular_dicts(cls, v: List[Dict[str, str]]):
@@ -100,10 +101,10 @@ def filters_are_singular_dicts(cls, v: List[Dict[str, str]]):

class Dbt2LookerDimension(BaseModel):
    enabled: Optional[bool] = True
-    name: Optional[str]
-    sql: Optional[str]
-    description: Optional[str]
-    value_format_name: Optional[LookerValueFormatName]
+    name: Optional[str] = None
+    sql: Optional[str] = None
+    description: Optional[str] = None
+    value_format_name: Optional[LookerValueFormatName] = None


class Dbt2LookerMeta(BaseModel):
@@ -137,13 +138,14 @@ class DbtModelColumnMeta(Dbt2LookerMeta):
class DbtModelColumn(BaseModel):
    name: str
    description: str
-    data_type: Optional[str]
+    data_type: Optional[str] = None
    meta: DbtModelColumnMeta


class DbtNode(BaseModel):
    unique_id: str
    resource_type: str
+    config: Dict[str, Any]


class Dbt2LookerExploreJoin(BaseModel):
@@ -200,13 +202,13 @@ class DbtCatalogNodeMetadata(BaseModel):
    type: str
    db_schema: str = Field(..., alias='schema')
    name: str
-    comment: Optional[str]
-    owner: Optional[str]
+    comment: Optional[str] = None
+    owner: Optional[str] = None


class DbtCatalogNodeColumn(BaseModel):
    type: str
-    comment: Optional[str]
+    comment: Optional[str] = None
    index: int
    name: str

@@ -224,4 +226,4 @@ def case_insensitive_column_names(cls, v: Dict[str, DbtCatalogNodeColumn]):


class DbtCatalog(BaseModel):
-    nodes: Dict[str, DbtCatalogNode]
+    nodes: Dict[str, DbtCatalogNode]
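pydantic v2 removed `PydanticValueError`, so `UnsupportedDbtAdapterError` above is now a plain `ValueError` subclass. A small usage sketch (illustrative, separate from the diff; the 'sqlite' value is hypothetical) showing it still formats and raises the same way:

class UnsupportedDbtAdapterError(ValueError):  # mirrors the class in the diff above
    def __init__(self, wrong_value: str):
        super().__init__(f'{wrong_value} is not a supported dbt adapter')

try:
    raise UnsupportedDbtAdapterError('sqlite')
except ValueError as err:
    print(err)  # -> sqlite is not a supported dbt adapter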
60 changes: 53 additions & 7 deletions dbt2looker/parser.py
@@ -31,21 +31,24 @@ def tags_match(query_tag: str, model: models.DbtModel) -> bool:

def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]:
    manifest = models.DbtManifest(**raw_manifest)
-    all_models: List[models.DbtModel] = [
+    materialized_models: List[models.DbtModel] = [
        node
        for node in manifest.nodes.values()
-        if node.resource_type == 'model'
+        if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral'
    ]

+    if tag is None:
+        selected_models = materialized_models
+    else:
+        selected_models = [model for model in materialized_models if tags_match(tag, model)]
+
    # Empty model files have many missing parameters
-    for model in all_models:
+    for model in selected_models:
        if not hasattr(model, 'name'):
            logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id)
            raise SystemExit('Failed')

-    if tag is None:
-        return all_models
-    return [model for model in all_models if tags_match(tag, model)]
+    return selected_models


def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]):
@@ -54,6 +57,33 @@ def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]):
            logging.debug('Model %s has no typed columns, no dimensions will be generated. %s', model.unique_id, model)


+def compare_model_vs_node_columns(model: models.DbtModel, node: models.DbtCatalogNode):
+    model_columns = set(model.columns.keys())  # as defined in YML config
+    catalogued_columns = set(node.columns.keys())  # as defined in SQL
+
+    # if the YML and SQL columns exactly match, return early
+    if not model_columns.symmetric_difference(catalogued_columns):
+        return
+
+    if model_columns.issubset(catalogued_columns):
+        for undocumented_column in sorted(catalogued_columns.difference(model_columns)):
+            logging.warning(
+                f'Column {model.unique_id}.{undocumented_column} has not been documented in YML, '
+                'but is present in the catalog. You should add it to your YML config, '
+                'or (if it is not required) remove it from the model SQL file, run the model, '
+                'and run `dbt docs generate` again')
+        # after warning the user, return early
+        return
+
+    # otherwise, there are columns defined in YML that don't match what's defined in SQL
+    for missing_column in sorted(model_columns.difference(catalogued_columns)):
+        logging.warning(
+            f'Column {model.unique_id}.{missing_column} documented in YML, '
+            'but is not defined in the DBT catalog. Check the model SQL file '
+            'and ensure you have run the model and `dbt docs generate`')
+    return  # final return explicitly included for clarity


def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None):
    catalog_nodes = parse_catalog_nodes(raw_catalog)
    dbt_models = parse_models(raw_manifest, tag=tag)
@@ -74,6 +104,11 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None):
            logging.warning(
                f'Model {model.unique_id} not found in catalog. No looker view will be generated. '
                f'Check if model has materialized in {adapter_type} at {model.relation_name}')
+        else:
+            # we know that the model is included in the catalog - extract it
+            corresponding_catalog_node = catalog_nodes[model.unique_id]
+            # issue warnings if the catalog columns (defined via SQL) don't match what's documented in YML
+            compare_model_vs_node_columns(model, corresponding_catalog_node)

    # Update dbt models with data types from catalog
    dbt_typed_models = [
@@ -92,7 +127,18 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None):
    return dbt_typed_models


+class ColumnNotInCatalogError(Exception):
+    def __init__(self, model_id: str, column_name: str):
+        super().__init__(
+            f'Column {column_name} not found in catalog for model {model_id}, '
+            'cannot find a data type for Looker. Is the column selected in the model SQL file, '
+            'and have you run the model since adding the column to it?')
+
+
def get_column_type_from_catalog(catalog_nodes: Dict[str, models.DbtCatalogNode], model_id: str, column_name: str):
    node = catalog_nodes.get(model_id)
    column = None if node is None else node.columns.get(column_name)
-    return None if column is None else column.type
+    if column:
+        return column.type
+    # otherwise this will fail later when we try to map the data type to a Looker type
+    raise ColumnNotInCatalogError(model_id, column_name)
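A rough usage sketch (illustrative, separate from the diff; the artifact path, model id, and column name below are assumptions) of how the new lookup surfaces a missing column as `ColumnNotInCatalogError` instead of silently returning None:

import json
import logging

from dbt2looker import parser

with open('target/catalog.json') as f:  # default dbt artifact location
    catalog_nodes = parser.parse_catalog_nodes(json.load(f))

try:
    column_type = parser.get_column_type_from_catalog(
        catalog_nodes, 'model.my_project.orders', 'order_total')
except parser.ColumnNotInCatalogError as err:
    # previously this path returned None and only failed later, when mapping to a Looker type
    logging.error(err)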