Skip to content

Commit

Permalink
Delete empty descriptors after relocation (#62)
Browse files Browse the repository at this point in the history
Closes #55
  • Loading branch information
kokorin authored Sep 21, 2024
1 parent 6a0d5ec commit 182c56f
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 18 deletions.
2 changes: 1 addition & 1 deletion dbt_pumpkin/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def plural_name(self) -> str:

@classmethod
def values(cls) -> set[str]:
return cls._value2member_map_.keys()
return set(cls._value2member_map_.keys())

def __str__(self):
return self.value
Expand Down
42 changes: 35 additions & 7 deletions dbt_pumpkin/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@


@dataclass(frozen=True)
class Action:
resource_type: ResourceType
resource_name: str

class Action(ABC):
@abstractmethod
def affected_files(self) -> set[Path]:
"""
Expand All @@ -40,7 +37,13 @@ def execute(self, files: dict[Path, dict]):


@dataclass(frozen=True)
class RelocateResource(Action):
class ResourceAction(Action, ABC):
resource_type: ResourceType
resource_name: str


@dataclass(frozen=True)
class RelocateResource(ResourceAction):
from_path: Path
to_path: Path

Expand All @@ -65,7 +68,32 @@ def execute(self, files: dict[Path, dict]):


@dataclass(frozen=True)
class BootstrapResource(Action):
class DeleteEmptyDescriptor(Action):
path: Path

def affected_files(self) -> set[Path]:
return {self.path}

def describe(self) -> str:
return f"Delete if empty {self.path}"

def execute(self, files: dict[Path, dict]):
content = files.get(self.path)
if content is None:
return

no_content = True
for res_type in ResourceType:
resources = content.get(res_type.plural_name)
if resources:
no_content = False

if no_content:
files[self.path] = None


@dataclass(frozen=True)
class BootstrapResource(ResourceAction):
path: Path

def __post_init__(self):
Expand All @@ -86,7 +114,7 @@ def execute(self, files: dict[Path, dict]):


@dataclass(frozen=True)
class ResourceColumnAction(Action, ABC):
class ResourceColumnAction(ResourceAction, ABC):
source_name: str | None
path: Path

Expand Down
10 changes: 10 additions & 0 deletions dbt_pumpkin/planner.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import logging
import re
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path

from dbt_pumpkin.data import Resource, ResourceColumn, ResourceConfig, ResourceType, Table, TableColumn
from dbt_pumpkin.exception import PumpkinError
from dbt_pumpkin.plan import (
Action,
AddResourceColumn,
BootstrapResource,
DeleteEmptyDescriptor,
DeleteResourceColumn,
Plan,
RelocateResource,
Expand Down Expand Up @@ -70,6 +75,7 @@ def plan(self) -> Plan:
path_resolver = PathResolver()

sources: dict[str, list[Resource]] = {}
cleanup_paths: set[Path] = set()

for resource in self._resources:
if resource.type == ResourceType.SOURCE:
Expand All @@ -95,6 +101,7 @@ def plan(self) -> Plan:
if resource.yaml_path != to_yaml_path:
logger.debug("Planned relocate action: %s", resource.unique_id)
actions.append(RelocateResource(resource.type, resource.name, resource.yaml_path, to_yaml_path))
cleanup_paths.add(resource.yaml_path)

for source_name, source_tables in sources.items():
# make sure all source's resources have exactly the same configuration
Expand All @@ -117,6 +124,9 @@ def plan(self) -> Plan:
if yaml_path != to_yaml_path:
logger.debug("Planned relocate action: %s", source_name)
actions.append(RelocateResource(ResourceType.SOURCE, source_name, yaml_path, to_yaml_path))
cleanup_paths.add(yaml_path)

actions += [DeleteEmptyDescriptor(to_cleanup) for to_cleanup in sorted(cleanup_paths)]

return Plan(actions)

Expand Down
11 changes: 8 additions & 3 deletions dbt_pumpkin/storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import logging
import os
from abc import abstractmethod
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -55,7 +56,11 @@ def load_yaml(self, files: set[Path]) -> dict[Path, any]:
def save_yaml(self, files: dict[Path, any]):
for file, content in files.items():
resolved_file = self._root_dir / file
resolved_file.parent.mkdir(exist_ok=True)

logger.debug("Saving file: %s", resolved_file)
self._yaml.dump(content, resolved_file)
if content is not None:
logger.debug("Saving file: %s", resolved_file)
resolved_file.parent.mkdir(exist_ok=True)
self._yaml.dump(content, resolved_file)
elif resolved_file.exists():
logger.debug("Deleting file: %s", resolved_file)
os.remove(resolved_file)
27 changes: 27 additions & 0 deletions tests/test_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dbt_pumpkin.plan import (
AddResourceColumn,
BootstrapResource,
DeleteEmptyDescriptor,
DeleteResourceColumn,
RelocateResource,
ReorderResourceColumns,
Expand Down Expand Up @@ -122,6 +123,32 @@ def test_relocate_resource_to_new_file(files):
assert files == expected


@pytest.mark.parametrize("resource_type", list(ResourceType))
def test_delete_empty_descriptor(resource_type: ResourceType):
action = DeleteEmptyDescriptor(
path=Path("models/schema.yml"),
)

files = {
Path("models/schema.yml"): {
"version": 2,
resource_type.plural_name: [],
},
}
action.execute(files)
assert files == {Path("models/schema.yml"): None}

files = {
Path("models/schema.yml"): {
"version": 2,
resource_type.plural_name: [{"name": "any_name"}],
},
}
expected = copy.deepcopy(files)
action.execute(files)
assert files == expected


def test_relocate_resource_error(files):
action = RelocateResource(
resource_type=ResourceType.MODEL,
Expand Down
21 changes: 14 additions & 7 deletions tests/test_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dbt_pumpkin.plan import (
AddResourceColumn,
BootstrapResource,
DeleteEmptyDescriptor,
DeleteResourceColumn,
RelocateResource,
ReorderResourceColumns,
Expand Down Expand Up @@ -148,20 +149,26 @@ def test_relocation_no_yaml_path(no_yaml_path_resources):


def test_relocation_yaml_per_resource(separate_yaml_resources):
assert set(RelocationPlanner(separate_yaml_resources).plan().actions) == {
assert RelocationPlanner(separate_yaml_resources).plan().actions == [
RelocateResource(
resource_type=ResourceType.MODEL,
resource_name="stg_customers",
from_path=Path("models/staging/_schema.yml"),
to_path=Path("models/staging/_stg_customers.yml"),
),
RelocateResource(
resource_type=ResourceType.SOURCE,
resource_name="ingested",
from_path=Path("models/staging/_sources.yml"),
to_path=Path("models/staging/_ingested.yml"),
),
RelocateResource(
resource_type=ResourceType.MODEL,
resource_name="stg_customers",
from_path=Path("models/staging/_schema.yml"),
to_path=Path("models/staging/_stg_customers.yml"),
DeleteEmptyDescriptor(
path=Path("models/staging/_schema.yml"),
),
}
DeleteEmptyDescriptor(
path=Path("models/staging/_sources.yml"),
),
]


def test_relocation_yaml_actual_paths(actual_yaml_resources):
Expand Down
25 changes: 25 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,28 @@ def test_roundtrip_preserve_quotes(tmp_path: Path):
actual = (tmp_path / "my_model.yml").read_text()

assert content == actual


def test_save_yaml_deletes_if_content_is_none(tmp_path: Path):
schema_file = tmp_path / "schema.yml"
schema_file.write_text(
textwrap.dedent("""\
version: 2
models:
- name: my_model
""")
)

storage = DiskStorage(tmp_path, yaml_format=None)
assert schema_file.exists()

storage.save_yaml({Path("schema.yml"): None})
assert not schema_file.exists()


def test_save_yaml_does_nothing_if_content_is_none_no_file(tmp_path: Path):
schema_file = tmp_path / "schema.yml"

storage = DiskStorage(tmp_path, yaml_format=None)
storage.save_yaml({Path("schema.yml"): None})
assert not schema_file.exists()

0 comments on commit 182c56f

Please sign in to comment.