From 7b2036deae467e65c04c3712360600427ee16dc2 Mon Sep 17 00:00:00 2001 From: Barak Fatal <35402131+bo156@users.noreply.github.com> Date: Sun, 10 Mar 2024 09:43:37 +0200 Subject: [PATCH] fix(terraform): Dont duplicate more vertices than needed for nested modules with large count/for each values + used cache to avoid extensive usage of os.path.realpath to drastically improve performance (#6072) * Used cache to avoid extensive usage of os.path.realpath to drastically improve performance * Mypy and comment fix * Solved duplicate vertices case when using nested modules and count>2 --- .../graph_builder/foreach/module_handler.py | 8 ++++++++ checkov/terraform/graph_builder/local_graph.py | 12 +++++++++++- .../child/main.tf | 11 +++++++++++ .../modules.tf | 9 +++++++++ .../parent/main.tf | 12 ++++++++++++ .../variable_rendering/test_foreach_renderer.py | 7 +++++++ 6 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/child/main.tf create mode 100644 tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/modules.tf create mode 100644 tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/parent/main.tf diff --git a/checkov/terraform/graph_builder/foreach/module_handler.py b/checkov/terraform/graph_builder/foreach/module_handler.py index 566ca77b897..529d893e4e1 100644 --- a/checkov/terraform/graph_builder/foreach/module_handler.py +++ b/checkov/terraform/graph_builder/foreach/module_handler.py @@ -247,9 +247,17 @@ def _create_new_module_with_vertices(self, main_resource: TerraformBlock, def _add_new_vertices_for_module(self, new_module_key: TFModule | None, new_module_value: dict[str, list[int]], new_resource_vertex_idx: int) -> dict[str, list[int]]: new_vertices_module_value: dict[str, list[int]] = defaultdict(list) + seen_vertices = [] for vertex_type, vertices_idx in new_module_value.items(): for vertex_idx in vertices_idx: module_vertex = self.local_graph.vertices[vertex_idx] + if module_vertex in seen_vertices: + # Makes sure we won't mistakenly go over vertices we already copied. + # This may happen when using nested modules with count>2, + # as we might duplicate the previous count index resources mistakenly. + # See issue https://github.com/bridgecrewio/checkov/issues/6068 + continue + seen_vertices.append(module_vertex) new_vertex = pickle_deepcopy(module_vertex) new_vertex.source_module_object = new_module_key self.local_graph.vertices.append(new_vertex) diff --git a/checkov/terraform/graph_builder/local_graph.py b/checkov/terraform/graph_builder/local_graph.py index 3a52ca194ca..f1191ec99cb 100644 --- a/checkov/terraform/graph_builder/local_graph.py +++ b/checkov/terraform/graph_builder/local_graph.py @@ -60,6 +60,9 @@ def __init__(self, module: Module) -> None: self.enable_modules_foreach_handling = strtobool(os.getenv('CHECKOV_ENABLE_MODULES_FOREACH_HANDLING', 'True')) self.foreach_blocks: Dict[str, List[int]] = {BlockType.RESOURCE: [], BlockType.MODULE: []} + # Important for foreach performance, see issue https://github.com/bridgecrewio/checkov/issues/6068 + self._vertex_path_to_realpath_cache: Dict[str, str] = {} + def build_graph(self, render_variables: bool) -> None: self._create_vertices() logging.info(f"[TerraformLocalGraph] created {len(self.vertices)} vertices") @@ -399,9 +402,16 @@ def _find_vertex_with_best_match(self, relevant_vertices_indexes: List[int], ori vertex_index_with_longest_common_prefix = -1 longest_common_prefix = "" vertices_with_longest_common_prefix = [] + origin_real_path = os.path.realpath(origin_path) for vertex_index in relevant_vertices_indexes: vertex = self.vertices[vertex_index] - common_prefix = os.path.commonpath([os.path.realpath(vertex.path), os.path.realpath(origin_path)]) + if vertex.path in self._vertex_path_to_realpath_cache: + # Using cache to make sure performance stays stable + vertex_realpath = self._vertex_path_to_realpath_cache[vertex.path] + else: + vertex_realpath = os.path.realpath(vertex.path) + self._vertex_path_to_realpath_cache[vertex.path] = vertex_realpath + common_prefix = os.path.commonpath([vertex_realpath, origin_real_path]) if len(common_prefix) > len(longest_common_prefix): vertex_index_with_longest_common_prefix = vertex_index longest_common_prefix = common_prefix diff --git a/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/child/main.tf b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/child/main.tf new file mode 100644 index 00000000000..9164a57f720 --- /dev/null +++ b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/child/main.tf @@ -0,0 +1,11 @@ +## child/main.tf +variable "child-name" { + type = string +} +resource "terraform_data" "child-example" { + input = "1" +} +output "child-result" { + value = terraform_data.child-example.output +} + diff --git a/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/modules.tf b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/modules.tf new file mode 100644 index 00000000000..c451b5dced7 --- /dev/null +++ b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/modules.tf @@ -0,0 +1,9 @@ +# modules.tf +module "modules" { + count = 12 + source = "./parent" + parent = count.index +} +output "modules-result" { + value = { for k, v in module.modules-parent : k => v } +} \ No newline at end of file diff --git a/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/parent/main.tf b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/parent/main.tf new file mode 100644 index 00000000000..1005ba13b48 --- /dev/null +++ b/tests/terraform/graph/variable_rendering/resources/os_example_large_count_with_nested_module/parent/main.tf @@ -0,0 +1,12 @@ +# parent/main.tf +variable "parent" { + type = string +} +module "parent" { + source = "../child" + child-name = "1" +} + +output "parent-result" { + value = module.parent.child-result +} \ No newline at end of file diff --git a/tests/terraform/graph/variable_rendering/test_foreach_renderer.py b/tests/terraform/graph/variable_rendering/test_foreach_renderer.py index 036e507f094..50b2c89d5e5 100644 --- a/tests/terraform/graph/variable_rendering/test_foreach_renderer.py +++ b/tests/terraform/graph/variable_rendering/test_foreach_renderer.py @@ -422,6 +422,13 @@ def test_foreach_with_lookup(): assert graph.vertices[1].attributes.get('uniform_bucket_level_access') == [True] +@mock.patch.dict(os.environ, {"CHECKOV_ENABLE_MODULES_FOREACH_HANDLING": "True"}) +def test_foreach_large_count_with_nested_module(checkov_source_path): + dir_name = 'os_example_large_count_with_nested_module' + graph, _ = build_and_get_graph_by_path(dir_name, render_var=True) + assert len(graph.vertices) == 85 + + def test__get_tf_module_with_no_foreach(): module = TFModule(name='1', path='1', foreach_idx='1', nested_tf_module=TFModule(name='2', path='2', foreach_idx='2', nested_tf_module=None))