From ee564f48650118f8fd39cfccd880cf7c20664635 Mon Sep 17 00:00:00 2001
From: YaaraVerner <86768411+YaaraVerner@users.noreply.github.com>
Date: Sun, 1 Oct 2023 14:00:01 +0300
Subject: [PATCH] platform(terraform): upload tf sub graphs (#5596)

* implement

* fixes

* fix merge conflicts

* fix lint

* fix lint

* fix lint

* fix types

* fix types

* fix types
---
 .../common/bridgecrew/platform_integration.py |  8 ++-
 checkov/common/bridgecrew/wrapper.py          | 27 +++++-----
 checkov/common/runners/runner_registry.py     | 28 ++++++----
 checkov/main.py                               |  7 ++-
 checkov/terraform/graph_manager.py            | 14 ++---
 checkov/terraform/runner.py                   | 52 ++++++++++++-------
 6 files changed, 79 insertions(+), 57 deletions(-)

diff --git a/checkov/common/bridgecrew/platform_integration.py b/checkov/common/bridgecrew/platform_integration.py
index 12763873b16..1988cd3379e 100644
--- a/checkov/common/bridgecrew/platform_integration.py
+++ b/checkov/common/bridgecrew/platform_integration.py
@@ -14,7 +14,7 @@
 from os import path
 from pathlib import Path
 from time import sleep
-from typing import List, Dict, TYPE_CHECKING, Any, cast
+from typing import List, Dict, TYPE_CHECKING, Any, cast, Optional
 
 import boto3
 import dpath
@@ -37,7 +37,7 @@
 from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SCANNABLE_PACKAGE_FILES
 from checkov.common.bridgecrew.check_type import CheckType
 from checkov.common.runners.base_runner import filter_ignored_paths
-from checkov.common.typing import _CicdDetails
+from checkov.common.typing import _CicdDetails, LibraryGraph
 from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
 from checkov.common.util.data_structures_utils import merge_dicts
 from checkov.common.util.dockerfile import is_dockerfile
@@ -66,8 +66,6 @@
     from mypy_boto3_s3.client import S3Client
     from requests import Response
     from typing_extensions import TypeGuard
-    from igraph import Graph
-    from networkx import DiGraph
 
 
 SLEEP_SECONDS = 1
@@ -558,7 +556,7 @@ def persist_logs_stream(self, logs_stream: StringIO) -> None:
         log_path = f'{self.support_repo_path}/checkov_results' if self.support_repo_path == self.repo_path else self.support_repo_path
         persist_logs_stream(logs_stream, self.s3_client, self.support_bucket, log_path)
 
-    def persist_graphs(self, graphs: dict[str, DiGraph | Graph], absolute_root_folder: str = '') -> None:
+    def persist_graphs(self, graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]], absolute_root_folder: str = '') -> None:
         if not self.use_s3_integration or not self.s3_client:
             return
         if not self.bucket or not self.repo_path:
diff --git a/checkov/common/bridgecrew/wrapper.py b/checkov/common/bridgecrew/wrapper.py
index 8b279c3cb4b..196769b90cf 100644
--- a/checkov/common/bridgecrew/wrapper.py
+++ b/checkov/common/bridgecrew/wrapper.py
@@ -6,7 +6,7 @@
 import itertools
 from concurrent import futures
 from io import StringIO
-from typing import Any, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING, Optional
 from collections import defaultdict
 
 import dpath
@@ -23,7 +23,7 @@
 
 from checkov.common.bridgecrew.check_type import CheckType
 from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS
-from checkov.common.typing import _ReducedScanReport
+from checkov.common.typing import _ReducedScanReport, LibraryGraph
 from checkov.common.util.file_utils import compress_string_io_tar
 from checkov.common.util.igraph_serialization import serialize_to_json
 from checkov.common.util.json_utils import CustomJSONEncoder
@@ -153,14 +153,14 @@ def enrich_and_persist_checks_metadata(
 
 
 def persist_graphs(
-    graphs: dict[str, DiGraph | Graph | PyDiGraph[Any, Any]],
-    s3_client: S3Client,
-    bucket: str,
-    full_repo_object_key: str,
-    timeout: int,
-    absolute_root_folder: str = '',
+        graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]],
+        s3_client: S3Client,
+        bucket: str,
+        full_repo_object_key: str,
+        timeout: int,
+        absolute_root_folder: str = ''
 ) -> None:
-    def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder: str = '') -> None:
+    def _upload_graph(check_type: str, graph: LibraryGraph, _absolute_root_folder: str = '', subgraph_path: Optional[str] = None) -> None:
         if isinstance(graph, DiGraph):
             json_obj = node_link_data(graph)
             graph_file_name = FILE_NAME_NETWORKX
@@ -173,7 +173,8 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder
         else:
             logging.error(f"unsupported graph type '{graph.__class__.__name__}'")
             return
-        s3_key = f'{graphs_repo_object_key}/{check_type}/{graph_file_name}'
+        multi_graph_addition = (f"multi-graph/{subgraph_path}" if subgraph_path is not None else '').rstrip("/")
+        s3_key = os.path.join(graphs_repo_object_key, check_type, multi_graph_addition, graph_file_name)
         try:
             _put_json_object(s3_client, json_obj, bucket, s3_key)
         except Exception:
@@ -183,9 +184,9 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder
 
     with futures.ThreadPoolExecutor() as executor:
         futures.wait(
-            [executor.submit(_upload_graph, check_type, graph, absolute_root_folder) for
-             check_type, graph in graphs.items()],
+            [executor.submit(_upload_graph, check_type, graph, absolute_root_folder, subgraph_path) for
+             check_type, graphs in graphs.items() for graph, subgraph_path in graphs],
             return_when=futures.FIRST_EXCEPTION,
             timeout=timeout
         )
-    logging.info(f"Done persisting {len(graphs)} graphs")
+    logging.info(f"Done persisting {len(list(itertools.chain(*graphs.values())))} graphs")
diff --git a/checkov/common/runners/runner_registry.py b/checkov/common/runners/runner_registry.py
index 936be4345a6..797d6231886 100644
--- a/checkov/common/runners/runner_registry.py
+++ b/checkov/common/runners/runner_registry.py
@@ -37,7 +37,7 @@
 from checkov.common.output.spdx import SPDX
 from checkov.common.parallelizer.parallel_runner import parallel_runner
 from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
-from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds
+from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds, LibraryGraph
 from checkov.common.util import data_structures_utils
 from checkov.common.util.banner import tool as tool_name
 from checkov.common.util.data_structures_utils import pickle_deepcopy
@@ -53,8 +53,6 @@
     from checkov.common.output.baseline import Baseline
     from checkov.common.runners.base_runner import BaseRunner  # noqa
     from checkov.runner_filter import RunnerFilter
-    from igraph import Graph
-    from networkx import DiGraph
 
 CONSOLE_OUTPUT = "console"
 CHECK_BLOCK_TYPES = frozenset(["resource", "data", "provider", "module"])
@@ -97,7 +95,7 @@ def __init__(
         self._check_type_to_report_map: dict[str, Report] = {}  # used for finding reports with the same check type
         self.licensing_integration = licensing_integration  # can be maniuplated by unit tests
         self.secrets_omitter_class = secrets_omitter_class
-        self.check_type_to_graph: dict[str, Graph | DiGraph] = {}
+        self.check_type_to_graph: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {}
         for runner in runners:
             if isinstance(runner, image_runner):
                 runner.image_referencers = self.image_referencing_runners
@@ -126,7 +124,7 @@ def run(
                 # This is the only runner, so raise a clear indication of failure
                 raise ModuleNotEnabledError(f'The framework "{runner_check_type}" is part of the "{self.licensing_integration.get_subscription_for_runner(runner_check_type).name}" module, which is not enabled in the platform')
         else:
-            def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, DiGraph | Graph | None]:
+            def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, Optional[list[tuple[LibraryGraph, Optional[str]]]]]:
                 report = runner.run(
                     root_folder=root_folder,
                     external_checks_dir=external_checks_dir,
@@ -140,7 +138,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
                     report = Report(check_type=runner.check_type)
 
                 if runner.graph_manager:
-                    return report, runner.check_type, runner.graph_manager.get_reader_endpoint()
+                    return report, runner.check_type, self.extract_graphs_from_runner(runner)
                 return report, None, None
 
             valid_runners = []
@@ -174,10 +172,10 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
             full_check_type_to_graph = {}
             for result in parallel_runner_results:
                 if result is not None:
-                    report, check_type, graph = result
+                    report, check_type, graphs = result
                     reports.append(report)
-                    if check_type is not None and graph is not None:
-                        full_check_type_to_graph[check_type] = graph
+                    if check_type is not None and graphs is not None:
+                        full_check_type_to_graph[check_type] = graphs
             self.check_type_to_graph = full_check_type_to_graph
 
         merged_reports = self._merge_reports(reports)
@@ -192,7 +190,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
             self._handle_report(scan_report, repo_root_for_plan_enrichment)
 
         if not self.check_type_to_graph:
-            self.check_type_to_graph = {runner.check_type: runner.graph_manager.get_reader_endpoint() for runner
+            self.check_type_to_graph = {runner.check_type: self.extract_graphs_from_runner(runner) for runner
                                         in self.runners if runner.graph_manager}
         return self.scan_reports
 
@@ -750,3 +748,13 @@ def extract_git_info_from_account_id(account_id: str) -> tuple[str, str]:
             git_org, git_repository = "", ""
 
         return git_org, git_repository
+
+    @staticmethod
+    def extract_graphs_from_runner(runner: _BaseRunner) -> list[tuple[LibraryGraph, Optional[str]]]:
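+        # the 'all_graphs' attribute exists only for terraform; without it (or if it is empty) fall back to the graph manager's single graph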
+        all_graphs = getattr(runner, 'all_graphs', None)
+        if all_graphs:
+            return all_graphs   # type:ignore[no-any-return]
+        elif runner.graph_manager:
+            return [(runner.graph_manager.get_reader_endpoint(), None)]
+        return []
diff --git a/checkov/main.py b/checkov/main.py
index 1b7c392a12a..16945165ab8 100755
--- a/checkov/main.py
+++ b/checkov/main.py
@@ -12,7 +12,7 @@
 import platform
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 
 import argcomplete
 import configargparse
@@ -45,6 +45,7 @@
 from checkov.common.bridgecrew.check_type import checkov_runners, CheckType
 from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
 from checkov.common.runners.runner_registry import RunnerRegistry
+from checkov.common.typing import LibraryGraph
 from checkov.common.util import prompt
 from checkov.common.util.banner import banner as checkov_banner, tool as checkov_tool
 from checkov.common.util.config_utils import get_default_config_paths
@@ -83,8 +84,6 @@
     from checkov.common.output.report import Report
     from configargparse import Namespace
     from typing_extensions import Literal
-    from igraph import Graph
-    from networkx import DiGraph
 
 signal.signal(signal.SIGINT, lambda x, y: sys.exit(''))
 
@@ -131,7 +130,7 @@ def __init__(self, argv: list[str] = sys.argv[1:]) -> None:
         self.runners = DEFAULT_RUNNERS
         self.scan_reports: "list[Report]" = []
         self.run_metadata: dict[str, str | list[str]] = {}
-        self.graphs: dict[str, DiGraph | Graph] = {}
+        self.graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {}
         self.url: str | None = None
 
         self.parse_config(argv=argv)
diff --git a/checkov/terraform/graph_manager.py b/checkov/terraform/graph_manager.py
index 1dddc9c8211..708529c402f 100644
--- a/checkov/terraform/graph_manager.py
+++ b/checkov/terraform/graph_manager.py
@@ -32,7 +32,7 @@ def build_multi_graph_from_source_directory(
         external_modules_download_path: str = DEFAULT_EXTERNAL_MODULES_DIR,
         vars_files: list[str] | None = None,
         create_graph: bool = True,
-    ) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]]:
+    ) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]]:
         logging.info("Parsing HCL files in source dir to multi graph")
         modules_with_definitions = self.parser.parse_multi_graph_hcl_module(
             source_dir=source_dir,
@@ -45,13 +45,15 @@ def build_multi_graph_from_source_directory(
             create_graph=create_graph,
         )
 
-        graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]] = []
+        graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]] = []
         for module, tf_definitions in modules_with_definitions:
             if create_graph and module:
                 logging.info("Building graph from parsed module")
                 local_graph = local_graph_class(module)
                 local_graph.build_graph(render_variables=render_variables)
-                graphs.append((local_graph, tf_definitions))
+                subgraph_abs_path = module.source_dir
+                subgraph_path = subgraph_abs_path[subgraph_abs_path.rindex(source_dir) + len(source_dir) + 1:]
+                graphs.append((local_graph, tf_definitions, subgraph_path))
 
         return graphs
 
@@ -111,15 +113,15 @@ def build_graph_from_definitions(
         return local_graph
 
     def build_multi_graph_from_definitions(self, definitions: dict[TFDefinitionKey, dict[str, Any]],
-                                           render_variables: bool = True) -> list[TerraformLocalGraph]:
+                                           render_variables: bool = True) -> list[tuple[str, TerraformLocalGraph]]:
         module, tf_definitions = self.parser.parse_hcl_module_from_tf_definitions(definitions, "", self.source)
         dirs_to_definitions = self.parser.create_definition_by_dirs(tf_definitions)
 
-        graphs: list[TerraformLocalGraph] = []
+        graphs: list[tuple[str, TerraformLocalGraph]] = []
         for source_path, dir_definitions in dirs_to_definitions.items():
             module, parsed_tf_definitions = self.parser.parse_hcl_module_from_multi_tf_definitions(dir_definitions, source_path, self.source)
             local_graph = TerraformLocalGraph(module)
             local_graph.build_graph(render_variables=render_variables)
-            graphs.append(local_graph)
+            graphs.append((source_path, local_graph))
 
         return graphs
diff --git a/checkov/terraform/runner.py b/checkov/terraform/runner.py
index fbc524eadf8..ae3cebb0660 100644
--- a/checkov/terraform/runner.py
+++ b/checkov/terraform/runner.py
@@ -8,6 +8,8 @@
 from typing_extensions import TypeAlias  # noqa[TC002]
 
 from checkov.common.bridgecrew.check_type import CheckType
+from checkov.common.graph.checks_infra.registry import BaseRegistry
+from checkov.common.graph.graph_builder.consts import GraphSource
 from checkov.common.output.extra_resource import ExtraResource
 from checkov.common.parallelizer.parallel_runner import parallel_runner
 from checkov.common.output.graph_record import GraphRecord
@@ -20,6 +22,7 @@
 from checkov.common.util.secrets import omit_secret_value_from_checks
 from checkov.runner_filter import RunnerFilter
 from checkov.terraform.base_runner import BaseTerraformRunner
+from checkov.terraform.graph_manager import TerraformGraphManager
 from checkov.terraform.modules.module_objects import TFDefinitionKey, TFModule
 from checkov.terraform.context_parsers.registry import parser_registry
 from checkov.terraform.evaluation.base_variable_evaluation import BaseVariableEvaluation
@@ -29,9 +32,10 @@
 from checkov.terraform.graph_builder.local_graph import TerraformLocalGraph
 from checkov.terraform.tag_providers import get_resource_tags
 from checkov.common.runners.base_runner import strtobool
+from checkov.terraform.tf_parser import TFParser
 
 if TYPE_CHECKING:
-    from checkov.common.typing import _SkippedCheck, LibraryGraph
+    from checkov.common.typing import _SkippedCheck, LibraryGraph, LibraryGraphConnector
 
 _TerraformContext: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]"
 _TerraformDefinitions: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]"
@@ -42,6 +46,18 @@
 class Runner(BaseTerraformRunner[_TerraformDefinitions, _TerraformContext, TFDefinitionKey]):
     check_type = CheckType.TERRAFORM  # noqa: CCE003  # a static attribute
 
+    def __init__(
+        self,
+        parser: TFParser | None = None,
+        db_connector: LibraryGraphConnector | None = None,
+        external_registries: list[BaseRegistry] | None = None,
+        source: str = GraphSource.TERRAFORM,
+        graph_class: type[TerraformLocalGraph] = TerraformLocalGraph,
+        graph_manager: TerraformGraphManager | None = None,
+    ) -> None:
+        super().__init__(parser, db_connector, external_registries, source, graph_class, graph_manager)
+        self.all_graphs: list[tuple[LibraryGraph, str]] = []
+
     def run(
         self,
         root_folder: str | None,
@@ -57,8 +73,7 @@ def run(
         report = Report(self.check_type)
         parsing_errors: dict[str, Exception] = {}
         self.load_external_checks(external_checks_dir)
-        local_graph = None
-        all_graphs: list[LibraryGraph] = []
+        local_graphs = None
         if self.context is None or self.definitions is None or self.breadcrumbs is None:
             self.definitions = {}
             logging.info("Scanning root folder and producing fresh tf_definitions and context")
@@ -76,11 +91,11 @@ def run(
                         vars_files=runner_filter.var_files,
                         create_graph=CHECKOV_CREATE_GRAPH,
                     )
-                    local_graph = []
-                    for graph, definitions in graphs_with_definitions:
+                    local_graphs = []
+                    for graph, definitions, subgraph_path in graphs_with_definitions:
                         for definition in definitions:
                             self.definitions.update(definition)
-                        local_graph.append(graph)
+                        local_graphs.append((subgraph_path, graph))
                 else:
                     single_graph, self.definitions = self.graph_manager.build_graph_from_source_directory(
                         source_dir=root_folder,
@@ -93,7 +108,7 @@ def run(
                         create_graph=CHECKOV_CREATE_GRAPH,
                     )
                     # Make graph a list to allow single processing method for all cases
-                    local_graph = [single_graph]
+                    local_graphs = [('', single_graph)]
             elif files:
                 files = [os.path.abspath(file) for file in files]
                 root_folder = os.path.split(os.path.commonprefix(files))[0]
@@ -101,22 +116,20 @@ def run(
 
                 if CHECKOV_CREATE_GRAPH:
                     if tf_split_graph:
-                        local_graph = self.graph_manager.build_multi_graph_from_definitions(  # type:ignore[assignment]  # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
+                        local_graphs = self.graph_manager.build_multi_graph_from_definitions(  # type:ignore[assignment]  # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
                             self.definitions
                         )
                     else:
                         # local_graph needs to be a list to allow supporting multi graph
-                        local_graph = [self.graph_manager.build_graph_from_definitions(self.definitions)]
+                        local_graphs = [('', self.graph_manager.build_graph_from_definitions(self.definitions))]
             else:
                 raise Exception("Root directory was not specified, files were not specified")
 
-            if CHECKOV_CREATE_GRAPH and local_graph:
+            if CHECKOV_CREATE_GRAPH and local_graphs:
                 self._update_definitions_and_breadcrumbs(
-                    all_graphs,
-                    local_graph,  # type:ignore[arg-type]  # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
+                    local_graphs,  # type:ignore[arg-type]  # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
                     report,
-                    root_folder,
-                )
+                    root_folder)
         else:
             logging.info("Scanning root folder using existing tf_definitions")
             if root_folder is None:
@@ -129,8 +142,8 @@ def run(
         report.add_parsing_errors(parsing_errors.keys())
 
         if CHECKOV_CREATE_GRAPH:
-            if all_graphs:
-                for igraph_graph in all_graphs:
+            if self.all_graphs:
+                for igraph_graph, _ in self.all_graphs:
                     graph_report = self.get_graph_checks_report(root_folder, runner_filter, graph=igraph_graph)
                     merge_reports(report, graph_report)
             else:
@@ -177,17 +190,18 @@ def parse_file(file: str) -> tuple[str, dict[str, Any] | None, dict[str, Excepti
                     parsing_errors.update(file_parsing_errors)
 
     def _update_definitions_and_breadcrumbs(
-        self, all_graphs: list[LibraryGraph], local_graph: list[TerraformLocalGraph], report: Report, root_folder: str
+        self, local_graphs: list[tuple[str, TerraformLocalGraph]], report: Report, root_folder: str
     ) -> None:
         self.definitions = {}
         self.breadcrumbs = {}
-        for graph in local_graph:
+        self.all_graphs = []
+        for subgraph_path, graph in local_graphs:
             for vertex in graph.vertices:
                 if vertex.block_type == BlockType.RESOURCE:
                     vertex_id = vertex.attributes.get(CustomAttributes.TF_RESOURCE_ADDRESS)
                     report.add_resource(f"{vertex.path}:{vertex_id}")
             igraph_graph = self.graph_manager.save_graph(graph)
-            all_graphs.append(igraph_graph)
+            self.all_graphs.append((igraph_graph, subgraph_path))
             current_definitions, current_breadcrumbs = convert_graph_vertices_to_tf_definitions(
                 graph.vertices,
                 root_folder,