From ee564f48650118f8fd39cfccd880cf7c20664635 Mon Sep 17 00:00:00 2001 From: YaaraVerner <86768411+YaaraVerner@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:00:01 +0300 Subject: [PATCH] platform(terraform): upload tf sub graphs (#5596) * implement * fixes * fix merge conflicts * fix lint * fix lint * fix lint * fix types * fix types * fix types --- .../common/bridgecrew/platform_integration.py | 8 ++- checkov/common/bridgecrew/wrapper.py | 27 +++++----- checkov/common/runners/runner_registry.py | 28 ++++++---- checkov/main.py | 7 ++- checkov/terraform/graph_manager.py | 14 ++--- checkov/terraform/runner.py | 52 ++++++++++++------- 6 files changed, 79 insertions(+), 57 deletions(-) diff --git a/checkov/common/bridgecrew/platform_integration.py b/checkov/common/bridgecrew/platform_integration.py index 12763873b16..1988cd3379e 100644 --- a/checkov/common/bridgecrew/platform_integration.py +++ b/checkov/common/bridgecrew/platform_integration.py @@ -14,7 +14,7 @@ from os import path from pathlib import Path from time import sleep -from typing import List, Dict, TYPE_CHECKING, Any, cast +from typing import List, Dict, TYPE_CHECKING, Any, cast, Optional import boto3 import dpath @@ -37,7 +37,7 @@ from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SCANNABLE_PACKAGE_FILES from checkov.common.bridgecrew.check_type import CheckType from checkov.common.runners.base_runner import filter_ignored_paths -from checkov.common.typing import _CicdDetails +from checkov.common.typing import _CicdDetails, LibraryGraph from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 from checkov.common.util.data_structures_utils import merge_dicts from checkov.common.util.dockerfile import is_dockerfile @@ -66,8 +66,6 @@ from mypy_boto3_s3.client import S3Client from requests import Response from typing_extensions import TypeGuard - from igraph import Graph - from networkx import DiGraph SLEEP_SECONDS = 1 @@ -558,7 +556,7 @@ def persist_logs_stream(self, logs_stream: StringIO) -> None: log_path = f'{self.support_repo_path}/checkov_results' if self.support_repo_path == self.repo_path else self.support_repo_path persist_logs_stream(logs_stream, self.s3_client, self.support_bucket, log_path) - def persist_graphs(self, graphs: dict[str, DiGraph | Graph], absolute_root_folder: str = '') -> None: + def persist_graphs(self, graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]], absolute_root_folder: str = '') -> None: if not self.use_s3_integration or not self.s3_client: return if not self.bucket or not self.repo_path: diff --git a/checkov/common/bridgecrew/wrapper.py b/checkov/common/bridgecrew/wrapper.py index 8b279c3cb4b..196769b90cf 100644 --- a/checkov/common/bridgecrew/wrapper.py +++ b/checkov/common/bridgecrew/wrapper.py @@ -6,7 +6,7 @@ import itertools from concurrent import futures from io import StringIO -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, Optional from collections import defaultdict import dpath @@ -23,7 +23,7 @@ from checkov.common.bridgecrew.check_type import CheckType from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS -from checkov.common.typing import _ReducedScanReport +from checkov.common.typing import _ReducedScanReport, LibraryGraph from checkov.common.util.file_utils import compress_string_io_tar from checkov.common.util.igraph_serialization import serialize_to_json from checkov.common.util.json_utils import CustomJSONEncoder @@ -153,14 +153,14 @@ def enrich_and_persist_checks_metadata( def persist_graphs( - graphs: dict[str, DiGraph | Graph | PyDiGraph[Any, Any]], - s3_client: S3Client, - bucket: str, - full_repo_object_key: str, - timeout: int, - absolute_root_folder: str = '', + graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]], + s3_client: S3Client, + bucket: str, + full_repo_object_key: str, + timeout: int, + absolute_root_folder: str = '' ) -> None: - def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder: str = '') -> None: + def _upload_graph(check_type: str, graph: LibraryGraph, _absolute_root_folder: str = '', subgraph_path: Optional[str] = None) -> None: if isinstance(graph, DiGraph): json_obj = node_link_data(graph) graph_file_name = FILE_NAME_NETWORKX @@ -173,7 +173,8 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder else: logging.error(f"unsupported graph type '{graph.__class__.__name__}'") return - s3_key = f'{graphs_repo_object_key}/{check_type}/{graph_file_name}' + multi_graph_addition = (f"multi-graph/{subgraph_path}" if subgraph_path is not None else '').rstrip("/") + s3_key = os.path.join(graphs_repo_object_key, check_type, multi_graph_addition, graph_file_name) try: _put_json_object(s3_client, json_obj, bucket, s3_key) except Exception: @@ -183,9 +184,9 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder with futures.ThreadPoolExecutor() as executor: futures.wait( - [executor.submit(_upload_graph, check_type, graph, absolute_root_folder) for - check_type, graph in graphs.items()], + [executor.submit(_upload_graph, check_type, graph, absolute_root_folder, subgraph_path) for + check_type, graphs in graphs.items() for graph, subgraph_path in graphs], return_when=futures.FIRST_EXCEPTION, timeout=timeout ) - logging.info(f"Done persisting {len(graphs)} graphs") + logging.info(f"Done persisting {len(list(itertools.chain(*graphs.values())))} graphs") diff --git a/checkov/common/runners/runner_registry.py b/checkov/common/runners/runner_registry.py index 936be4345a6..797d6231886 100644 --- a/checkov/common/runners/runner_registry.py +++ b/checkov/common/runners/runner_registry.py @@ -37,7 +37,7 @@ from checkov.common.output.spdx import SPDX from checkov.common.parallelizer.parallel_runner import parallel_runner from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger -from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds +from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds, LibraryGraph from checkov.common.util import data_structures_utils from checkov.common.util.banner import tool as tool_name from checkov.common.util.data_structures_utils import pickle_deepcopy @@ -53,8 +53,6 @@ from checkov.common.output.baseline import Baseline from checkov.common.runners.base_runner import BaseRunner # noqa from checkov.runner_filter import RunnerFilter - from igraph import Graph - from networkx import DiGraph CONSOLE_OUTPUT = "console" CHECK_BLOCK_TYPES = frozenset(["resource", "data", "provider", "module"]) @@ -97,7 +95,7 @@ def __init__( self._check_type_to_report_map: dict[str, Report] = {} # used for finding reports with the same check type self.licensing_integration = licensing_integration # can be maniuplated by unit tests self.secrets_omitter_class = secrets_omitter_class - self.check_type_to_graph: dict[str, Graph | DiGraph] = {} + self.check_type_to_graph: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {} for runner in runners: if isinstance(runner, image_runner): runner.image_referencers = self.image_referencing_runners @@ -126,7 +124,7 @@ def run( # This is the only runner, so raise a clear indication of failure raise ModuleNotEnabledError(f'The framework "{runner_check_type}" is part of the "{self.licensing_integration.get_subscription_for_runner(runner_check_type).name}" module, which is not enabled in the platform') else: - def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, DiGraph | Graph | None]: + def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, Optional[list[tuple[LibraryGraph, Optional[str]]]]]: report = runner.run( root_folder=root_folder, external_checks_dir=external_checks_dir, @@ -140,7 +138,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non report = Report(check_type=runner.check_type) if runner.graph_manager: - return report, runner.check_type, runner.graph_manager.get_reader_endpoint() + return report, runner.check_type, self.extract_graphs_from_runner(runner) return report, None, None valid_runners = [] @@ -174,10 +172,10 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non full_check_type_to_graph = {} for result in parallel_runner_results: if result is not None: - report, check_type, graph = result + report, check_type, graphs = result reports.append(report) - if check_type is not None and graph is not None: - full_check_type_to_graph[check_type] = graph + if check_type is not None and graphs is not None: + full_check_type_to_graph[check_type] = graphs self.check_type_to_graph = full_check_type_to_graph merged_reports = self._merge_reports(reports) @@ -192,7 +190,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non self._handle_report(scan_report, repo_root_for_plan_enrichment) if not self.check_type_to_graph: - self.check_type_to_graph = {runner.check_type: runner.graph_manager.get_reader_endpoint() for runner + self.check_type_to_graph = {runner.check_type: self.extract_graphs_from_runner(runner) for runner in self.runners if runner.graph_manager} return self.scan_reports @@ -750,3 +748,13 @@ def extract_git_info_from_account_id(account_id: str) -> tuple[str, str]: git_org, git_repository = "", "" return git_org, git_repository + + @staticmethod + def extract_graphs_from_runner(runner: _BaseRunner) -> list[tuple[LibraryGraph, Optional[str]]]: + # exist only for terraform + all_graphs = getattr(runner, 'all_graphs', None) + if all_graphs: + return all_graphs # type:ignore[no-any-return] + elif runner.graph_manager: + return [(runner.graph_manager.get_reader_endpoint(), None)] + return [] diff --git a/checkov/main.py b/checkov/main.py index 1b7c392a12a..16945165ab8 100755 --- a/checkov/main.py +++ b/checkov/main.py @@ -12,7 +12,7 @@ import platform from collections import defaultdict from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import argcomplete import configargparse @@ -45,6 +45,7 @@ from checkov.common.bridgecrew.check_type import checkov_runners, CheckType from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger from checkov.common.runners.runner_registry import RunnerRegistry +from checkov.common.typing import LibraryGraph from checkov.common.util import prompt from checkov.common.util.banner import banner as checkov_banner, tool as checkov_tool from checkov.common.util.config_utils import get_default_config_paths @@ -83,8 +84,6 @@ from checkov.common.output.report import Report from configargparse import Namespace from typing_extensions import Literal - from igraph import Graph - from networkx import DiGraph signal.signal(signal.SIGINT, lambda x, y: sys.exit('')) @@ -131,7 +130,7 @@ def __init__(self, argv: list[str] = sys.argv[1:]) -> None: self.runners = DEFAULT_RUNNERS self.scan_reports: "list[Report]" = [] self.run_metadata: dict[str, str | list[str]] = {} - self.graphs: dict[str, DiGraph | Graph] = {} + self.graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {} self.url: str | None = None self.parse_config(argv=argv) diff --git a/checkov/terraform/graph_manager.py b/checkov/terraform/graph_manager.py index 1dddc9c8211..708529c402f 100644 --- a/checkov/terraform/graph_manager.py +++ b/checkov/terraform/graph_manager.py @@ -32,7 +32,7 @@ def build_multi_graph_from_source_directory( external_modules_download_path: str = DEFAULT_EXTERNAL_MODULES_DIR, vars_files: list[str] | None = None, create_graph: bool = True, - ) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]]: + ) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]]: logging.info("Parsing HCL files in source dir to multi graph") modules_with_definitions = self.parser.parse_multi_graph_hcl_module( source_dir=source_dir, @@ -45,13 +45,15 @@ def build_multi_graph_from_source_directory( create_graph=create_graph, ) - graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]] = [] + graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]] = [] for module, tf_definitions in modules_with_definitions: if create_graph and module: logging.info("Building graph from parsed module") local_graph = local_graph_class(module) local_graph.build_graph(render_variables=render_variables) - graphs.append((local_graph, tf_definitions)) + subgraph_abs_path = module.source_dir + subgraph_path = subgraph_abs_path[subgraph_abs_path.rindex(source_dir) + len(source_dir) + 1:] + graphs.append((local_graph, tf_definitions, subgraph_path)) return graphs @@ -111,15 +113,15 @@ def build_graph_from_definitions( return local_graph def build_multi_graph_from_definitions(self, definitions: dict[TFDefinitionKey, dict[str, Any]], - render_variables: bool = True) -> list[TerraformLocalGraph]: + render_variables: bool = True) -> list[tuple[str, TerraformLocalGraph]]: module, tf_definitions = self.parser.parse_hcl_module_from_tf_definitions(definitions, "", self.source) dirs_to_definitions = self.parser.create_definition_by_dirs(tf_definitions) - graphs: list[TerraformLocalGraph] = [] + graphs: list[tuple[str, TerraformLocalGraph]] = [] for source_path, dir_definitions in dirs_to_definitions.items(): module, parsed_tf_definitions = self.parser.parse_hcl_module_from_multi_tf_definitions(dir_definitions, source_path, self.source) local_graph = TerraformLocalGraph(module) local_graph.build_graph(render_variables=render_variables) - graphs.append(local_graph) + graphs.append((source_path, local_graph)) return graphs diff --git a/checkov/terraform/runner.py b/checkov/terraform/runner.py index fbc524eadf8..ae3cebb0660 100644 --- a/checkov/terraform/runner.py +++ b/checkov/terraform/runner.py @@ -8,6 +8,8 @@ from typing_extensions import TypeAlias # noqa[TC002] from checkov.common.bridgecrew.check_type import CheckType +from checkov.common.graph.checks_infra.registry import BaseRegistry +from checkov.common.graph.graph_builder.consts import GraphSource from checkov.common.output.extra_resource import ExtraResource from checkov.common.parallelizer.parallel_runner import parallel_runner from checkov.common.output.graph_record import GraphRecord @@ -20,6 +22,7 @@ from checkov.common.util.secrets import omit_secret_value_from_checks from checkov.runner_filter import RunnerFilter from checkov.terraform.base_runner import BaseTerraformRunner +from checkov.terraform.graph_manager import TerraformGraphManager from checkov.terraform.modules.module_objects import TFDefinitionKey, TFModule from checkov.terraform.context_parsers.registry import parser_registry from checkov.terraform.evaluation.base_variable_evaluation import BaseVariableEvaluation @@ -29,9 +32,10 @@ from checkov.terraform.graph_builder.local_graph import TerraformLocalGraph from checkov.terraform.tag_providers import get_resource_tags from checkov.common.runners.base_runner import strtobool +from checkov.terraform.tf_parser import TFParser if TYPE_CHECKING: - from checkov.common.typing import _SkippedCheck, LibraryGraph + from checkov.common.typing import _SkippedCheck, LibraryGraph, LibraryGraphConnector _TerraformContext: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]" _TerraformDefinitions: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]" @@ -42,6 +46,18 @@ class Runner(BaseTerraformRunner[_TerraformDefinitions, _TerraformContext, TFDefinitionKey]): check_type = CheckType.TERRAFORM # noqa: CCE003 # a static attribute + def __init__( + self, + parser: TFParser | None = None, + db_connector: LibraryGraphConnector | None = None, + external_registries: list[BaseRegistry] | None = None, + source: str = GraphSource.TERRAFORM, + graph_class: type[TerraformLocalGraph] = TerraformLocalGraph, + graph_manager: TerraformGraphManager | None = None, + ) -> None: + super().__init__(parser, db_connector, external_registries, source, graph_class, graph_manager) + self.all_graphs: list[tuple[LibraryGraph, str]] = [] + def run( self, root_folder: str | None, @@ -57,8 +73,7 @@ def run( report = Report(self.check_type) parsing_errors: dict[str, Exception] = {} self.load_external_checks(external_checks_dir) - local_graph = None - all_graphs: list[LibraryGraph] = [] + local_graphs = None if self.context is None or self.definitions is None or self.breadcrumbs is None: self.definitions = {} logging.info("Scanning root folder and producing fresh tf_definitions and context") @@ -76,11 +91,11 @@ def run( vars_files=runner_filter.var_files, create_graph=CHECKOV_CREATE_GRAPH, ) - local_graph = [] - for graph, definitions in graphs_with_definitions: + local_graphs = [] + for graph, definitions, subgraph_path in graphs_with_definitions: for definition in definitions: self.definitions.update(definition) - local_graph.append(graph) + local_graphs.append((subgraph_path, graph)) else: single_graph, self.definitions = self.graph_manager.build_graph_from_source_directory( source_dir=root_folder, @@ -93,7 +108,7 @@ def run( create_graph=CHECKOV_CREATE_GRAPH, ) # Make graph a list to allow single processing method for all cases - local_graph = [single_graph] + local_graphs = [('', single_graph)] elif files: files = [os.path.abspath(file) for file in files] root_folder = os.path.split(os.path.commonprefix(files))[0] @@ -101,22 +116,20 @@ def run( if CHECKOV_CREATE_GRAPH: if tf_split_graph: - local_graph = self.graph_manager.build_multi_graph_from_definitions( # type:ignore[assignment] # will be fixed after removing 'CHECKOV_CREATE_GRAPH' + local_graphs = self.graph_manager.build_multi_graph_from_definitions( # type:ignore[assignment] # will be fixed after removing 'CHECKOV_CREATE_GRAPH' self.definitions ) else: # local_graph needs to be a list to allow supporting multi graph - local_graph = [self.graph_manager.build_graph_from_definitions(self.definitions)] + local_graphs = [('', self.graph_manager.build_graph_from_definitions(self.definitions))] else: raise Exception("Root directory was not specified, files were not specified") - if CHECKOV_CREATE_GRAPH and local_graph: + if CHECKOV_CREATE_GRAPH and local_graphs: self._update_definitions_and_breadcrumbs( - all_graphs, - local_graph, # type:ignore[arg-type] # will be fixed after removing 'CHECKOV_CREATE_GRAPH' + local_graphs, # type:ignore[arg-type] # will be fixed after removing 'CHECKOV_CREATE_GRAPH' report, - root_folder, - ) + root_folder) else: logging.info("Scanning root folder using existing tf_definitions") if root_folder is None: @@ -129,8 +142,8 @@ def run( report.add_parsing_errors(parsing_errors.keys()) if CHECKOV_CREATE_GRAPH: - if all_graphs: - for igraph_graph in all_graphs: + if self.all_graphs: + for igraph_graph, _ in self.all_graphs: graph_report = self.get_graph_checks_report(root_folder, runner_filter, graph=igraph_graph) merge_reports(report, graph_report) else: @@ -177,17 +190,18 @@ def parse_file(file: str) -> tuple[str, dict[str, Any] | None, dict[str, Excepti parsing_errors.update(file_parsing_errors) def _update_definitions_and_breadcrumbs( - self, all_graphs: list[LibraryGraph], local_graph: list[TerraformLocalGraph], report: Report, root_folder: str + self, local_graphs: list[tuple[str, TerraformLocalGraph]], report: Report, root_folder: str ) -> None: self.definitions = {} self.breadcrumbs = {} - for graph in local_graph: + self.all_graphs = [] + for subgraph_path, graph in local_graphs: for vertex in graph.vertices: if vertex.block_type == BlockType.RESOURCE: vertex_id = vertex.attributes.get(CustomAttributes.TF_RESOURCE_ADDRESS) report.add_resource(f"{vertex.path}:{vertex_id}") igraph_graph = self.graph_manager.save_graph(graph) - all_graphs.append(igraph_graph) + self.all_graphs.append((igraph_graph, subgraph_path)) current_definitions, current_breadcrumbs = convert_graph_vertices_to_tf_definitions( graph.vertices, root_folder,