Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

platform(terraform): upload tf sub graphs #5596

Merged
merged 11 commits into from
Oct 1, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions checkov/common/bridgecrew/platform_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from os import path
from pathlib import Path
from time import sleep
from typing import List, Dict, TYPE_CHECKING, Any, cast
from typing import List, Dict, TYPE_CHECKING, Any, cast, Tuple, Optional

import boto3
import dpath
Expand Down Expand Up @@ -558,7 +558,7 @@ def persist_logs_stream(self, logs_stream: StringIO) -> None:
log_path = f'{self.support_repo_path}/checkov_results' if self.support_repo_path == self.repo_path else self.support_repo_path
persist_logs_stream(logs_stream, self.s3_client, self.support_bucket, log_path)

def persist_graphs(self, graphs: dict[str, DiGraph | Graph], absolute_root_folder: str = '') -> None:
def persist_graphs(self, graphs: dict[str, list[Tuple[DiGraph | Graph, Optional[str]]]], absolute_root_folder: str = '') -> None:
YaaraVerner marked this conversation as resolved.
Show resolved Hide resolved
if not self.use_s3_integration or not self.s3_client:
return
if not self.bucket or not self.repo_path:
Expand Down
15 changes: 8 additions & 7 deletions checkov/common/bridgecrew/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import itertools
from concurrent import futures
from io import StringIO
from typing import Any, TYPE_CHECKING
from typing import Any, TYPE_CHECKING, Tuple, Optional
from collections import defaultdict

import dpath
Expand Down Expand Up @@ -146,9 +146,9 @@ def enrich_and_persist_checks_metadata(
return checks_metadata_paths


def persist_graphs(graphs: dict[str, DiGraph | Graph], s3_client: S3Client, bucket: str, full_repo_object_key: str,
def persist_graphs(graphs: dict[str, list[Tuple[DiGraph | Graph, Optional[str]]]], s3_client: S3Client, bucket: str, full_repo_object_key: str,
timeout: int, absolute_root_folder: str = '') -> None:
def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder: str = '') -> None:
def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder: str = '', subgraph_path: Optional[str] = None) -> None:
if isinstance(graph, DiGraph):
json_obj = node_link_data(graph)
graph_file_name = 'graph_networkx.json'
Expand All @@ -158,7 +158,8 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder
else:
logging.error(f"unsupported graph type '{graph.__class__.__name__}'")
return
s3_key = f'{graphs_repo_object_key}/{check_type}/{graph_file_name}'
multi_graph_addition = (f"multi-graph/{subgraph_path}" if subgraph_path is not None else '').rstrip("/")
s3_key = os.path.join(graphs_repo_object_key, check_type, multi_graph_addition, graph_file_name)
try:
_put_json_object(s3_client, json_obj, bucket, s3_key)
except Exception:
Expand All @@ -168,9 +169,9 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder

with futures.ThreadPoolExecutor() as executor:
futures.wait(
[executor.submit(_upload_graph, check_type, graph, absolute_root_folder) for
check_type, graph in graphs.items()],
[executor.submit(_upload_graph, check_type, graph, absolute_root_folder, subgraph_path) for
check_type, graphs in graphs.items() for graph, subgraph_path in graphs],
return_when=futures.FIRST_EXCEPTION,
timeout=timeout
)
logging.info(f"Done persisting {len(graphs)} graphs")
logging.info(f"Done persisting {len(list(itertools.chain(*graphs.values())))} graphs")
30 changes: 19 additions & 11 deletions checkov/common/runners/runner_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from collections import defaultdict
from collections.abc import Iterable
from pathlib import Path
from typing import List, Dict, Any, Optional, cast, TYPE_CHECKING, Type
from typing import List, Dict, Any, Optional, cast, TYPE_CHECKING, Type, Tuple

from typing_extensions import Literal

Expand All @@ -37,7 +37,7 @@
from checkov.common.output.spdx import SPDX
from checkov.common.parallelizer.parallel_runner import parallel_runner
from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds
from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds, LibraryGraph
from checkov.common.util import data_structures_utils
from checkov.common.util.banner import tool as tool_name
from checkov.common.util.data_structures_utils import pickle_deepcopy
Expand All @@ -53,8 +53,6 @@
from checkov.common.output.baseline import Baseline
from checkov.common.runners.base_runner import BaseRunner # noqa
from checkov.runner_filter import RunnerFilter
from igraph import Graph
from networkx import DiGraph

CONSOLE_OUTPUT = "console"
CHECK_BLOCK_TYPES = frozenset(["resource", "data", "provider", "module"])
Expand Down Expand Up @@ -97,7 +95,7 @@ def __init__(
self._check_type_to_report_map: dict[str, Report] = {} # used for finding reports with the same check type
self.licensing_integration = licensing_integration # can be manipulated by unit tests
self.secrets_omitter_class = secrets_omitter_class
self.check_type_to_graph: dict[str, Graph | DiGraph] = {}
self.check_type_to_graph: dict[str, list[Tuple[LibraryGraph, Optional[str]]]] = {}
for runner in runners:
if isinstance(runner, image_runner):
runner.image_referencers = self.image_referencing_runners
Expand Down Expand Up @@ -126,7 +124,7 @@ def run(
# This is the only runner, so raise a clear indication of failure
raise ModuleNotEnabledError(f'The framework "{runner_check_type}" is part of the "{self.licensing_integration.get_subscription_for_runner(runner_check_type).name}" module, which is not enabled in the platform')
else:
def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, DiGraph | Graph | None]:
def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, Optional[list[Tuple[LibraryGraph, Optional[str]]]]]:
report = runner.run(
root_folder=root_folder,
external_checks_dir=external_checks_dir,
Expand All @@ -140,7 +138,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
report = Report(check_type=runner.check_type)

if runner.graph_manager:
return report, runner.check_type, runner.graph_manager.get_reader_endpoint()
return report, runner.check_type, self.extract_graphs_from_runner(runner)
return report, None, None

valid_runners = []
Expand Down Expand Up @@ -174,10 +172,10 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
full_check_type_to_graph = {}
for result in parallel_runner_results:
if result is not None:
report, check_type, graph = result
report, check_type, graphs = result
reports.append(report)
if check_type is not None and graph is not None:
full_check_type_to_graph[check_type] = graph
if check_type is not None and graphs is not None:
full_check_type_to_graph[check_type] = graphs
self.check_type_to_graph = full_check_type_to_graph

merged_reports = self._merge_reports(reports)
Expand All @@ -192,7 +190,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
self._handle_report(scan_report, repo_root_for_plan_enrichment)

if not self.check_type_to_graph:
self.check_type_to_graph = {runner.check_type: runner.graph_manager.get_reader_endpoint() for runner
self.check_type_to_graph = {runner.check_type: self.extract_graphs_from_runner(runner) for runner
in self.runners if runner.graph_manager}
return self.scan_reports

Expand Down Expand Up @@ -750,3 +748,13 @@ def extract_git_info_from_account_id(account_id: str) -> tuple[str, str]:
git_org, git_repository = "", ""

return git_org, git_repository

@staticmethod
def extract_graphs_from_runner(runner: _BaseRunner) -> List[Tuple[LibraryGraph, Optional[str]]]:
    """Return the runner's graphs as ``(graph, subgraph_path)`` pairs.

    The terraform runner, when splitting a scan into sub-graphs, exposes an
    ``all_graphs`` attribute already holding ``(graph, subgraph_path)``
    tuples.  Every other graph-capable runner exposes a single graph via its
    graph manager, for which there is no sub-graph path (``None``).

    :param runner: a runner that may or may not own a graph manager
    :return: list of (graph, subgraph_path) tuples; empty if the runner has
             no graph manager at all
    """
    # 'all_graphs' exists only for the terraform runner (multi-graph mode)
    all_graphs = getattr(runner, 'all_graphs', None)
    if all_graphs:
        return all_graphs  # type:ignore[no-any-return]
    elif runner.graph_manager:
        # single-graph runner: no sub-graph path to report
        return [(runner.graph_manager.get_reader_endpoint(), None)]
    return []
7 changes: 3 additions & 4 deletions checkov/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import platform
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Tuple, Optional

import argcomplete
import configargparse
Expand Down Expand Up @@ -45,6 +45,7 @@
from checkov.common.bridgecrew.check_type import checkov_runners, CheckType
from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
from checkov.common.runners.runner_registry import RunnerRegistry
from checkov.common.typing import LibraryGraph
from checkov.common.util import prompt
from checkov.common.util.banner import banner as checkov_banner, tool as checkov_tool
from checkov.common.util.config_utils import get_default_config_paths
Expand Down Expand Up @@ -83,8 +84,6 @@
from checkov.common.output.report import Report
from configargparse import Namespace
from typing_extensions import Literal
from igraph import Graph
from networkx import DiGraph

signal.signal(signal.SIGINT, lambda x, y: sys.exit(''))

Expand Down Expand Up @@ -131,7 +130,7 @@ def __init__(self, argv: list[str] = sys.argv[1:]) -> None:
self.runners = DEFAULT_RUNNERS
self.scan_reports: "list[Report]" = []
self.run_metadata: dict[str, str | list[str]] = {}
self.graphs: dict[str, DiGraph | Graph] = {}
self.graphs: dict[str, list[Tuple[LibraryGraph, Optional[str]]]] = {}
self.url: str | None = None

self.parse_config(argv=argv)
Expand Down
14 changes: 8 additions & 6 deletions checkov/terraform/graph_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def build_multi_graph_from_source_directory(
external_modules_download_path: str = DEFAULT_EXTERNAL_MODULES_DIR,
vars_files: list[str] | None = None,
create_graph: bool = True,
) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]]:
) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]]:
logging.info("Parsing HCL files in source dir to multi graph")
modules_with_definitions = self.parser.parse_multi_graph_hcl_module(
source_dir=source_dir,
Expand All @@ -45,13 +45,15 @@ def build_multi_graph_from_source_directory(
create_graph=create_graph,
)

graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]] = []
graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]] = []
for module, tf_definitions in modules_with_definitions:
if create_graph and module:
logging.info("Building graph from parsed module")
local_graph = local_graph_class(module)
local_graph.build_graph(render_variables=render_variables)
graphs.append((local_graph, tf_definitions))
subgraph_abs_path = module.source_dir
subgraph_path = subgraph_abs_path[subgraph_abs_path.rindex(source_dir) + len(source_dir) + 1:]
graphs.append((local_graph, tf_definitions, subgraph_path))

return graphs

Expand Down Expand Up @@ -111,15 +113,15 @@ def build_graph_from_definitions(
return local_graph

def build_multi_graph_from_definitions(self, definitions: dict[TFDefinitionKey, dict[str, Any]],
                                       render_variables: bool = True) -> list[tuple[str, TerraformLocalGraph]]:
    """Build one local graph per source directory from parsed definitions.

    The definitions are first normalized via the parser, then grouped by
    directory; each directory is parsed into its own module and a separate
    ``TerraformLocalGraph`` is built for it.

    :param definitions: parsed Terraform definitions keyed by definition file
    :param render_variables: whether to render variables while building each graph
    :return: list of ``(source_path, graph)`` tuples, one per directory
    """
    # the module from the initial parse is not needed — only the normalized definitions are
    _, tf_definitions = self.parser.parse_hcl_module_from_tf_definitions(definitions, "", self.source)
    dirs_to_definitions = self.parser.create_definition_by_dirs(tf_definitions)

    graphs: list[tuple[str, TerraformLocalGraph]] = []
    for source_path, dir_definitions in dirs_to_definitions.items():
        module, parsed_tf_definitions = self.parser.parse_hcl_module_from_multi_tf_definitions(dir_definitions, source_path, self.source)
        local_graph = TerraformLocalGraph(module)
        local_graph.build_graph(render_variables=render_variables)
        graphs.append((source_path, local_graph))

    return graphs
52 changes: 33 additions & 19 deletions checkov/terraform/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from typing_extensions import TypeAlias # noqa[TC002]

from checkov.common.bridgecrew.check_type import CheckType
from checkov.common.graph.checks_infra.registry import BaseRegistry
from checkov.common.graph.graph_builder.consts import GraphSource
from checkov.common.output.extra_resource import ExtraResource
from checkov.common.parallelizer.parallel_runner import parallel_runner
from checkov.common.output.graph_record import GraphRecord
Expand All @@ -20,6 +22,7 @@
from checkov.common.util.secrets import omit_secret_value_from_checks
from checkov.runner_filter import RunnerFilter
from checkov.terraform.base_runner import BaseTerraformRunner
from checkov.terraform.graph_manager import TerraformGraphManager
from checkov.terraform.modules.module_objects import TFDefinitionKey, TFModule
from checkov.terraform.context_parsers.registry import parser_registry
from checkov.terraform.evaluation.base_variable_evaluation import BaseVariableEvaluation
Expand All @@ -29,9 +32,10 @@
from checkov.terraform.graph_builder.local_graph import TerraformLocalGraph
from checkov.terraform.tag_providers import get_resource_tags
from checkov.common.runners.base_runner import strtobool
from checkov.terraform.tf_parser import TFParser

if TYPE_CHECKING:
from checkov.common.typing import _SkippedCheck, LibraryGraph
from checkov.common.typing import _SkippedCheck, LibraryGraph, LibraryGraphConnector

_TerraformContext: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]"
_TerraformDefinitions: TypeAlias = "dict[TFDefinitionKey, dict[str, Any]]"
Expand All @@ -42,6 +46,18 @@
class Runner(BaseTerraformRunner[_TerraformDefinitions, _TerraformContext, TFDefinitionKey]):
check_type = CheckType.TERRAFORM # noqa: CCE003 # a static attribute

def __init__(
    self,
    parser: TFParser | None = None,
    db_connector: LibraryGraphConnector | None = None,
    external_registries: list[BaseRegistry] | None = None,
    source: str = GraphSource.TERRAFORM,
    graph_class: type[TerraformLocalGraph] = TerraformLocalGraph,
    graph_manager: TerraformGraphManager | None = None,
) -> None:
    """Initialize the terraform runner.

    All parameters are forwarded unchanged to the base terraform runner;
    this override only adds the ``all_graphs`` accumulator.
    """
    super().__init__(parser, db_connector, external_registries, source, graph_class, graph_manager)
    # (graph, subgraph_path) pairs collected while scanning; filled in
    # _update_definitions_and_breadcrumbs and read back by the registry's
    # extract_graphs_from_runner for platform upload
    self.all_graphs: list[tuple[LibraryGraph, str]] = []

def run(
self,
root_folder: str | None,
Expand All @@ -57,8 +73,7 @@ def run(
report = Report(self.check_type)
parsing_errors: dict[str, Exception] = {}
self.load_external_checks(external_checks_dir)
local_graph = None
all_graphs: list[LibraryGraph] = []
local_graphs = None
if self.context is None or self.definitions is None or self.breadcrumbs is None:
self.definitions = {}
logging.info("Scanning root folder and producing fresh tf_definitions and context")
Expand All @@ -76,11 +91,11 @@ def run(
vars_files=runner_filter.var_files,
create_graph=CHECKOV_CREATE_GRAPH,
)
local_graph = []
for graph, definitions in graphs_with_definitions:
local_graphs = []
for graph, definitions, subgraph_path in graphs_with_definitions:
for definition in definitions:
self.definitions.update(definition)
local_graph.append(graph)
local_graphs.append((subgraph_path, graph))
else:
single_graph, self.definitions = self.graph_manager.build_graph_from_source_directory(
source_dir=root_folder,
Expand All @@ -93,30 +108,28 @@ def run(
create_graph=CHECKOV_CREATE_GRAPH,
)
# Make graph a list to allow single processing method for all cases
local_graph = [single_graph]
local_graphs = [('', single_graph)]
elif files:
files = [os.path.abspath(file) for file in files]
root_folder = os.path.split(os.path.commonprefix(files))[0]
self._parse_files(files, parsing_errors)

if CHECKOV_CREATE_GRAPH:
if tf_split_graph:
local_graph = self.graph_manager.build_multi_graph_from_definitions( # type:ignore[assignment] # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
local_graphs = self.graph_manager.build_multi_graph_from_definitions( # type:ignore[assignment] # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
self.definitions
)
else:
# local_graph needs to be a list to allow supporting multi graph
local_graph = [self.graph_manager.build_graph_from_definitions(self.definitions)]
local_graphs = [('', self.graph_manager.build_graph_from_definitions(self.definitions))]
else:
raise Exception("Root directory was not specified, files were not specified")

if CHECKOV_CREATE_GRAPH and local_graph:
if CHECKOV_CREATE_GRAPH and local_graphs:
self._update_definitions_and_breadcrumbs(
all_graphs,
local_graph, # type:ignore[arg-type] # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
local_graphs, # type:ignore[arg-type] # will be fixed after removing 'CHECKOV_CREATE_GRAPH'
report,
root_folder,
)
root_folder)
else:
logging.info("Scanning root folder using existing tf_definitions")
if root_folder is None:
Expand All @@ -129,8 +142,8 @@ def run(
report.add_parsing_errors(parsing_errors.keys())

if CHECKOV_CREATE_GRAPH:
if all_graphs:
for igraph_graph in all_graphs:
if self.all_graphs:
for igraph_graph, _ in self.all_graphs:
graph_report = self.get_graph_checks_report(root_folder, runner_filter, graph=igraph_graph)
merge_reports(report, graph_report)
else:
Expand Down Expand Up @@ -177,17 +190,18 @@ def parse_file(file: str) -> tuple[str, dict[str, Any] | None, dict[str, Excepti
parsing_errors.update(file_parsing_errors)

def _update_definitions_and_breadcrumbs(
self, all_graphs: list[LibraryGraph], local_graph: list[TerraformLocalGraph], report: Report, root_folder: str
self, local_graphs: list[tuple[str, TerraformLocalGraph]], report: Report, root_folder: str
) -> None:
self.definitions = {}
self.breadcrumbs = {}
for graph in local_graph:
self.all_graphs = []
for subgraph_path, graph in local_graphs:
for vertex in graph.vertices:
if vertex.block_type == BlockType.RESOURCE:
vertex_id = vertex.attributes.get(CustomAttributes.TF_RESOURCE_ADDRESS)
report.add_resource(f"{vertex.path}:{vertex_id}")
igraph_graph = self.graph_manager.save_graph(graph)
all_graphs.append(igraph_graph)
self.all_graphs.append((igraph_graph, subgraph_path))
current_definitions, current_breadcrumbs = convert_graph_vertices_to_tf_definitions(
graph.vertices,
root_folder,
Expand Down