Skip to content

Commit

Permalink
platform(terraform): upload tf sub graphs (#5596)
Browse files Browse the repository at this point in the history
* implement

* fixes

* fix merge conflicts

* fix lint

* fix lint

* fix lint

* fix types

* fix types

* fix types
  • Loading branch information
YaaraVerner authored Oct 1, 2023
1 parent ebb1fca commit ee564f4
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 57 deletions.
8 changes: 3 additions & 5 deletions checkov/common/bridgecrew/platform_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from os import path
from pathlib import Path
from time import sleep
from typing import List, Dict, TYPE_CHECKING, Any, cast
from typing import List, Dict, TYPE_CHECKING, Any, cast, Optional

import boto3
import dpath
Expand All @@ -37,7 +37,7 @@
from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SCANNABLE_PACKAGE_FILES
from checkov.common.bridgecrew.check_type import CheckType
from checkov.common.runners.base_runner import filter_ignored_paths
from checkov.common.typing import _CicdDetails
from checkov.common.typing import _CicdDetails, LibraryGraph
from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
from checkov.common.util.data_structures_utils import merge_dicts
from checkov.common.util.dockerfile import is_dockerfile
Expand Down Expand Up @@ -66,8 +66,6 @@
from mypy_boto3_s3.client import S3Client
from requests import Response
from typing_extensions import TypeGuard
from igraph import Graph
from networkx import DiGraph


SLEEP_SECONDS = 1
Expand Down Expand Up @@ -558,7 +556,7 @@ def persist_logs_stream(self, logs_stream: StringIO) -> None:
log_path = f'{self.support_repo_path}/checkov_results' if self.support_repo_path == self.repo_path else self.support_repo_path
persist_logs_stream(logs_stream, self.s3_client, self.support_bucket, log_path)

def persist_graphs(self, graphs: dict[str, DiGraph | Graph], absolute_root_folder: str = '') -> None:
def persist_graphs(self, graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]], absolute_root_folder: str = '') -> None:
if not self.use_s3_integration or not self.s3_client:
return
if not self.bucket or not self.repo_path:
Expand Down
27 changes: 14 additions & 13 deletions checkov/common/bridgecrew/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import itertools
from concurrent import futures
from io import StringIO
from typing import Any, TYPE_CHECKING
from typing import Any, TYPE_CHECKING, Optional
from collections import defaultdict

import dpath
Expand All @@ -23,7 +23,7 @@

from checkov.common.bridgecrew.check_type import CheckType
from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS
from checkov.common.typing import _ReducedScanReport
from checkov.common.typing import _ReducedScanReport, LibraryGraph
from checkov.common.util.file_utils import compress_string_io_tar
from checkov.common.util.igraph_serialization import serialize_to_json
from checkov.common.util.json_utils import CustomJSONEncoder
Expand Down Expand Up @@ -153,14 +153,14 @@ def enrich_and_persist_checks_metadata(


def persist_graphs(
graphs: dict[str, DiGraph | Graph | PyDiGraph[Any, Any]],
s3_client: S3Client,
bucket: str,
full_repo_object_key: str,
timeout: int,
absolute_root_folder: str = '',
graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]],
s3_client: S3Client,
bucket: str,
full_repo_object_key: str,
timeout: int,
absolute_root_folder: str = ''
) -> None:
def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder: str = '') -> None:
def _upload_graph(check_type: str, graph: LibraryGraph, _absolute_root_folder: str = '', subgraph_path: Optional[str] = None) -> None:
if isinstance(graph, DiGraph):
json_obj = node_link_data(graph)
graph_file_name = FILE_NAME_NETWORKX
Expand All @@ -173,7 +173,8 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder
else:
logging.error(f"unsupported graph type '{graph.__class__.__name__}'")
return
s3_key = f'{graphs_repo_object_key}/{check_type}/{graph_file_name}'
multi_graph_addition = (f"multi-graph/{subgraph_path}" if subgraph_path is not None else '').rstrip("/")
s3_key = os.path.join(graphs_repo_object_key, check_type, multi_graph_addition, graph_file_name)
try:
_put_json_object(s3_client, json_obj, bucket, s3_key)
except Exception:
Expand All @@ -183,9 +184,9 @@ def _upload_graph(check_type: str, graph: DiGraph | Graph, _absolute_root_folder

with futures.ThreadPoolExecutor() as executor:
futures.wait(
[executor.submit(_upload_graph, check_type, graph, absolute_root_folder) for
check_type, graph in graphs.items()],
[executor.submit(_upload_graph, check_type, graph, absolute_root_folder, subgraph_path) for
check_type, graphs in graphs.items() for graph, subgraph_path in graphs],
return_when=futures.FIRST_EXCEPTION,
timeout=timeout
)
logging.info(f"Done persisting {len(graphs)} graphs")
logging.info(f"Done persisting {len(list(itertools.chain(*graphs.values())))} graphs")
28 changes: 18 additions & 10 deletions checkov/common/runners/runner_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from checkov.common.output.spdx import SPDX
from checkov.common.parallelizer.parallel_runner import parallel_runner
from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds
from checkov.common.typing import _ExitCodeThresholds, _BaseRunner, _ScaExitCodeThresholds, LibraryGraph
from checkov.common.util import data_structures_utils
from checkov.common.util.banner import tool as tool_name
from checkov.common.util.data_structures_utils import pickle_deepcopy
Expand All @@ -53,8 +53,6 @@
from checkov.common.output.baseline import Baseline
from checkov.common.runners.base_runner import BaseRunner # noqa
from checkov.runner_filter import RunnerFilter
from igraph import Graph
from networkx import DiGraph

CONSOLE_OUTPUT = "console"
CHECK_BLOCK_TYPES = frozenset(["resource", "data", "provider", "module"])
Expand Down Expand Up @@ -97,7 +95,7 @@ def __init__(
self._check_type_to_report_map: dict[str, Report] = {} # used for finding reports with the same check type
self.licensing_integration = licensing_integration # can be maniuplated by unit tests
self.secrets_omitter_class = secrets_omitter_class
self.check_type_to_graph: dict[str, Graph | DiGraph] = {}
self.check_type_to_graph: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {}
for runner in runners:
if isinstance(runner, image_runner):
runner.image_referencers = self.image_referencing_runners
Expand Down Expand Up @@ -126,7 +124,7 @@ def run(
# This is the only runner, so raise a clear indication of failure
raise ModuleNotEnabledError(f'The framework "{runner_check_type}" is part of the "{self.licensing_integration.get_subscription_for_runner(runner_check_type).name}" module, which is not enabled in the platform')
else:
def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, DiGraph | Graph | None]:
def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | None, Optional[list[tuple[LibraryGraph, Optional[str]]]]]:
report = runner.run(
root_folder=root_folder,
external_checks_dir=external_checks_dir,
Expand All @@ -140,7 +138,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
report = Report(check_type=runner.check_type)

if runner.graph_manager:
return report, runner.check_type, runner.graph_manager.get_reader_endpoint()
return report, runner.check_type, self.extract_graphs_from_runner(runner)
return report, None, None

valid_runners = []
Expand Down Expand Up @@ -174,10 +172,10 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
full_check_type_to_graph = {}
for result in parallel_runner_results:
if result is not None:
report, check_type, graph = result
report, check_type, graphs = result
reports.append(report)
if check_type is not None and graph is not None:
full_check_type_to_graph[check_type] = graph
if check_type is not None and graphs is not None:
full_check_type_to_graph[check_type] = graphs
self.check_type_to_graph = full_check_type_to_graph

merged_reports = self._merge_reports(reports)
Expand All @@ -192,7 +190,7 @@ def _parallel_run(runner: _BaseRunner) -> tuple[Report | list[Report], str | Non
self._handle_report(scan_report, repo_root_for_plan_enrichment)

if not self.check_type_to_graph:
self.check_type_to_graph = {runner.check_type: runner.graph_manager.get_reader_endpoint() for runner
self.check_type_to_graph = {runner.check_type: self.extract_graphs_from_runner(runner) for runner
in self.runners if runner.graph_manager}
return self.scan_reports

Expand Down Expand Up @@ -750,3 +748,13 @@ def extract_git_info_from_account_id(account_id: str) -> tuple[str, str]:
git_org, git_repository = "", ""

return git_org, git_repository

@staticmethod
def extract_graphs_from_runner(runner: _BaseRunner) -> list[tuple[LibraryGraph, Optional[str]]]:
    """Collect the graphs a runner produced as ``(graph, subgraph_path)`` pairs.

    A runner that builds multiple sub graphs (currently only the terraform
    runner) exposes an ``all_graphs`` attribute; when that is truthy it is
    returned unchanged. Otherwise a runner with a graph manager contributes a
    single pair whose subgraph path is ``None``, and a runner with neither
    yields an empty list.
    """
    multi_graphs = getattr(runner, 'all_graphs', None)
    if multi_graphs:
        return multi_graphs  # type:ignore[no-any-return]
    graph_manager = runner.graph_manager
    if graph_manager:
        return [(graph_manager.get_reader_endpoint(), None)]
    return []
7 changes: 3 additions & 4 deletions checkov/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import platform
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional

import argcomplete
import configargparse
Expand Down Expand Up @@ -45,6 +45,7 @@
from checkov.common.bridgecrew.check_type import checkov_runners, CheckType
from checkov.common.resource_code_logger_filter import add_resource_code_filter_to_logger
from checkov.common.runners.runner_registry import RunnerRegistry
from checkov.common.typing import LibraryGraph
from checkov.common.util import prompt
from checkov.common.util.banner import banner as checkov_banner, tool as checkov_tool
from checkov.common.util.config_utils import get_default_config_paths
Expand Down Expand Up @@ -83,8 +84,6 @@
from checkov.common.output.report import Report
from configargparse import Namespace
from typing_extensions import Literal
from igraph import Graph
from networkx import DiGraph

signal.signal(signal.SIGINT, lambda x, y: sys.exit(''))

Expand Down Expand Up @@ -131,7 +130,7 @@ def __init__(self, argv: list[str] = sys.argv[1:]) -> None:
self.runners = DEFAULT_RUNNERS
self.scan_reports: "list[Report]" = []
self.run_metadata: dict[str, str | list[str]] = {}
self.graphs: dict[str, DiGraph | Graph] = {}
self.graphs: dict[str, list[tuple[LibraryGraph, Optional[str]]]] = {}
self.url: str | None = None

self.parse_config(argv=argv)
Expand Down
14 changes: 8 additions & 6 deletions checkov/terraform/graph_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def build_multi_graph_from_source_directory(
external_modules_download_path: str = DEFAULT_EXTERNAL_MODULES_DIR,
vars_files: list[str] | None = None,
create_graph: bool = True,
) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]]:
) -> list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]]:
logging.info("Parsing HCL files in source dir to multi graph")
modules_with_definitions = self.parser.parse_multi_graph_hcl_module(
source_dir=source_dir,
Expand All @@ -45,13 +45,15 @@ def build_multi_graph_from_source_directory(
create_graph=create_graph,
)

graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]]]] = []
graphs: list[tuple[TerraformLocalGraph | None, list[dict[TFDefinitionKey, dict[str, Any]]], str]] = []
for module, tf_definitions in modules_with_definitions:
if create_graph and module:
logging.info("Building graph from parsed module")
local_graph = local_graph_class(module)
local_graph.build_graph(render_variables=render_variables)
graphs.append((local_graph, tf_definitions))
subgraph_abs_path = module.source_dir
subgraph_path = subgraph_abs_path[subgraph_abs_path.rindex(source_dir) + len(source_dir) + 1:]
graphs.append((local_graph, tf_definitions, subgraph_path))

return graphs

Expand Down Expand Up @@ -111,15 +113,15 @@ def build_graph_from_definitions(
return local_graph

def build_multi_graph_from_definitions(self, definitions: dict[TFDefinitionKey, dict[str, Any]],
render_variables: bool = True) -> list[TerraformLocalGraph]:
render_variables: bool = True) -> list[tuple[str, TerraformLocalGraph]]:
module, tf_definitions = self.parser.parse_hcl_module_from_tf_definitions(definitions, "", self.source)
dirs_to_definitions = self.parser.create_definition_by_dirs(tf_definitions)

graphs: list[TerraformLocalGraph] = []
graphs: list[tuple[str, TerraformLocalGraph]] = []
for source_path, dir_definitions in dirs_to_definitions.items():
module, parsed_tf_definitions = self.parser.parse_hcl_module_from_multi_tf_definitions(dir_definitions, source_path, self.source)
local_graph = TerraformLocalGraph(module)
local_graph.build_graph(render_variables=render_variables)
graphs.append(local_graph)
graphs.append((source_path, local_graph))

return graphs
Loading

0 comments on commit ee564f4

Please sign in to comment.