Skip to content

Commit

Permalink
fix(sast): fix sast report format (#5811)
Browse files Browse the repository at this point in the history
* fix sast report format

* .

* fix pr comments

* lint

* lint

* ???

* lint

* lint

* on prem remove code block

* lint

* mypy

* mypy

* lint

* fix ut

* fix tests?

* print data

* .

* .

* .

* .

* revert

* mypy

* ?

* mypy
  • Loading branch information
achiar99 authored Dec 10, 2023
1 parent 4a67bce commit 55756d8
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 11 deletions.
65 changes: 61 additions & 4 deletions checkov/common/bridgecrew/platform_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from time import sleep
from types import MethodType
from typing import List, Dict, TYPE_CHECKING, Any, cast, Optional, Union
from typing import List, Dict, TYPE_CHECKING, Any, Set, cast, Optional, Union
from urllib.parse import urlparse

import boto3
Expand All @@ -34,8 +34,9 @@
from checkov.common.bridgecrew.wrapper import persist_assets_results, reduce_scan_reports, persist_checks_results, \
enrich_and_persist_checks_metadata, checkov_results_prefix, persist_run_metadata, _put_json_object, \
persist_logs_stream, persist_graphs, persist_resource_subgraph_maps, persist_reachability_results
from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SCANNABLE_PACKAGE_FILES
from checkov.common.models.consts import SAST_SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SCANNABLE_PACKAGE_FILES
from checkov.common.runners.base_runner import filter_ignored_paths
from checkov.common.sast.consts import SastLanguages
from checkov.common.typing import _CicdDetails, LibraryGraph
from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
from checkov.common.util.data_structures_utils import merge_dicts
Expand Down Expand Up @@ -64,6 +65,7 @@
from checkov.secrets.coordinator import EnrichedSecret
from mypy_boto3_s3.client import S3Client
from typing_extensions import TypeGuard
from checkov.sast.prisma_models.report import Match

SLEEP_SECONDS = 1

Expand Down Expand Up @@ -129,6 +131,8 @@ def __init__(self) -> None:
self.no_cert_verify: bool = False
self.on_prem: bool = False
self.daemon_process = False # set to 'True' when running in multiprocessing 'spawn' mode
self.scan_dir: List[str] = []
self.scan_file: List[str] = []

def init_instance(self, platform_integration_data: dict[str, Any]) -> None:
"""This is mainly used for recreating the instance without interacting with the platform again"""
Expand Down Expand Up @@ -474,6 +478,7 @@ def persist_repository(
files: list[str] | None = None,
excluded_paths: list[str] | None = None,
included_paths: list[str] | None = None,
sast_languages: Set[SastLanguages] | None = None
) -> None:
"""
Persist the repository found on root_dir path to Bridgecrew's platform. If --file flag is used, only files
Expand All @@ -496,6 +501,12 @@ def persist_repository(
continue
if file_extension in SUPPORTED_FILE_EXTENSIONS or f_name in SUPPORTED_FILES:
files_to_persist.append(FileToPersist(f, os.path.relpath(f, root_dir)))
if sast_languages:
for framwork in sast_languages:
if file_extension in SAST_SUPPORTED_FILE_EXTENSIONS[framwork]:
files_to_persist.append(FileToPersist(f, os.path.relpath(f, root_dir)))
break

else:
for root_path, d_names, f_names in os.walk(root_dir):
# self.excluded_paths only contains the config fetched from the platform.
Expand All @@ -506,10 +517,15 @@ def persist_repository(
_, file_extension = os.path.splitext(file_path)
if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 and file_extension in SCANNABLE_PACKAGE_FILES:
continue
full_file_path = os.path.join(root_path, file_path)
relative_file_path = os.path.relpath(full_file_path, root_dir)
if file_extension in SUPPORTED_FILE_EXTENSIONS or file_path in SUPPORTED_FILES or is_dockerfile(file_path):
full_file_path = os.path.join(root_path, file_path)
relative_file_path = os.path.relpath(full_file_path, root_dir)
files_to_persist.append(FileToPersist(full_file_path, relative_file_path))
if sast_languages:
for framwork in sast_languages:
if file_extension in SAST_SUPPORTED_FILE_EXTENSIONS[framwork]:
files_to_persist.append(FileToPersist(full_file_path, relative_file_path))
break

self.persist_files(files_to_persist)

Expand All @@ -534,6 +550,46 @@ def persist_git_configuration(self, root_dir: str | Path, git_config_folders: li

self.persist_files(files_to_persist)

def adjust_sast_match_location_path(self, match: Match) -> None:
    """Re-root ``match.location.path`` under ``self.repo_path``, in place.

    The first entry of ``self.scan_dir`` that the path starts with has that
    leading portion swapped for ``self.repo_path``. If no scan directory
    matches and the path is equal to an entry of ``self.scan_file``, the
    file's parent directory is swapped for ``self.repo_path`` instead, giving
    ``<repo_path>/<file name>``. A path matching neither list is left as is.

    Only the leading directory is rewritten. The previous implementation used
    ``str.replace``, which also rewrote later occurrences of the same text
    (``src/app/src/x.py``, ``a/a.py``) and, for a scanned file without a
    parent directory, replaced the empty string, i.e. inserted the repo path
    between every character of the file name.

    :param match: SAST match whose ``location.path`` is updated.
    """
    current_path = match.location.path

    for scan_root in self.scan_dir:
        # NOTE(review): this is a plain string-prefix test, so "src" also
        # matches "src2/x.py" - kept as in the original, confirm whether a
        # path-boundary check is wanted.
        if current_path.startswith(scan_root):
            # slice off the prefix instead of str.replace() so that only the
            # leading occurrence is substituted
            match.location.path = self.repo_path + current_path[len(scan_root):]  # type: ignore
            return

    for scanned_file in self.scan_file:
        if current_path == scanned_file:
            # keep only the file name; works for "dir/file", "/file" and "file"
            file_name = current_path.rsplit('/', 1)[-1]
            match.location.path = self.repo_path + '/' + file_name  # type: ignore
            return

@staticmethod
def _delete_code_block_from_sast_report(report: Dict[str, Any]) -> None:
    """Blank out every ``code_block`` entry found anywhere inside ``report``.

    Dicts and lists are walked recursively and modified in place; any other
    kind of value is ignored. The value of each ``code_block`` key is replaced
    by an empty string.

    :param report: (possibly nested) structure of dicts and lists to scrub.
    """
    scrub = BcPlatformIntegration._delete_code_block_from_sast_report

    if isinstance(report, list):
        for element in report:
            scrub(element)
        return

    if not isinstance(report, dict):
        return

    for name in report:
        previous_value = report[name]
        if name == 'code_block':
            report[name] = ''
        # the value that was stored under the key is still descended into,
        # including the one just replaced
        scrub(previous_value)

def persist_sast_scan_results(self, reports: List[Report]) -> None:
    """Persist the SAST reports contained in ``reports``.

    Only reports whose ``check_type`` starts with ``'sast'`` and that carry a
    non-empty ``sast_report`` are considered. For each of them the location
    path of every match is adjusted via ``adjust_sast_match_location_path``
    and the report is dumped with ``model_dump(mode='json')`` under its check
    type. When ``self.on_prem`` is set, the ``code_block`` entries of the
    dumped data are blanked before everything is handed to
    ``persist_checks_results``.

    :param reports: scan reports of any check type; non-SAST ones are skipped.
    """
    dumped_by_check_type = {}

    for scan_report in reports:
        check_type = scan_report.check_type
        # short-circuit keeps 'sast_report' from being read on non-SAST reports
        if not (check_type.startswith('sast') and scan_report.sast_report):  # type: ignore
            continue

        for matches_by_check in scan_report.sast_report.rule_match.values():  # type: ignore
            for rule_match in matches_by_check.values():
                for found in rule_match.matches:
                    self.adjust_sast_match_location_path(found)

        dumped_by_check_type[check_type] = scan_report.sast_report.model_dump(mode='json')  # type: ignore

    if self.on_prem:
        BcPlatformIntegration._delete_code_block_from_sast_report(dumped_by_check_type)

    persist_checks_results(dumped_by_check_type, self.s3_client, self.bucket, self.repo_path)  # type: ignore

def persist_scan_results(self, scan_reports: list[Report]) -> None:
"""
Persist checkov's scan result into bridgecrew's platform.
Expand Down Expand Up @@ -1068,6 +1124,7 @@ def _upload_run(self, args: argparse.Namespace, scan_reports: list[Report]) -> N
print(Style.BRIGHT + colored("Metadata upload complete", 'green',
attrs=['bold']) + Style.RESET_ALL)
self.persist_scan_results(scan_reports)
self.persist_sast_scan_results(scan_reports)
print(Style.BRIGHT + colored("Report upload complete", 'green',
attrs=['bold']) + Style.RESET_ALL)
self.commit_repository(args.branch)
Expand Down
6 changes: 6 additions & 0 deletions checkov/common/bridgecrew/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
FILE_NAME_IGRAPH = 'graph_igraph.json'
FILE_NAME_RUSTWORKX = 'graph_rustworkx.json'

# Check types starting with this prefix are treated as SAST reports and are
# skipped by the generic report reduction / checks-metadata enrichment.
SAST_FRAMEWORK_PREFIX = 'sast'


def _is_scanned_file(file: str) -> bool:
file_ending = os.path.splitext(file)[1]
Expand Down Expand Up @@ -80,6 +82,8 @@ def reduce_scan_reports(scan_reports: list[Report], on_prem: Optional[bool] = Fa
reduced_scan_reports: dict[str, _ReducedScanReport] = {}
for report in scan_reports:
check_type = report.check_type
if check_type.startswith(SAST_FRAMEWORK_PREFIX):
continue
reduced_keys = secrets_check_reduced_keys if check_type == CheckType.SECRETS else check_reduced_keys
if on_prem:
reduced_keys = tuple(k for k in reduced_keys if k != 'code_block')
Expand Down Expand Up @@ -167,6 +171,8 @@ def enrich_and_persist_checks_metadata(
checks_metadata_paths: dict[str, dict[str, str]] = {}
for scan_report in scan_reports:
check_type = scan_report.check_type
if check_type.startswith(SAST_FRAMEWORK_PREFIX):
continue
checks_metadata_object = _extract_checks_metadata(scan_report, full_repo_object_key, on_prem)
checks_metadata_object_path = f'{full_repo_object_key}/{checkov_results_prefix}/{check_type}/checks_metadata.json'
dpath.new(checks_metadata_paths, f"{check_type}/checks_metadata_path", checks_metadata_object_path)
Expand Down
8 changes: 8 additions & 0 deletions checkov/common/models/consts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re

from checkov.common.sast.consts import SastLanguages

SUPPORTED_FILE_EXTENSIONS = [".tf", ".yml", ".yaml", ".json", ".template", ".bicep", ".hcl"]
SUPPORTED_PACKAGE_FILES = {
"bower.json",
Expand All @@ -22,6 +24,12 @@

SCANNABLE_PACKAGE_FILES = SUPPORTED_PACKAGE_FILES | DEPENDENCY_TREE_SUPPORTED_FILES

# File extensions (including the leading dot) associated with each SAST language.
# NOTE(review): callers appear to index this mapping directly by language, so a
# SastLanguages member that is not listed here would raise KeyError - confirm
# that every language that can be selected has an entry.
SAST_SUPPORTED_FILE_EXTENSIONS = {
    SastLanguages.JAVA: ['.java'],
    SastLanguages.JAVASCRIPT: ['.js'],
    SastLanguages.PYTHON: ['.py']
}

ANY_VALUE = "CKV_ANY"
DOCKER_IMAGE_REGEX = re.compile(r'(?:[^\s\/]+\/)?([^\s:]+):?([^\s]*)')
access_key_pattern = re.compile("(?<![A-Z0-9])[A-Z0-9]{20}(?![A-Z0-9])") # nosec
Expand Down
12 changes: 10 additions & 2 deletions checkov/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, List
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, List, Set

import argcomplete
import configargparse
Expand Down Expand Up @@ -509,6 +509,7 @@ def run(self, banner: str = checkov_banner, tool: str = checkov_tool, source_typ

if self.config.directory:
exit_codes = []
bc_integration.scan_dir = self.config.directory
for root_folder in self.config.directory:
absolute_root_folder = os.path.abspath(root_folder)
if not os.path.exists(root_folder):
Expand Down Expand Up @@ -544,6 +545,7 @@ def run(self, banner: str = checkov_banner, tool: str = checkov_tool, source_typ
included_paths=included_paths,
git_configuration_folders=git_configuration_folders,
sca_supported_ir_report=runner_registry.sca_supported_ir_report,
sast_languages=runner_filter.sast_languages
)

if self.config.create_baseline:
Expand Down Expand Up @@ -601,8 +603,9 @@ def run(self, banner: str = checkov_banner, tool: str = checkov_tool, source_typ
if not self.config.skip_results_upload and not bc_integration.s3_setup_failed:
try:
if not bc_integration.on_prem:
bc_integration.persist_repository(os.path.dirname(self.config.dockerfile_path), files=files)
bc_integration.persist_repository(os.path.dirname(self.config.dockerfile_path), files=files, sast_languages=runner_filter.sast_languages)
bc_integration.persist_scan_results(self.scan_reports)
bc_integration.persist_sast_scan_results(self.scan_reports)
bc_integration.persist_image_scan_results(sca_runner.raw_report, self.config.dockerfile_path,
self.config.docker_image,
self.config.branch)
Expand All @@ -625,6 +628,7 @@ def run(self, banner: str = checkov_banner, tool: str = checkov_tool, source_typ
exit_code = self.print_results(runner_registry=runner_registry, url=self.url)
return exit_code
elif self.config.file:
bc_integration.scan_file = self.config.file
runner_registry.filter_runners_for_files(self.config.file)
self.scan_reports = runner_registry.run(
external_checks_dir=external_checks_dir,
Expand Down Expand Up @@ -663,6 +667,7 @@ def run(self, banner: str = checkov_banner, tool: str = checkov_tool, source_typ
files=files,
excluded_paths=runner_filter.excluded_paths,
git_configuration_folders=git_configuration_folders,
sast_languages=runner_filter.sast_languages
)

should_run_contributor_metrics = bc_integration.bc_api_key and self.config.repo_id and self.config.prisma_api_url
Expand Down Expand Up @@ -738,6 +743,7 @@ def upload_results(
included_paths: list[str] | None = None,
git_configuration_folders: list[str] | None = None,
sca_supported_ir_report: Report | None = None,
sast_languages: Set[SastLanguages] | None = None
) -> None:
"""Upload scan results and other relevant files"""

Expand All @@ -749,13 +755,15 @@ def upload_results(
files=files,
excluded_paths=excluded_paths,
included_paths=included_paths,
sast_languages=sast_languages
)
if git_configuration_folders:
bc_integration.persist_git_configuration(os.getcwd(), git_configuration_folders)
if sca_supported_ir_report:
scan_reports_to_upload = [report for report in self.scan_reports if report.check_type != 'sca_image']
scan_reports_to_upload.append(sca_supported_ir_report)
bc_integration.persist_scan_results(scan_reports_to_upload)
bc_integration.persist_sast_scan_results(scan_reports_to_upload)
bc_integration.persist_assets_scan_results(self.sast_data.imports_data)
bc_integration.persist_reachability_scan_results(self.sast_data.reachability_report)
bc_integration.persist_run_metadata(self.run_metadata)
Expand Down
13 changes: 9 additions & 4 deletions checkov/sast/engines/prisma_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,9 @@ def create_report(self, prisma_report: PrismaReport) -> List[SastReport]:
logging.debug(prisma_report.profiler)
reports: List[SastReport] = []
for lang, checks in prisma_report.rule_match.items():
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang)
sast_report = PrismaReport(rule_match={lang: checks}, errors=prisma_report.errors, profiler=prisma_report.profiler,
run_metadata=prisma_report.run_metadata, imports={}, reachability_report={})
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang, sast_report)
for check_id, match_rule in checks.items():
check_name = match_rule.check_name
check_cwe = match_rule.check_cwe
Expand Down Expand Up @@ -326,14 +328,15 @@ def create_report(self, prisma_report: PrismaReport) -> List[SastReport]:
if report_parsing_errors:
report.add_parsing_errors(report_parsing_errors)
reports.append(report)

for lang in prisma_report.imports:
for report in reports:
if report.language == lang:
report.sast_imports = prisma_report.imports[lang]
break
else:
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang)
sast_report = PrismaReport(rule_match={lang: checks}, errors=prisma_report.errors, profiler=prisma_report.profiler,
run_metadata=prisma_report.run_metadata, imports={}, reachability_report={})
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang, sast_report)
report.sast_imports = prisma_report.imports[lang]
reports.append(report)

Expand All @@ -343,7 +346,9 @@ def create_report(self, prisma_report: PrismaReport) -> List[SastReport]:
report.sast_reachability = prisma_report.reachability_report[lang]
break
else:
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang)
sast_report = PrismaReport(rule_match={lang: checks}, errors=prisma_report.errors, profiler=prisma_report.profiler,
run_metadata=prisma_report.run_metadata, imports={}, reachability_report={})
report = SastReport(f'{self.check_type.lower()}_{lang.value}', prisma_report.run_metadata, lang, sast_report)
report.sast_reachability = prisma_report.reachability_report[lang]
reports.append(report)
return reports
Expand Down
3 changes: 3 additions & 0 deletions checkov/sast/prisma_models/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ class MatchMetadata(BaseModel):


class Match(BaseModel):
    """A single match entry of a SAST report.

    NOTE(review): the field descriptions below are inferred from the field
    names and types only - confirm against the engine's report schema.
    """

    # presumably a hash identifying this exact match - TODO confirm
    exact_hash: str  # noqa: CCE003
    # presumably a hash of the matched code structure - TODO confirm
    structure_hash: str  # noqa: CCE003
    # where the match was found
    location: MatchLocation  # noqa: CCE003
    # additional data attached to the match
    metadata: MatchMetadata  # noqa: CCE003
    # presumably a reduced AST of the matched code, serialized as a string - TODO confirm
    minimized_ast: str  # noqa: CCE003


class RuleMatch(BaseModel):
Expand Down
4 changes: 3 additions & 1 deletion checkov/sast/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@

from checkov.common.output.report import Report
from checkov.common.sast.consts import POLICIES_ERRORS, POLICIES_ERRORS_COUNT, SOURCE_FILES_COUNT, POLICY_COUNT, SastLanguages
from checkov.sast.prisma_models.report import PrismaReport


class SastReport(Report):
def __init__(self, check_type: str, metadata: Dict[str, Optional[Union[str, int, List[str]]]], language: SastLanguages):
def __init__(self, check_type: str, metadata: Dict[str, Optional[Union[str, int, List[str]]]], language: SastLanguages, sast_report: Optional[PrismaReport] = None):
super().__init__(check_type)
self.metadata = metadata
self.language: SastLanguages = language
self.sast_imports: Dict[str, Any] = {}
self.sast_reachability: Dict[str, Any] = {}
self.sast_report: Optional[PrismaReport] = sast_report

def get_summary(self) -> Dict[str, Union[int, str]]:
base_summary: Dict[str, Union[int, str]] = super().get_summary()
Expand Down

0 comments on commit 55756d8

Please sign in to comment.