
Commit

Merge remote-tracking branch 'origin/main' into christophe-papazian/enable_stack_traces_for_iast
christophe-papazian committed Nov 29, 2024
2 parents bb89e5b + b6ff124 commit 2c49910
Showing 10 changed files with 98 additions and 47 deletions.
21 changes: 6 additions & 15 deletions benchmarks/appsec_iast_aspects/config.yaml
@@ -181,25 +181,16 @@ modulo_aspect: &modulo_aspect
warmups: 1
function_name: "iast_modulo_aspect"

modulo_aspect_for_bytes: &modulo_aspect
processes: 10
loops: 5
values: 100
warmups: 1
modulo_aspect_for_bytes:
<<: *modulo_aspect
function_name: "iast_modulo_aspect_for_bytes"

modulo_aspect_for_bytes_bytearray: &modulo_aspect
processes: 10
loops: 5
values: 100
warmups: 1
modulo_aspect_for_bytes_bytearray:
<<: *modulo_aspect
function_name: "iast_modulo_aspect_for_bytes_bytearray"

modulo_aspect_for_bytearray_bytearray: &modulo_aspect
processes: 10
loops: 5
values: 100
warmups: 1
modulo_aspect_for_bytearray_bytearray:
<<: *modulo_aspect
function_name: "iast_modulo_aspect_for_bytearray_bytearray"

modulo_noaspect:
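Note on the change above: the three modulo benchmarks now inherit the shared settings through a YAML merge key (`<<: *modulo_aspect`) instead of repeating them. A minimal sketch of how such a merge key expands when the file is loaded; PyYAML is used here purely for illustration and is not implied to be what the benchmark harness actually uses.

import yaml  # PyYAML, used only to illustrate merge-key expansion

config = yaml.safe_load(
    """
modulo_aspect: &modulo_aspect
  processes: 10
  loops: 5
  values: 100
  warmups: 1
  function_name: "iast_modulo_aspect"

modulo_aspect_for_bytes:
  <<: *modulo_aspect
  function_name: "iast_modulo_aspect_for_bytes"
"""
)

# The merged entry inherits every shared setting and overrides only function_name.
assert config["modulo_aspect_for_bytes"]["processes"] == 10
assert config["modulo_aspect_for_bytes"]["function_name"] == "iast_modulo_aspect_for_bytes"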
3 changes: 3 additions & 0 deletions ddtrace/appsec/_iast/_ast/ast_patching.py
@@ -280,6 +280,9 @@
"pkg_resources.",
"pluggy.",
"protobuf.",
"psycopg.", # PostgreSQL adapter for Python (v3)
"_psycopg.", # PostgreSQL adapter for Python (v3)
"psycopg2.", # PostgreSQL adapter for Python (v2)
"pycparser.", # this package is called when a module is imported, propagation is not needed
"pytest.", # Testing framework
"_pytest.",
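The three `psycopg` entries above extend the static list of module-name prefixes that the IAST AST patcher leaves uninstrumented, which is what the `psycopg3` release note further down refers to. A rough sketch of how such a prefix denylist can be consulted; the trimmed-down list and the `_should_skip` helper are illustrative only, not the actual code in `ast_patching.py`.

# Illustrative only: a prefix-based skip check over a trimmed-down denylist.
STATIC_DENYLIST = (
    "psycopg.",   # PostgreSQL adapter for Python (v3)
    "_psycopg.",  # PostgreSQL adapter for Python (v3)
    "psycopg2.",  # PostgreSQL adapter for Python (v2)
    "pycparser.",
)


def _should_skip(module_name: str) -> bool:
    # Appending "." lets the bare package name ("psycopg") match its own prefix entry.
    dotted = module_name + "."
    return any(dotted.startswith(prefix) for prefix in STATIC_DENYLIST)


assert _should_skip("psycopg")
assert _should_skip("psycopg2.extensions")
assert not _should_skip("myapp.views")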
17 changes: 13 additions & 4 deletions ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py
@@ -1,4 +1,5 @@
import re
import string

from ddtrace.internal.logger import get_logger
from ddtrace.settings.asm import config as asm_config
@@ -16,7 +17,15 @@

log = get_logger(__name__)

REDACTED_SOURCE_BUFFER = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
REDACTED_SOURCE_BUFFER = string.ascii_letters + string.digits
LEN_SOURCE_BUFFER = len(REDACTED_SOURCE_BUFFER)


def get_redacted_source(length):
full_repeats = length // LEN_SOURCE_BUFFER
remainder = length % LEN_SOURCE_BUFFER
result = REDACTED_SOURCE_BUFFER * full_repeats + REDACTED_SOURCE_BUFFER[:remainder]
return result


class SensitiveHandler:
@@ -218,7 +227,7 @@ def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources):
if source_index < len(sources):
if not sources[source_index].redacted and self.is_sensible_source(sources[source_index]):
redacted_sources.append(source_index)
sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)]
sources[source_index].pattern = get_redacted_source(len(sources[source_index].value))
sources[source_index].redacted = True

if source_index in redacted_sources:
@@ -282,7 +291,7 @@ def redact_source(self, sources, redacted_sources, redacted_sources_context, sou
if source_index is not None:
if not sources[source_index].redacted:
redacted_sources.append(source_index)
sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)]
sources[source_index].pattern = get_redacted_source(len(sources[source_index].value))
sources[source_index].redacted = True

if source_index not in redacted_sources_context.keys():
@@ -334,12 +343,12 @@ def write_redacted_value_part(
sensitive_start = 0
sensitive = _value[sensitive_start : _source_redaction_context["end"] - offset]
index_of_part_value_in_pattern = source.value.find(sensitive)

pattern = (
placeholder[index_of_part_value_in_pattern : index_of_part_value_in_pattern + len(sensitive)]
if index_of_part_value_in_pattern > -1
else placeholder[_source_redaction_context["start"] : _source_redaction_context["end"]]
)

value_parts.append({"redacted": True, "source": source_index, "pattern": pattern})
_value = _value[len(pattern) :]
offset += len(pattern)
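For context on the `get_redacted_source` helper introduced above: the previous code sliced the 62-character `REDACTED_SOURCE_BUFFER`, so redaction patterns were silently capped at 62 characters for longer source values; the helper repeats the buffer so the pattern always matches the source length. A standalone copy of the logic to illustrate the behaviour:

import string

REDACTED_SOURCE_BUFFER = string.ascii_letters + string.digits  # 62 characters
LEN_SOURCE_BUFFER = len(REDACTED_SOURCE_BUFFER)


def get_redacted_source(length):
    # Repeat the whole buffer as many times as it fits, then pad with a partial slice.
    full_repeats = length // LEN_SOURCE_BUFFER
    remainder = length % LEN_SOURCE_BUFFER
    return REDACTED_SOURCE_BUFFER * full_repeats + REDACTED_SOURCE_BUFFER[:remainder]


assert get_redacted_source(5) == "abcde"
assert len(get_redacted_source(150)) == 150      # the old slice capped this at 62
assert get_redacted_source(64).endswith("89ab")  # wraps past the digits back into the letters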
44 changes: 23 additions & 21 deletions ddtrace/appsec/_iast/_evidence_redaction/url_sensitive_analyzer.py
@@ -4,31 +4,33 @@


log = get_logger(__name__)
AUTHORITY = r"^(?:[^:]+:)?//([^@]+)@"
QUERY_FRAGMENT = r"[?#&]([^=&;]+)=([^?#&]+)"
pattern = re.compile(f"({AUTHORITY})|({QUERY_FRAGMENT})", re.IGNORECASE | re.MULTILINE)
AUTHORITY_PATTERN = re.compile(r"https?://([^@]+)(?=@)", re.IGNORECASE | re.MULTILINE)
QUERY_FRAGMENT_PATTERN = re.compile(r"[?#&]([^=&;]+)=([^?#&]+)", re.IGNORECASE | re.MULTILINE)


def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
try:
ranges = []
regex_result = pattern.search(evidence.value)
def find_authority(ranges, evidence):
regex_result = AUTHORITY_PATTERN.search(evidence.value)
while regex_result is not None:
if isinstance(regex_result.group(1), str):
start = regex_result.start(1)
end = regex_result.start(1) + (len(regex_result.group(1)))
ranges.append({"start": start, "end": end})

while regex_result is not None:
if isinstance(regex_result.group(1), str):
end = regex_result.start() + (len(regex_result.group(0)) - 1)
start = end - len(regex_result.group(1))
ranges.append({"start": start, "end": end})
regex_result = AUTHORITY_PATTERN.search(evidence.value, regex_result.end())

if isinstance(regex_result.group(3), str):
end = regex_result.start() + len(regex_result.group(0))
start = end - len(regex_result.group(3))
ranges.append({"start": start, "end": end})

regex_result = pattern.search(evidence.value, regex_result.end())
def find_query_fragment(ranges, evidence):
regex_result = QUERY_FRAGMENT_PATTERN.search(evidence.value)
while regex_result is not None:
if isinstance(regex_result.group(2), str):
start = regex_result.start(2)
end = regex_result.start(2) + (len(regex_result.group(2)))
ranges.append({"start": start, "end": end})
regex_result = QUERY_FRAGMENT_PATTERN.search(evidence.value, regex_result.end())

return ranges
except Exception as e:
log.debug(e)

return []
def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
ranges = []
find_authority(ranges, evidence)
find_query_fragment(ranges, evidence)
return ranges
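The rewrite above replaces the single combined regex with two dedicated patterns: `AUTHORITY_PATTERN` captures the user-info that precedes `@` in a URL, and `QUERY_FRAGMENT_PATTERN` captures each query/fragment parameter value, with the captured spans becoming the redaction ranges. A quick illustration of what the two patterns match; the sample URL and driver code are illustrative, not taken from the test suite.

import re

AUTHORITY_PATTERN = re.compile(r"https?://([^@]+)(?=@)", re.IGNORECASE | re.MULTILINE)
QUERY_FRAGMENT_PATTERN = re.compile(r"[?#&]([^=&;]+)=([^?#&]+)", re.IGNORECASE | re.MULTILINE)

url = "https://user:s3cr3t@domain1.com/?id=1234&token=abcd"

# Group 1 of AUTHORITY_PATTERN is the user-info ("user:s3cr3t"); its span gets redacted.
auth = AUTHORITY_PATTERN.search(url)
print(auth.group(1), (auth.start(1), auth.end(1)))

# Group 2 of QUERY_FRAGMENT_PATTERN is each parameter value ("1234", "abcd").
for m in QUERY_FRAGMENT_PATTERN.finditer(url):
    print(m.group(1), "->", m.group(2), (m.start(2), m.end(2)))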
@@ -0,0 +1,4 @@
---
fixes:
- |
Code security: This fix resolves a patching issue with `psycopg3`.
@@ -0,0 +1,4 @@
---
fixes:
- |
Code Security: Ensure IAST SSRF vulnerability redacts the url query parameters correctly.
@@ -40,6 +40,7 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_CMDI
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected


@@ -123,4 +123,5 @@ def test_header_injection_redaction_suite(
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_HEADER_INJECTION
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected
4 changes: 3 additions & 1 deletion tests/appsec/iast/taint_sinks/test_sql_injection_redacted.py
@@ -19,7 +19,9 @@


# FIXME: ideally all these should pass, though the key is that we don't leak any potential PII
_ignore_list = {46, 47}
_ignore_list = {
46,
}


@pytest.mark.parametrize(
46 changes: 40 additions & 6 deletions tests/appsec/iast/taint_sinks/test_ssrf_redacted.py
@@ -21,10 +21,9 @@


@pytest.mark.parametrize(
"evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_SSRF))[0:2]
"evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_SSRF, ignore_list={9, 10}))
)
def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults):
# TODO: fix get_parametrize(VULN_SSRF)[2:] replacements doesn't work correctly with params of SSRF
tainted_object = evidence_input_value = evidence_input.get("value", "")
if evidence_input_value:
tainted_object = _taint_pyobject_multiranges(
@@ -49,6 +48,7 @@ def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_SSRF
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected


@@ -71,15 +71,44 @@ def test_ssrf_redact_param(iast_context_defaults):
assert result["vulnerabilities"]
for v in result["vulnerabilities"]:
assert v["evidence"]["valueParts"] == [
{"value": "https://www.domain1.com/"},
{"redacted": True},
{"value": "https://www.domain1.com/?id="},
{"pattern": "abcdefgh", "redacted": True, "source": 0},
{"value": "&param2="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
]


def test_ssrf_redact_params_log_url(iast_context_defaults):
url = (
"http://dovahkiin.dovahkiin.naal.ok/zin/los/vahriin/wahdein-vokul-mahfaeraak.ast-vaal-4ee5-aa01-3de356fd2cd8/snapshots/"
"f7591b64-9839-4be8-a3a9-e7ccf5c0e17e?schemaVersion=c27f6149-36b6-4f3a-bacb-ba4b8db36892"
)
tainted_source = taint_pyobject(pyobject=url, source_name="encoded_forward_api", source_value=url)

ev = Evidence(value=tainted_source)

loc = Location(path="foobar.py", line=35, spanId=123)
v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc)
report = IastSpanReporter(vulnerabilities={v})
report.add_ranges_to_evidence_and_extract_sources(v)
result = report.build_and_scrub_value_parts()

assert result["vulnerabilities"]
for v in result["vulnerabilities"]:
assert v["evidence"]["valueParts"] == [
{
"source": 0,
"value": "http://dovahkiin.dovahkiin.naal.ok/zin/los/vahriin/wahdein-vokul-mahfaeraak.ast-vaal-4ee5-aa01-3de356fd"
"2cd8/snapshots/f7591b64-9839-4be8-a3a9-e7ccf5c0e17e?schemaVersion=",
},
{"pattern": "TUVWXYZ0123456789abcdefghijklmnopqrs", "redacted": True, "source": 0},
]


def test_cmdi_redact_user_password(iast_context_defaults):
user_taint_range = taint_pyobject(pyobject="root", source_name="username", source_value="root")
password_taint_range = taint_pyobject(
@@ -107,7 +136,12 @@ def test_cmdi_redact_user_password(iast_context_defaults):
assert v["evidence"]["valueParts"] == [
{"value": "https://"},
{"pattern": "abcd", "redacted": True, "source": 0},
{"value": ":"},
{"redacted": True},
{"pattern": "abcdefghijklmnopqrs", "redacted": True, "source": 1},
{"value": "@domain1.com/?id=&param2=value2&param3=value3&param3=value3"},
{"value": "@domain1.com/?id=&param2="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
]
