Skip to content

Commit

Permalink
fix redaction so playback on different system can work
Browse files Browse the repository at this point in the history
  • Loading branch information
jeking3 committed Sep 24, 2020
1 parent 93cbdc6 commit 70ad4fe
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 85 deletions.
9 changes: 7 additions & 2 deletions interposer/recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,18 @@ def tearDownClass(cls) -> None:

super().tearDownClass()

def redact(self, secret: str) -> str:
def redact(self, secret: str, replacement: str = "*") -> str:
"""
Redact a secret in playback mode, and keep track of the secret in
recording mode. This allows tests to use secrets quite normally.
Callers just have to remember to run secrets through redact().
It is recommended you provide a unique replacment string (it does
not need to be the same length as the secret), for each secret
to disambiguate them in the same run, especially for secrets of the
same length.
"""
return self.tapedeck.redact(secret)
return self.tapedeck.redact(secret, replacement=replacement)


class TapeDeckCallHandler(CallHandler):
Expand Down
91 changes: 43 additions & 48 deletions interposer/tapedeck.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from typing import Callable
from typing import Dict
from typing import Optional
from typing import Set

import yaml

Expand Down Expand Up @@ -172,13 +171,11 @@ def __init__(self, deck: Path, mode: Mode) -> None:
self.deck = deck
self.file_format = None
self.mode = mode
self.redactions: Set[str] = set()

self._logger = logging.getLogger(__name__)

# call ordinal key (channel name) and value (ordinal number)
self._call_ordinals: Dict[str, int] = {}

self._logger = logging.getLogger(__name__)
self._redactions: Dict[str, str] = dict()
# the open file resource
self._tape = None

Expand Down Expand Up @@ -233,6 +230,8 @@ def open(self) -> None:
if self._tape:
raise TapeDeckOpenError()

self._reset()

if self.mode == Mode.Playback:
self._tape = shelve.open(
str(self.deck), flag="r", protocol=self.PICKLE_PROTOCOL
Expand All @@ -253,12 +252,6 @@ def open(self) -> None:
self._tape[self.LABEL_FILE_FORMAT] = self.CURRENT_FILE_FORMAT
self.file_format = self.CURRENT_FILE_FORMAT

# ensure if close() then open() is called we reset the ordinals to zero
self._call_ordinals = dict()
self.redactions = set()

self._call_ordinals = {}

self._log(
logging.DEBUG,
"open",
Expand All @@ -282,6 +275,8 @@ def close(self) -> None:
f"{self.deck} for {self.mode} using file format {self.file_format}",
)

self._reset()

def record(
self,
context: CallContext,
Expand Down Expand Up @@ -348,29 +343,45 @@ def playback(self, context: CallContext, channel: str = "default") -> Any:
self._log_ex("playback", context, payload.ex)
raise payload.ex

def redact(self, secret: str) -> str:
def redact(self, secret: str, replacement: str = "*") -> str:
"""
Auto-track secrets for redaction.
In recording mode this returns the secret and makes sure the secret
never makes it into the recording; instead a redacted version does,
which is done before call contexts get hashed so the hashed context
is of the redacted context. The results and exceptions are also
redacted before storage.
Tracks the secret for redaction of content being written. For example
if a secret exists inside an object call result, it will be
overwritten with an equal length string containing the replacement,
wrapping around if necessary. The replacement can be a single
character or a string which will get duplicated (or truncated) to
fit to the required size.
It is recommended that for each secret a different replacement is
used, since equal length secrets with the same replacement will yield
equal redacted secrets.
In playback mode the redacted secret is returned so the lookup finds
the redacted context.
During recording this method updates the redaction dict internally
and returns the original secret (so the remainder of the recording
can proceed), however call signatures will have the replacement in
place of the secret. In playback mode when called, this method
will return the replacement that was used during recording so the
call can be found.
"""
if not isinstance(secret, str):
raise TypeError("secret must be a string")
if not isinstance(replacement, str):
raise TypeError("replacement must be a string")
if not secret:
raise AttributeError("secret cannot be an empty string")
if not replacement:
raise AttributeError("replacement cannot be an empty string")

wantlen = len(secret)
obfu = (replacement * math.ceil(wantlen / len(replacement)))[:wantlen]

if self.mode == Mode.Recording:
self.redactions.add(secret)
self._redactions[secret] = obfu
return secret
else:
return self._obscured(secret)
return obfu

def _advance(self, context: CallContext, channel: str) -> str:
"""
Expand Down Expand Up @@ -482,9 +493,8 @@ def _log(self, level: int, category: str, action: str, msg: str) -> None:
Common funnel for logs.
"""
msg = f"TAPE: {category}({action}): {msg}"
for secret in self.redactions:
redacted_secret = self._redact(secret)
msg = msg.replace(secret, redacted_secret)
for secret, replacement in self._redactions.items():
msg = msg.replace(secret, replacement)
self._logger.log(level, msg)

def _log_ex(self, action: str, context: CallContext, ex: Exception) -> None:
Expand Down Expand Up @@ -519,42 +529,21 @@ def _log_result(self, action: str, context: CallContext, result: Any) -> None:
if self._logger.isEnabledFor(self.DEBUG_WITH_RESULTS):
context.meta[self.LABEL_TAPE].pop(self.LABEL_RESULT)

def _obscured(self, secret: str) -> str:
"""
Create a redaction for a secret that is based on the content
of the secret in order to disambiguate it from other redactions
in the same recording, but still be redacted.
"""
wantlen = len(secret)
uniq = sha256(secret.encode()).hexdigest()
if len(uniq) < wantlen:
uniq *= math.ceil(wantlen / len(uniq))
if wantlen > 31:
# bookmarks make redactions easier to spot in logs
uniq = ":::REDACTED:::" + uniq[: wantlen - 28] + ":::REDACTED:::"
elif wantlen > 10:
uniq = ":::" + uniq[: wantlen - 6] + ":::"
else:
uniq = uniq[:wantlen]
return uniq

def _redact(self, entity: Any, return_bytes: bool = False) -> Any:
"""
Redacts any known secrets in an object by converting it to pickled
binary form, then doing a binary secret replacement, then unpickling.
This is used before we hash contexts and before we store results to
make sure there are no secrets in the recording. The secrets must
be fed to us from the consumer (self.redactions).
be fed to us from the consumer (self._redactions).
Raises:
PicklingError if something in the context cannot be pickled.
"""
raw = pickle.dumps(entity, protocol=self.PICKLE_PROTOCOL)
for secret in self.redactions:
binary_secret = secret.encode()
redacted_secret = self._obscured(secret).encode()
raw = raw.replace(binary_secret, redacted_secret)
for secret, replacement in self._redactions.items():
raw = raw.replace(secret.encode(), replacement.encode())
return pickle.loads(raw) if not return_bytes else raw # nosec

def _reduce_call(self, context: CallContext) -> Callable:
Expand All @@ -581,3 +570,9 @@ def _reduce_call(self, context: CallContext) -> Callable:
result = context.call
context.call = sig
return result

def _reset(self) -> None:
""" Clean out stuff at open and close. """
self.file_format = None
self._call_ordinals = dict()
self._redactions = dict()
27 changes: 12 additions & 15 deletions tests/recorder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,15 @@ def tearDownClass(cls) -> None:
Delete the recording file since we tested both modes in one test.
"""
datafile = Path(str(cls.tapedeck.deck) + ".gz")
redactions = cls.tapedeck.redactions
redactions = cls.tapedeck._redactions
super().tearDownClass()
if datafile.exists():
with gzip.open(datafile, "rb") as fin:
raw = fin.read()
for secret in redactions:
for secret, replacement in redactions.items():
assert (
secret.encode() not in raw
), "a secret leaked into the recording"
), "a secret leaked into the recording!"
datafile.unlink()
os.environ.pop("RECORDING")

Expand All @@ -315,25 +315,22 @@ def test_secrets(self):
# post-processing redaction list and after the recording file is closed
# the secrets get redacted; in playback mode this redacts it immediately
# so the playback matches the recording
assert not self.tapedeck.redactions
uut = cls(self.redact(self.token))
assert self.tapedeck.redactions
assert uut.get_token() == self.redact(self.token)
assert not self.tapedeck._redactions
uut = cls(self.redact(self.token, replacement="TOKEN"))
assert self.tapedeck._redactions
assert uut.get_token() == self.redact(self.token, replacement="TOKEN")
assert self.redact(self.token) == self.token

self.tapedeck.close() # applies redaction to recording
self.tapedeck.mode = Mode.Playback
self.tapedeck.open()

assert not self.tapedeck.redactions
uut = cls(self.redact(self.token))
foo = self.redact(self.token)
assert foo.startswith(":::REDACTED:::")
assert foo.endswith(":::REDACTED:::")
assert len(foo) == len(self.token)
assert ":" not in foo[14:-14]
assert not self.tapedeck._redactions
uut = cls(self.redact(self.token, replacement="TOKEN"))
foo = self.redact(self.token, replacement="TOKEN")
assert foo == "TOKENTOKENTOKENTOKENTOKENTOKENTOKENT"

# most importantly, the value that was recorded was also redacted
# most importantly, the object initializer argument was redacted
assert uut.get_token() == foo
with self.assertRaises(RecordedCallNotFoundError):
uut.get_token()
Expand Down
42 changes: 22 additions & 20 deletions tests/tapedeck_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,42 +172,44 @@ def test_recording_too_old(self):
finally:
TapeDeck.EARLIEST_FILE_FORMAT_SUPPORTED = save

def test_redacted_obscured(self):
def test_redact(self):
"""
Tests logic that makes an obscure but unique redaction string.
"""
uut = TapeDeck(self.datadir / "recording", Mode.Playback)
uut = TapeDeck(self.datadir / "recording", Mode.Recording)

with self.assertRaises(TypeError):
uut.redact(None)
with self.assertRaises(TypeError):
uut.redact(42)
with self.assertRaises(AttributeError):
uut.redact("")
with self.assertRaises(TypeError):
uut.redact("foo", replacement=None)
with self.assertRaises(TypeError):
uut.redact("foo", replacement=42)
with self.assertRaises(AttributeError):
uut.redact("foo", replacement="")

for i in range(1, 512):
assert len(uut._obscured("*" * i)) == i
assert uut.redact("foo") == "foo" # recording mode
assert uut._redactions.get("foo") == "***" # default

token = str(uuid.uuid4())
token2 = str(uuid.uuid4())
assert uut.redact("foo", replacement="#") == "foo" # recording mode
assert uut._redactions.get("foo") == "###" # default

# tokens up to length 10 are just hash
assert uut._obscured("A") != uut._obscured("Z")
assert ":" not in uut._obscured("123456789")
assert uut.redact("foo", replacement="BARSAM") == "foo" # recording mode
assert uut._redactions.get("foo") == "BAR" # default

# tokens up to length 32 have small bookmarks for logging standout
foo = uut._obscured("*" * 31)
assert foo.startswith(":::")
assert foo.endswith(":::")
assert ":" not in foo[3:-3]
assert (
uut.redact("fudge brownie", replacement="BARS") == "fudge brownie"
) # recording mode
assert uut._redactions.get("fudge brownie") == "BARSBARSBARSB" # default

# longer secrets have :::REDACTED::: bookmarks and still unique
bar = uut._obscured(token)
assert bar.startswith(":::REDACTED:::")
assert bar.endswith(":::REDACTED:::")
assert ":" not in bar[14:-14]
uut.mode = Mode.Playback

assert uut._obscured(token) != uut._obscured(token2)
assert (
uut.redact("fudge brownie", replacement="BARS") == "BARSBARSBARSB"
) # playback mode

def test_recording_secrets(self):
""" Tests automatic redaction of known secrets and use in playback """
Expand Down

0 comments on commit 70ad4fe

Please sign in to comment.