Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for incremental scans #153

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def initialize(
elif os.path.isfile(element):
files_to_scan.append(element)
else:
log.error('detect-secrets: %s: No such file or directory', element)
log.error('detect-secrets: "%s": No such file or directory', element)

if not files_to_scan:
return output
Expand Down Expand Up @@ -262,7 +262,7 @@ def trim_baseline_of_removed_secrets(results, baseline, filelist):
return updated


def merge_baseline(old_baseline, new_baseline):
def merge_baseline(old_baseline, new_baseline, keep_old_results=False):
"""Updates baseline to be compatible with the latest version of
detect-secrets.

Expand All @@ -283,15 +283,17 @@ def merge_baseline(old_baseline, new_baseline):
new_baseline['results'] = merge_results(
old_baseline['results'],
new_baseline['results'],
keep_old_results,
)

return new_baseline


def merge_results(old_results, new_results):
def merge_results(old_results, new_results, keep_old_results=False):
"""Update results in new baseline with audit information from old baseline.


Secrets only appear in old baseline are ignored.
Secrets that only appear in old baseline are ignored. Unless keep_old_results is set True

If secret exists in both old and new baselines, old baseline has audit (is_secret)
info but new baseline does not, then audit info will be copied to new baseline.
Expand All @@ -302,16 +304,29 @@ def merge_results(old_results, new_results):
:type new_results: dict
:param new_results: results to replaced status quo

:type keep_old_results: bool
:param keep_old_results: if set true keep old results in new_results

:rtype: dict
"""
for filename, old_secrets in old_results.items():
if filename not in new_results:
next_iter = False
if not keep_old_results and filename not in new_results:
continue

old_secrets_mapping = {}
for old_secret in old_secrets:
old_secrets_mapping[old_secret['hashed_secret']] = old_secret

if keep_old_results and filename not in new_results:
if filename not in new_results:
new_results[filename] = []
new_results[filename].append(old_secret)
next_iter = True

if next_iter:
continue

for new_secret in new_results[filename]:
if new_secret['hashed_secret'] not in old_secrets_mapping:
# We don't join the two secret sets, because if the newer
Expand Down
19 changes: 19 additions & 0 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,8 @@ def add_arguments(self):
self._add_opt_in_options()
self._add_keyword_exclude()
self._add_ghe_instance()
self._add_keep_old_results()
self._add_path_file()

return self

Expand Down Expand Up @@ -788,3 +790,20 @@ def _add_ghe_instance(self):
type=str,
help='Instance URL for GHE i.e. github.ibm.com',
)

def _add_keep_old_results(self):
    """Register the ``--keep-old-results`` switch on ``self.parser``.

    The switch is a plain boolean flag (``store_true``): it is ``False``
    unless given on the command line.
    """
    flag_options = {
        'action': 'store_true',
        'help': "Keep files from old result that don't appear in the current scan",
    }
    self.parser.add_argument('--keep-old-results', **flag_options)

def _add_path_file(self):
    """Register the ``--path-file`` option on ``self.parser``.

    The option takes a single string value: the name of a file from which
    the paths to scan are read.
    """
    self.parser.add_argument(
        '--path-file',
        type=str,
        help=(
            # Adjacent string literals are joined verbatim, so the space
            # separating the two sentences must live inside a literal.
            'Read paths from this file. '
            'If paths are passed into the cmdline they will be ignored'
        ),
    )
13 changes: 12 additions & 1 deletion detect_secrets/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import errno
import json
import sys

Expand Down Expand Up @@ -178,13 +179,22 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
if args.import_filename:
_add_baseline_to_exclude_files(args)

paths = args.path
if args.path_file:
try:
with open(args.path_file) as file:
paths = [line.rstrip() for line in file]
except FileNotFoundError:
print('Path File not found: {}'.format(args.path_file), file=sys.stderr)
sys.exit(errno.ENOENT)

new_baseline = baseline.initialize(
plugins=plugins,
exclude_files_regex=args.exclude_files,
exclude_lines_regex=args.exclude_lines,
word_list_file=args.word_list_file,
word_list_hash=word_list_hash,
path=args.path,
path=paths,
should_scan_all_files=args.all_files,
output_raw=args.output_raw,
output_verified_false=args.output_verified_false,
Expand All @@ -195,6 +205,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
new_baseline = baseline.merge_baseline(
old_baseline,
new_baseline,
args.keep_old_results,
)

return new_baseline
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pytest
pyyaml
responses
tox-pip-extensions
tox>=3.8
tox>=3.8,<4.0
unidiff
ibm_db
boxsdk[jwt]
Expand Down
17 changes: 17 additions & 0 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,23 @@ def test_old_results_have_shifted_subset(self):
],
}

def test_old_results_completely_kept(self):
    """With the keep flag set, a file present only in the old results
    is carried over next to the files of the new results.
    """
    old_only_secret = self.get_secret()
    new_only_secret = self.get_secret()

    previous = {'filenameA': [old_only_secret]}
    current = {'filenameB': [new_only_secret]}

    merged = merge_results(previous, current, True)

    expected = {
        'filenameA': [old_only_secret],
        'filenameB': [new_only_secret],
    }
    assert merged == expected

def test_old_results_completely_overriden(self):
secretA = self.get_secret()
secretB = self.get_secret()
Expand Down
2 changes: 2 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def test_reads_from_stdin(self, mock_merge_baseline):
mock_merge_baseline.assert_called_once_with(
{'key': 'value'},
Any(dict),
False,
)

def test_reads_old_baseline_from_file(self, mock_merge_baseline):
Expand All @@ -243,6 +244,7 @@ def test_reads_old_baseline_from_file(self, mock_merge_baseline):
mock_merge_baseline.assert_called_once_with(
{'key': 'value'},
Any(dict),
False,
)

def test_reads_non_existed_baseline_from_file(
Expand Down
Loading