Skip to content

Commit

Permalink
Merge pull request #24 from ukgovdatascience/extend-jupyter-hooks-to-…
Browse files Browse the repository at this point in the history
…colab

Extend pre-commit hooks to include Google Colab notebooks
  • Loading branch information
ESKYoung authored Apr 19, 2021
2 parents fffbc28 + fd043f6 commit f3f3516
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 68 deletions.
17 changes: 8 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@ repos:
- id: flake8
args: ["hooks", "tests", "{{ cookiecutter.repo_name }}/src"]
- repo: https://github.com/Yelp/detect-secrets
rev: v0.14.3
rev: v1.0.3
hooks:
- id: detect-secrets
args: ["--baseline", ".secrets.baseline"]
exclude: .*/tests/.*
- repo: https://github.com/aflc/pre-commit-jupyter
rev: v1.1.0
hooks:
- id: jupyter-notebook-cleanup
args:
- --remove-kernel-metadata
- --pin-patterns
- "[keep_output]"
- repo: https://github.com/kynan/nbstripout
rev: 0.3.9
hooks:
- id: nbstripout
args:
- --extra-keys
- "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId"
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
hooks:
Expand Down
63 changes: 45 additions & 18 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
{
"custom_plugin_paths": [],
"exclude": {
"files": null,
"lines": null
},
"generated_at": "2021-01-11T09:41:31Z",
"version": "1.0.3",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "ArtifactoryDetector"
"name": "AzureStorageKeyDetector"
},
{
"base64_limit": 4.5,
"name": "Base64HighEntropyString"
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
Expand All @@ -23,8 +21,8 @@
"name": "CloudantDetector"
},
{
"hex_limit": 3,
"name": "HexHighEntropyString"
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
Expand All @@ -36,12 +34,15 @@
"name": "JwtTokenDetector"
},
{
"keyword_exclude": null,
"name": "KeywordDetector"
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "PrivateKeyDetector"
},
Expand All @@ -51,17 +52,43 @@
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {},
"version": "0.14.3",
"word_list": {
"file": null,
"hash": null
}
"generated_at": "2021-03-04T17:23:56Z"
}
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
cookiecutter
coverage
detect-secrets
flake8
detect-secrets==1.0.3
myst-parser
pre-commit
pytest
Expand Down
17 changes: 8 additions & 9 deletions {{ cookiecutter.repo_name }}/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@ repos:
- id: flake8
args: ["src"]
- repo: https://github.com/Yelp/detect-secrets
rev: v0.14.3
rev: v1.0.3
hooks:
- id: detect-secrets
args: ["--baseline", ".secrets.baseline"]
exclude: .*/tests/.*
- repo: https://github.com/aflc/pre-commit-jupyter
rev: v1.1.0
hooks:
- id: jupyter-notebook-cleanup
args:
- --remove-kernel-metadata
- --pin-patterns
- "[keep_output]"
- repo: https://github.com/kynan/nbstripout
rev: 0.3.9
hooks:
- id: nbstripout
args:
- --extra-keys
- "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId"
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
hooks:
Expand Down
63 changes: 45 additions & 18 deletions {{ cookiecutter.repo_name }}/.secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
{
"custom_plugin_paths": [],
"exclude": {
"files": null,
"lines": null
},
"generated_at": "2021-01-11T09:41:31Z",
"version": "1.0.3",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "ArtifactoryDetector"
"name": "AzureStorageKeyDetector"
},
{
"base64_limit": 4.5,
"name": "Base64HighEntropyString"
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
Expand All @@ -23,8 +21,8 @@
"name": "CloudantDetector"
},
{
"hex_limit": 3,
"name": "HexHighEntropyString"
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
Expand All @@ -36,12 +34,15 @@
"name": "JwtTokenDetector"
},
{
"keyword_exclude": null,
"name": "KeywordDetector"
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "PrivateKeyDetector"
},
Expand All @@ -51,17 +52,43 @@
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {},
"version": "0.14.3",
"word_list": {
"file": null,
"hash": null
}
"generated_at": "2021-03-04T17:23:56Z"
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,16 @@ For this repository, we are using `pre-commit` for a number of purposes:
- Checking for secrets being committed accidentally — see [here](#definition-of-a-secret-according-to-detect-secrets)
for the definition of a "secret";
- Checking for any large files (over 5 MB) being committed; and
- Cleaning Jupyter notebooks, which means removing all outputs and execution counts.
- Cleaning Jupyter notebooks, which means removing all outputs, execution counts, and Python kernel metadata, and, for
Google Colaboratory (Colab) notebooks, stripping out user information.

We have configured `pre-commit` to run automatically on _every commit_. By running on each commit, we ensure that
`pre-commit` will be able to detect all contraventions and keep our repository in a healthy state.

> ⚠️ **No pre-commit hooks will be run on Google Colab notebooks pushed directly to GitHub**. For security reasons, it
> is highly recommended that you manually download your notebook, and commit it locally to ensure pre-commit hooks are
> executed on your changes.
## Installation

In order for `pre-commit` to run, action is needed to configure it on your system.
Expand Down Expand Up @@ -76,34 +81,81 @@ contain words that will trip the KeywordDetector plugin; see the `DENYLIST` vari
If `pre-commit` detects any secrets when you try to create a commit, it will detail what it found and where to go to
check the secret.

If the detected secret is a false-positive, you should update the `.secrets.baseline` through the following steps:
If the detected secret is a false positive, there are two options to resolve this, and prevent your commit from being
blocked: [inline allowlisting (recommended)](#inline-allowlisting-recommended) or
[updating `.secrets.baseline`](#updating-secretsbaseline).

In either case, if an actual secret is detected (or a combination of actual secrets and false positives), first remove
the actual secret before following either of these processes.

#### Inline allowlisting (recommended)

To exclude a false positive, add a `pragma` comment such as:

```python
secret = "Password123" # pragma: allowlist secret
```

or

```python
# pragma: allowlist nextline secret
secret = "Password123"
```

If the detected secret is actually a secret (or other sensitive information), remove the secret and re-commit; there is
no need to add any `pragma` comments.

If your commit contains a mixture of false positives and actual secrets, remove the actual secrets first before adding
`pragma` comments to the false positives.

#### Updating `.secrets.baseline`

- Run `detect-secrets scan --update .secrets.baseline` from the root folder in the terminal to index the
false-positive(s);
To exclude a false positive, you can also update the `.secrets.baseline` through the following steps:

- Run `detect-secrets scan --baseline .secrets.baseline` from the root folder in the terminal to index the
false positive(s);
- Next, audit all indexed secrets via `detect-secrets audit .secrets.baseline` (the same as during initial set-up, if a
`.secrets.baseline` doesn't exist); and
- Finally, ensure that you commit the updated `.secrets.baseline` in the same commit as the false-positive(s) it has
- Finally, ensure that you commit the updated `.secrets.baseline` in the same commit as the false positive(s) it has
been updated for.

If the detected secret is actually a secret (or other sensitive information), remove the secret and re-commit. There is
no need to update the `.secrets.baseline` file in this case.

If your commit contains a mixture of false-positives and actual secrets, remove the actual secrets first before
If your commit contains a mixture of false positives and actual secrets, remove the actual secrets first before
updating and auditing the `.secrets.baseline` file.

## Keeping specific Jupyter notebook outputs

It may be necessary or useful to keep certain output cells of a Jupyter notebook, for example charts or graphs
visualising some set of data. To do this, add the following comment at the top of the input block:
visualising some set of data. To do this, according to the documentation for the [`nbstripout`][nbstripout] package,
either:

1. Add a `keep_output` tag to the desired cell; or
2. Add `"keep_output": true` to the desired cell's metadata.

```julia
# [keep_output]
You can access cell tags or metadata in Jupyter by enabling the "Tags" or "Edit Metadata" toolbar
(View > Cell Toolbar > Tags; View > Cell Toolbar > Edit Metadata). For the tags approach, enter `keep_output` in the
text field for each desired cell, and press the "Add tag" button. For the metadata approach, press the "Edit Metadata"
button on each desired cell, and edit the metadata to look like this:

```json
{
"keep_output": true
}
```

This will tell the hook not to strip the resulting output of this cell, allowing it to be committed.
This will tell the hook not to strip the resulting output of the desired cell(s), allowing the output(s) to be
committed.

> ℹ️ Currently (March 2021) there is no way to add tags and/or metadata to Google Colab notebooks. It's strongly
> suggested that you download the Colab notebook as a `.ipynb` file, and edit tags and/or metadata using Jupyter
> _before_ committing the code if you want to keep some outputs.
[detect-secrets]: https://github.com/Yelp/detect-secrets
[detect-secrets-caveats]: https://github.com/Yelp/detect-secrets#caveats
[detect-secrets-keyword-detector]: https://github.com/Yelp/detect-secrets/blob/master/detect_secrets/plugins/keyword.py
[detect-secrets-plugins]: https://github.com/Yelp/detect-secrets#currently-supported-plugins
[nbstripout]: https://github.com/kynan/nbstripout
[pre-commit]: https://pre-commit.com/
3 changes: 1 addition & 2 deletions {{ cookiecutter.repo_name }}/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
detect-secrets
coverage
flake8
detect-secrets==1.0.3
myst-parser
pre-commit
pytest
Expand Down

0 comments on commit f3f3516

Please sign in to comment.