Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
matweldon committed Mar 26, 2024
0 parents commit e17d67f
Show file tree
Hide file tree
Showing 57 changed files with 5,906 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Example environment file: copy to `.env` and replace the placeholder values.

# Project identifiers for the two linking parties.
# NOTE(review): presumably Google Cloud project IDs — confirm.
PARTY_1_PROJECT=pprl-party-1
PARTY_2_PROJECT=pprl-party-2

# Project and region for the workload author (here the same project as party 1).
WORKLOAD_AUTHOR_PROJECT=pprl-party-1
WORKLOAD_AUTHOR_PROJECT_REGION=europe-west2

# Project and zone for the workload operator (here the same project as party 2).
WORKLOAD_OPERATOR_PROJECT=pprl-party-2
WORKLOAD_OPERATOR_PROJECT_ZONE=europe-west2-c
17 changes: 17 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[flake8]
# Paths that are never linted (virtual environments, caches, vendored
# front-end packages and generated migrations).
exclude = venv*,__pycache__,node_modules,bower_components,migrations

# Limits on cyclomatic complexity and line length.
max-complexity = 9
max-line-length = 99

# Rule definitions: http://flake8.pycqa.org/en/latest/user/error-codes.html
# Rules switched off in addition to flake8's default ignore list:
#   D203: 1 blank line required before class docstring
#   E203: whitespace before ',', ';', or ':'
#   W503: line break before binary operator
extend-ignore = D203,E203,W503
41 changes: 41 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Continuous integration: run the test suite on every pull request and on
# pushes to `main` / `dev*` branches, across two operating systems and two
# Python versions. Linting runs once only (Ubuntu + Python 3.11).
name: CI

on:
  pull_request:
  push:
    branches:
      - main
      - "dev*"

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10", "3.11"]

    steps:
      - name: Checkout repository
        # v4 runs on the Node 20 runtime; v3 relies on the deprecated Node 16.
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        # v5 runs on the Node 20 runtime; v4 relies on the deprecated Node 16.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Update pip and install test dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install ".[test]"
      - name: Run tests
        # Coverage options are currently disabled (everything after `#` is a
        # shell comment).
        # NOTE(review): `--cov=pplink_client` does not match the `pprl`
        # package name used elsewhere — confirm before re-enabling.
        run: |
          python -m pytest #--cov=pplink_client --cov-fail-under=89 test
      - name: Install and run linters
        # Lint on a single matrix cell to avoid repeating identical checks.
        if: |
          matrix.python-version == '3.11' &&
          matrix.os == 'ubuntu-latest'
        run: |
          python -m pip install ".[lint]"
          python -m ruff check src test
          python -m ruff format --check src test
42 changes: 42 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

# data
data/*
secrets/*
*.json

# logs
log/*

# environment
.env

# documentation
/.quarto/
/_site/

# tests
.tox/
.coverage
.mypy_cache/
.pytest_cache/
.hypothesis/
.ruff_cache/

# system
.DS_Store
.vscode/

# cache
*/__pycache__/
*/**/__pycache__/

# build
build/
dist/
*.egg-info/
*.egg

# Keep `.gitkeep` placeholders so otherwise-empty directories (data/, secrets/,
# log/) stay in the repository. This negation must come AFTER the patterns
# above: git applies the last matching pattern, so placing it first would let
# `data/*` and friends re-ignore the placeholder files.
!.gitkeep
42 changes: 42 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
# Run 'pre-commit autoupdate' to update hook versions
repos:
  # Strip outputs and volatile metadata from Jupyter notebooks.
  - repo: https://github.com/kynan/nbstripout
    rev: 0.7.1
    hooks:
      - id: nbstripout
        name: nbstripout - Strip outputs from notebooks (auto-fixes)
        args:
          - "--extra-keys"
          - "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId"

  # General file hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-added-large-files
        name: Check for files larger than 5 MB
        args:
          - "--maxkb=5120"
      - id: end-of-file-fixer
        name: Check for a blank line at the end of scripts (auto-fixes)
        exclude: '\.Rd'
      - id: trailing-whitespace
        name: Check for trailing whitespaces (auto-fixes)

  # Ruff: lint (with auto-fix) first, then format.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.3.0
    hooks:
      - id: ruff
        args:
          - "--fix"
      - id: ruff-format

  # Secret scanning, configured twice: once against the committed baseline
  # with notebooks/Quarto files excluded, and once with a different file
  # filter plus line exclusions for notebook JSON fields.
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.4.0
    hooks:
      - id: detect-secrets
        name: detect-secrets - Detect secrets in staged code
        args:
          - "--baseline"
          - ".secrets.baseline"
          - "--exclude-files"
          - '.*\.(ipynb|qmd)$'
        exclude: '.*/tests/.*|^\.cruft\.json$'
      - id: detect-secrets
        name: 'detect-secrets-jupyter'
        # NOTE(review): the pattern below looks intended to exclude every file
        # that is not a notebook; it only tests the last five characters one
        # by one, so some non-notebook files (e.g. names ending "up.py") are
        # still scanned by this hook — confirm this is acceptable.
        args:
          - "--exclude-files"
          - '.*[^i][^p][^y][^n][^b]$'
          - "--exclude-lines"
          - '"(hash|id|image/\w+)":.*'
94 changes: 94 additions & 0 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"version": "1.0.3",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "AzureStorageKeyDetector"
},
{
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
},
{
"name": "CloudantDetector"
},
{
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
},
{
"name": "IbmCosHmacDetector"
},
{
"name": "JwtTokenDetector"
},
{
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "PrivateKeyDetector"
},
{
"name": "SlackDetector"
},
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {},
"generated_at": "2021-06-14T10:43:14Z"
}
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Image that installs the `pprl` package from source and runs scripts/server.py.
FROM --platform=linux/amd64 python:3.11-slim-bookworm

# Send Python output straight to the container logs instead of buffering it.
ENV PYTHONUNBUFFERED=True
# NOTE(review): presumably read by the application to select production
# behaviour — confirm against server.py.
ENV PRODUCTION=1

# Copy only what is needed to build the package. COPY is used instead of ADD:
# these are plain local files, so ADD's archive-extraction and URL handling
# are not wanted.
COPY pyproject.toml .
COPY src/pprl src/pprl

# Upgrade pip and install the package in a single layer, without keeping the
# pip download cache in the image.
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir .

# NOTE(review): this bakes the local .env file into the image; make sure it
# holds no secrets, or supply the values at run time instead.
COPY .env .
COPY scripts/server.py .

CMD [ "python", "server.py" ]
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Crown copyright Office for National Statistics

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Empty file added MANIFEST.in
Empty file.
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# `pprl`: Privacy-Preserving Record Linkage

## Installation

To install the package, clone this repository and install locally via pip:

```shell
git clone https://github.com/datasciencecampus/pprl.git
cd pprl
python -m pip install .
```

> [!NOTE]
> If you are developing on (or contributing to) the project, install the package as editable with the `dev` optional dependencies:
>
> ```shell
> python -m pip install -e ".[dev]"
> ```
We encourage the use of pre-commit hooks.
To install these, run the following command from the root directory of the repository:
```shell
pre-commit install
```
## Running app.py

The app can be used to convert records to Bloom filters and download them
locally without doing the following steps. However, if you want to upload the
data to Google Cloud Platform, the following steps must be taken:

* Receive the service account private key in a JSON file from the cloud admin and put the file in the "secrets" folder.
* If you have received the file from the cloud admin, skip this step. Otherwise, if you are the cloud admin, create the file as follows: go to Service Accounts in the Google Cloud console. Click on the service account of interest, e.g. "party-1-service-account-name..." or "party-2-service-account-name...". Click on keys, add key and create new key. Click on JSON and press create to download the service account private key.
* Edit the .env file to include the GOOGLE_APPLICATION_CREDENTIALS environment variable as below:


Ensure you insert your own absolute path to the root directory of the `pprl` project and the name of the service account private key JSON file, without the enclosing angle brackets "<>". The absolute path can be found by entering "pwd" in a UNIX terminal or "cd" in a Windows terminal when in the root directory of the `pprl` project. Ensure the quotation marks wrap the whole value.

```shell
GOOGLE_APPLICATION_CREDENTIALS = "<insert absolute path to pprl>/secrets/<service account private key filename>"
```

## Configuring GCP

```shell
$ gcloud auth login
$ gcloud auth configure-docker <GCP region>-docker.pkg.dev
```
Loading

0 comments on commit e17d67f

Please sign in to comment.