Skip to content

Commit

Permalink
Merge pull request #21 from datasciencecampus/simplify-imports
Browse files Browse the repository at this point in the history
Simplify imports
  • Loading branch information
matweldon authored Mar 26, 2024
2 parents 6b6c590 + 1dfafe8 commit 8d80158
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 20 deletions.
5 changes: 2 additions & 3 deletions docs/tutorials/linkage_example_febrl.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ import pandas as pd
import numpy as np
from recordlinkage.datasets import load_febrl4
-import pprl.embedder.features as feat
-from pprl import config
-from pprl.embedder.embedder import EmbeddedDataFrame, Embedder
+from pprl import EmbeddedDataFrame, Embedder, config
+from pprl.embedder import features as feat
datadir = config.DIR_DATA_INTERIM
```
Expand Down
5 changes: 2 additions & 3 deletions docs/tutorials/run-through.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ import os
import pandas as pd
-import pprl.embedder.features as feat
-from pprl import config
-from pprl.embedder.embedder import EmbeddedDataFrame, Embedder
+from pprl import EmbeddedDataFrame, Embedder, config
+from pprl.embedder import features as feat
filestem = config.DIR_DATA_INTERIM
```
Expand Down
3 changes: 1 addition & 2 deletions scripts/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from google.auth import identity_pool
from google.cloud import storage

-from pprl import config, encryption
-from pprl.embedder.embedder import EmbeddedDataFrame, Embedder
+from pprl import EmbeddedDataFrame, Embedder, config, encryption
from pprl.utils.server_utils import add_private_index

## CLOUD FUNCTIONS
Expand Down
4 changes: 4 additions & 0 deletions src/pprl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Privacy-preserving record linkage via Bloom filter embeddings."""

+from .embedder import EmbeddedDataFrame, Embedder
+
+__all__ = ["EmbeddedDataFrame", "Embedder"]
4 changes: 4 additions & 0 deletions src/pprl/embedder/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Tools for generating our Bloom filter embeddings and matchings."""

+from .embedder import EmbeddedDataFrame, Embedder
+
+__all__ = ["EmbeddedDataFrame", "Embedder"]
22 changes: 11 additions & 11 deletions test/embedder/test_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import pandas as pd
from hypothesis import HealthCheck, given, settings

-import pprl.embedder.features as feat
-from pprl.embedder import embedder as em
+from pprl import EmbeddedDataFrame, Embedder
+from pprl.embedder import features as feat

from .strategies import st_matrix_and_indices, st_posdef_matrices

Expand Down Expand Up @@ -36,7 +36,7 @@ def test_calculate_norm(matrix_and_indices):
self_mock = mock.Mock()
self_mock.embedder.scm_matrix = scm_matrix

-result = em.EmbeddedDataFrame._calculate_norm(self_mock, bf_indices)
+result = EmbeddedDataFrame._calculate_norm(self_mock, bf_indices)

expected = alt_calculate_norm(scm_matrix, bf_indices)

Expand All @@ -57,10 +57,10 @@ def test_update_norms(posdef_matrix):
df = pd.DataFrame(
dict(idx=[x for x in range(nrows)], bf_indices=[list(range(i)) for i in range(nrows)])
)
-embedder_mock = mock.Mock(em.Embedder)
+embedder_mock = mock.Mock(Embedder)
embedder_mock.scm_matrix = posdef_matrix
embedder_mock.checksum = "1234"
-edf = em.EmbeddedDataFrame(df, embedder_mock, update_norms=False)
+edf = EmbeddedDataFrame(df, embedder_mock, update_norms=False)
columns0 = list(edf.columns)
_ = edf.update_norms()
columns1 = list(edf.columns)
Expand All @@ -69,7 +69,7 @@ def test_update_norms(posdef_matrix):
columns2 = list(edf.columns)
bf_norms2 = list(edf["bf_norms"])

-assert isinstance(_, em.EmbeddedDataFrame)
+assert isinstance(_, EmbeddedDataFrame)
assert set(columns1).difference(columns0) == {"bf_norms"}
assert columns1 == columns2
assert bf_norms1 == bf_norms2
Expand All @@ -85,7 +85,7 @@ def test_embed_colspec():
)
)

-embedder = em.Embedder(
+embedder = Embedder(
feature_factory={
"name": feat.gen_name_features,
"dob": feat.gen_dateofbirth_features,
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_embed_name_sex_features():

colspec = dict(column1="name", column2="sex")

-embedder = em.Embedder(
+embedder = Embedder(
feature_factory={
"name": feat.gen_name_features,
"sex": feat.gen_sex_features,
Expand Down Expand Up @@ -151,7 +151,7 @@ def test_embed_dob_features():

colspec = dict(column1="dob")

-embedder = em.Embedder(
+embedder = Embedder(
feature_factory={
"dob": feat.gen_dateofbirth_features,
},
Expand Down Expand Up @@ -179,7 +179,7 @@ def test_embed_all_features():

colspec = dict(column1="name", column2="sex")

-embedder = em.Embedder(
+embedder = Embedder(
feature_factory={
"name": feat.gen_name_features,
"sex": feat.gen_sex_features,
Expand All @@ -200,7 +200,7 @@ def test_SimilarityArray_match():
df2.index = df2.name
colspec = dict(name="name")

-embedder = em.Embedder(
+embedder = Embedder(
feature_factory=dict(name=feat.gen_name_features),
ff_args=dict(name=dict(ngram_length=[2])),
bf_size=1024,
Expand Down
2 changes: 1 addition & 1 deletion test/embedder/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from hypothesis import strategies as st
from metaphone import doublemetaphone

-import pprl.embedder.features as feat
+from pprl.embedder import features as feat

from .strategies import (
NAMES,
Expand Down

0 comments on commit 8d80158

Please sign in to comment.