From e6f3dfe1f27b5e37337973599a7fa4cf21ac589e Mon Sep 17 00:00:00 2001 From: Henry Wilde Date: Wed, 3 Apr 2024 12:58:06 +0100 Subject: [PATCH] Make tutorial file paths local (#42) --- docs/tutorials/example-febrl.qmd | 11 +++-------- docs/tutorials/run-through.qmd | 10 ++++------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/docs/tutorials/example-febrl.qmd b/docs/tutorials/example-febrl.qmd index c228a62..5c674cf 100644 --- a/docs/tutorials/example-febrl.qmd +++ b/docs/tutorials/example-febrl.qmd @@ -20,8 +20,6 @@ from recordlinkage.datasets import load_febrl4 from pprl import EmbeddedDataFrame, Embedder, config from pprl.embedder import features as feat - -datadir = config.DIR_DATA_INTERIM ``` ## Load the data @@ -45,9 +43,6 @@ feb4b["true_id"] = ( .iloc[:, 0].astype("int") .to_list() ) - -feb4a.to_csv(os.path.join(datadir, "febrl_data_1.csv")) -feb4b.to_csv(os.path.join(datadir, "febrl_data_2.csv")) ``` ## Create a feature factory @@ -128,9 +123,9 @@ edf2 = embedder.embed(feb4b, colspec=colspec) Store the embedded datasets and their embedder to file. ```{python} -edf1.to_json(os.path.join(datadir, "party1_data.json")) -edf2.to_json(os.path.join(datadir, "party2_data.json")) -embedder.to_pickle(os.path.join(datadir, "embedder.pkl")) +edf1.to_json("party1_data.json") +edf2.to_json("party2_data.json") +embedder.to_pickle("embedder.pkl") ``` ## Calculate similarity diff --git a/docs/tutorials/run-through.qmd b/docs/tutorials/run-through.qmd index f844207..39201f6 100644 --- a/docs/tutorials/run-through.qmd +++ b/docs/tutorials/run-through.qmd @@ -21,8 +21,6 @@ import pandas as pd from pprl import EmbeddedDataFrame, Embedder, config from pprl.embedder import features as feat - -filestem = config.DIR_DATA_INTERIM ``` ## Data set-up @@ -160,9 +158,9 @@ matching server. For this purpose, it's possible to pickle the entire `Embedder` object. ```{python} -embedder.to_pickle(os.path.join(filestem, "embedder.pkl")) +embedder.to_pickle("embedder.pkl") -embedder_copy = Embedder.from_pickle(os.path.join(filestem, "embedder.pkl")) +embedder_copy = Embedder.from_pickle("embedder.pkl") ``` The copy has the same functionality as the original: @@ -194,9 +192,9 @@ The EDF objects are just a thin wrapper around `pandas.DataFrame` instances, so you can serialise to JSON using the normal methods. ```{python} -edf1.to_json(os.path.join(filestem, "edf1.json")) +edf1.to_json("edf1.json") -edf1_copy = pd.read_json(os.path.join(filestem, "edf1.json")) +edf1_copy = pd.read_json("edf1.json") print(isinstance(edf1_copy,EmbeddedDataFrame)) print(isinstance(edf1_copy,pd.DataFrame))