Skip to content

Commit

Permalink
Merge pull request #46 from mindsdb/fix/typeinfer_0018
Browse files: browse the repository at this point in the history
Fix: add support for type_infer 0.0.18
  • Loading branch information
paxcema authored Dec 25, 2023
2 parents 0aaa84a + e1a20db commit 3784dba
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion dataprep_ml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataprep_ml.base import StatisticalAnalysis, DataAnalysis

__version__ = '0.0.21'
__version__ = '0.0.22'
__name__ = "dataprep_ml"


Expand Down
4 changes: 2 additions & 2 deletions dataprep_ml/insights.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dateutil.parser import parse as parse_dt

from type_infer.dtype import dtype
from type_infer.infer import infer_types
from type_infer.api import infer_types

from dataprep_ml.cleaners import cleaner
from dataprep_ml.cleaners import _clean_float
Expand All @@ -35,7 +35,7 @@ def analyze_dataset(df: pd.DataFrame, target: Optional[str] = None, args: Option
else:
args['target'] = target

type_information = infer_types(df, args.get('pct_invalid', 2))
type_information = infer_types(df)
stats = statistical_analysis(df, type_information.dtypes, args, type_information.identifiers)
return DataAnalysis(type_information=type_information, statistical_analysis=stats)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dataprep-ml"
version = "0.0.21"
version = "0.0.22"
description = "Automated dataframe analysis for Machine Learning pipelines."
authors = ["MindsDB Inc. <[email protected]>"]
license = "GPL-3.0"
Expand Down
12 changes: 6 additions & 6 deletions tests/integration_tests/test_cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd

from type_infer.infer import infer_types
from type_infer.api import infer_types

from dataprep_ml.cleaners import cleaner
from dataprep_ml.imputers import NumericalImputer, CategoricalImputer
Expand All @@ -12,7 +12,7 @@ class TestCleaners(unittest.TestCase):

def test_0_airline_sentiment(self):
df = pd.read_csv("tests/data/airline_sentiment_sample.csv")
inferred_types = infer_types(df, pct_invalid=0)
inferred_types = infer_types(df, config={'pct_invalid': 0})
target = 'airline_sentiment'
tss = {
'is_timeseries': False,
Expand All @@ -32,7 +32,7 @@ def test_0_airline_sentiment(self):

def test_1_hdi(self):
df = pd.read_csv("tests/data/hdi.csv")
inferred_types = infer_types(df, pct_invalid=0)
inferred_types = infer_types(df, config={'pct_invalid': 0})
target = 'Development Index'
tss = {
'is_timeseries': False,
Expand All @@ -54,7 +54,7 @@ def test_2_imputers(self):
df = df.rename(columns={'GDP ($ per capita)': 'GDP', 'Area (sq. mi.)': 'Area', 'Literacy (%)': 'Literacy'})
df['Infant mortality '] = df['Infant mortality '].apply(lambda x: 'High' if x >= 20 else 'Low')

inferred_types = infer_types(df, pct_invalid=0)
inferred_types = infer_types(df, config={'pct_invalid': 0})
target = 'Development Index'
tss = {
'is_timeseries': False,
Expand Down Expand Up @@ -164,7 +164,7 @@ def test_3_timeseries_dedupe(self):
})

# inferred types are the same for both DataFrames
inferred_types = infer_types(df_correct, pct_invalid=0)
inferred_types = infer_types(df_correct, config={'pct_invalid': 0})
target = 'z'
tss = {
'is_timeseries': True,
Expand Down Expand Up @@ -208,7 +208,7 @@ def test_4_timeseries_dedupe(self):
'order_by': 'T',
'group_by': 'Country'
}
inferred_types = infer_types(data, pct_invalid=0)
inferred_types = infer_types(data, config={'pct_invalid': 0})
transformed = cleaner(data=data,
dtype_dict=inferred_types.dtypes,
pct_invalid=0.1,
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests/test_insights.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
import pandas as pd

from type_infer.infer import infer_types
from type_infer.api import infer_types

from dataprep_ml.base import StatisticalAnalysis
from dataprep_ml.insights import statistical_analysis
Expand All @@ -10,7 +10,7 @@
class TestInsights(unittest.TestCase):
def test_0_hdi(self):
df = pd.read_csv("tests/data/hdi.csv")
inferred_types = infer_types(df, pct_invalid=0)
inferred_types = infer_types(df, config={'pct_invalid': 0})
args = {'target': 'Development Index'}
sa = statistical_analysis(data=df,
dtypes=inferred_types.dtypes,
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests/test_splitters.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import unittest
import pandas as pd

from type_infer.infer import infer_types
from type_infer.api import infer_types

from dataprep_ml.splitters import splitter


class TestSplitters(unittest.TestCase):
def test_0_hdi(self):
df = pd.read_csv("tests/data/hdi.csv")
inferred_types = infer_types(df, pct_invalid=0)
inferred_types = infer_types(df, config={'pct_invalid': 0})
target = 'Development Index'

train_pct, dev_pct, test_pct = 0.8, 0.1, 0.1
Expand Down

0 comments on commit 3784dba

Please sign in to comment.