diff --git a/sklearn2pmml/preprocessing/__init__.py b/sklearn2pmml/preprocessing/__init__.py
index f96532d..d180d9b 100644
--- a/sklearn2pmml/preprocessing/__init__.py
+++ b/sklearn2pmml/preprocessing/__init__.py
@@ -573,6 +573,9 @@ class RegExTransformer(BaseEstimator, TransformerMixin):
 	def __init__(self, pattern, re_flavour):
 		super(RegExTransformer, self).__init__()
 		self.pattern = pattern
+		re_flavours = ["pcre", "pcre2", "re"]
+		if re_flavour not in re_flavours:
+			raise ValueError("Regular Expressions flavour {0} not in {1}".format(re_flavour, re_flavours))
 		self.re_flavour = re_flavour
 
 	def _regex_func(self, regex_engine):
@@ -592,7 +595,7 @@ def transform(self, X):
 class MatchesTransformer(RegExTransformer):
 	"""Match RE pattern."""
 
-	def __init__(self, pattern, re_flavour = None):
+	def __init__(self, pattern, re_flavour = "re"):
 		super(MatchesTransformer, self).__init__(pattern = pattern, re_flavour = re_flavour)
 
 	def _regex_func(self, regex_engine):
@@ -604,7 +607,7 @@ def matches(x):
 class ReplaceTransformer(RegExTransformer):
 	"""Replace all RE pattern matches."""
 
-	def __init__(self, pattern, replacement, re_flavour = None):
+	def __init__(self, pattern, replacement, re_flavour = "re"):
 		super(ReplaceTransformer, self).__init__(pattern = pattern, re_flavour = re_flavour)
 		self.replacement = replacement
 
diff --git a/sklearn2pmml/preprocessing/regex.py b/sklearn2pmml/preprocessing/regex.py
index c2eb6cd..15d14e9 100644
--- a/sklearn2pmml/preprocessing/regex.py
+++ b/sklearn2pmml/preprocessing/regex.py
@@ -59,15 +59,6 @@ def replace(self, replacement, x):
 		return self.pattern_.substitute(replacement, x)
 
 def make_regex_engine(pattern, re_flavour):
-	if re_flavour is None:
-		try:
-			import pcre2
-
-			re_flavour = "pcre2"
-		except ImportError:
-			warnings.warn("Perl Compatible Regular Expressions (PCRE) library is not available, falling back to built-in Regular Expressions (RE) library. Transformation results might not be reproducible between Python and PMML environments when using more complex patterns", Warning)
-			re_flavour = "re"
-
 	if re_flavour == "pcre":
 		return PCREEngine(pattern)
 	elif re_flavour == "pcre2":
@@ -75,5 +66,4 @@ def make_regex_engine(pattern, re_flavour):
 	elif re_flavour == "re":
 		return REEngine(pattern)
 	else:
-		re_flavours = ["pcre", "pcre2", "re"]
-		raise ValueError("Regular Expressions flavour {0} not in {1}".format(re_flavour, re_flavours))
+		raise ValueError(re_flavour)
diff --git a/sklearn2pmml/preprocessing/tests/__init__.py b/sklearn2pmml/preprocessing/tests/__init__.py
index 0987824..648e080 100644
--- a/sklearn2pmml/preprocessing/tests/__init__.py
+++ b/sklearn2pmml/preprocessing/tests/__init__.py
@@ -657,14 +657,14 @@ class MatchesTransformerTest(TransformerTest):
 
 	def test_transform(self):
 		X = Series(["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
-		transformer = MatchesTransformer("ar?y", re_flavour = "re")
+		transformer = MatchesTransformer("ar?y")
 		self.assertEqual([True, True, False, False, True, False, False, False, False, False, False, False], self._transform1d(transformer, X).tolist())
 
 class ReplaceTransformerTest(TransformerTest):
 
 	def test_transform(self):
 		X = Series(["A", "B", "BA", "BB", "BAB", "ABBA", "BBBB"])
-		transformer = ReplaceTransformer("B+", "c", re_flavour = "re")
+		transformer = ReplaceTransformer("B+", "c")
 		self.assertEqual(["A", "c", "cA", "c", "cAc", "AcA", "c"], self._transform1d(transformer, X).tolist())
 		vectorizer = CountVectorizer(token_pattern = r"\w+")
 		pipeline = make_pipeline(transformer, vectorizer)