diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2247ff1..73e5845 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: os: [windows-latest] # python-version: [2.7, 3.5, 3.6, 3.7, 3.8] python-version: [3.9] - tox-env: [py27, py35, py36, py37, py38, py39, py310] + tox-env: [py27, py36, py37, py38, py39, py310] steps: - uses: actions/checkout@v2 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c001007..da7557d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,18 @@ Changelog --------- +`0.46.0`_ (2022-02-12) ++++++++++++++++++++++++++ + +* **[New]** ``pypinyin.contrib.tone_convert`` 模块新增 ``to_finals_tone`` 、``to_finals_tone2`` + 以及 ``to_finals_tone3`` 函数,用于将拼音转换为 ``Style.FINALS_TONE`` 、 ``Style.FINALS_TONE2`` + 以及 ``Style.FINALS_TONE3`` 风格的结果。 +* **[Changed]** 将 ``pypinyin.contrib.tone_convert`` 模块中 ``to_tone2`` 、 ``to_tone3``、 + ``tone_to_tone2`` 及 ``tone_to_tone3`` 函数的 ``neutral_tone_with_5`` 参数重命名为统一的 + ``neutral_tone_with_five`` 参数名称,兼容已有代码传入的 ``neutral_tone_with_5`` 参数。建议升级版本后择期修改为 + 使用新的 ``neutral_tone_with_five`` 参数名称。 + + `0.45.0`_ (2022-01-23) +++++++++++++++++++++++++ @@ -960,3 +972,4 @@ __ https://github.com/mozillazg/python-pinyin/issues/8 .. _0.43.0: https://github.com/mozillazg/python-pinyin/compare/v0.42.1...v0.43.0 .. _0.44.0: https://github.com/mozillazg/python-pinyin/compare/v0.43.0...v0.44.0 .. _0.45.0: https://github.com/mozillazg/python-pinyin/compare/v0.44.0...v0.45.0 +.. _0.46.0: https://github.com/mozillazg/python-pinyin/compare/v0.45.0...v0.46.0 diff --git a/README.rst b/README.rst index 105e7e9..9da44a1 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ 将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。 -基于 `hotoo/pinyin `__ 开发。 +最初版本的代码参考了 `hotoo/pinyin `__ 的实现。 * Documentation: http://pypinyin.rtfd.io/ * GitHub: https://github.com/mozillazg/python-pinyin @@ -166,6 +166,29 @@ y,w,ü (yu) 都不是声母。 详见 `strict 参数的影响`_ 。 + +如何将某一风格的拼音转换为其他风格的拼音 +++++++++++++++++++++++++++++++++++++++++++++ + +可以通过 ``pypinyin.contrib.tone_convert`` 模块对标准拼音进行转换,得到不同风格的拼音。 +比如将 ``zhōng`` 转换为 ``zhong``,或者获取拼音中的声母或韵母数据: + +.. code-block:: python + + >>> from pypinyin.contrib.tone_convert import to_normal, to_tone, to_initials, to_finals + >>> to_normal('zhōng') + 'zhong' + >>> to_tone('zhong1') + 'zhōng' + >>> to_initials('zhōng') + 'zh' + >>> to_finals('zhōng') + 'ong' + +更多拼音转换的辅助函数,详见 ``pypinyin.contrib.tone_convert`` 模块的 +`文档 `__ 。 + + 如何减少内存占用 ++++++++++++++++++++ diff --git a/docs/contrib.rst b/docs/contrib.rst index 0a51218..2966182 100644 --- a/docs/contrib.rst +++ b/docs/contrib.rst @@ -14,6 +14,9 @@ contrib .. autofunction:: pypinyin.contrib.tone_convert.to_tone3 .. autofunction:: pypinyin.contrib.tone_convert.to_initials .. autofunction:: pypinyin.contrib.tone_convert.to_finals +.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone +.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone2 +.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone3 .. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal .. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2 diff --git a/docs/index.rst b/docs/index.rst index 934f76d..2749012 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,7 +11,7 @@ 将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。 -基于 `hotoo/pinyin `__ 开发。 +最初版本的代码参考了 `hotoo/pinyin `__ 的实现。 * Documentation: http://pypinyin.rtfd.io * GitHub: https://github.com/mozillazg/python-pinyin diff --git a/pypinyin/contrib/tone_convert.py b/pypinyin/contrib/tone_convert.py index d22886e..00777fb 100644 --- a/pypinyin/contrib/tone_convert.py +++ b/pypinyin/contrib/tone_convert.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -# 向后兼容 from pypinyin.style._tone_convert import ( # noqa to_normal, to_tone, @@ -9,6 +8,9 @@ to_tone3, to_initials, to_finals, + to_finals_tone, + to_finals_tone2, + to_finals_tone3, tone_to_normal, tone_to_tone2, tone_to_tone3, @@ -18,6 +20,7 @@ tone3_to_normal, tone3_to_tone, tone3_to_tone2, + # 向后兼容 _improve_tone3, _get_number_from_pinyin, _v_to_u, diff --git a/pypinyin/contrib/tone_convert.pyi b/pypinyin/contrib/tone_convert.pyi index d2323ce..38afff4 100644 --- a/pypinyin/contrib/tone_convert.pyi +++ b/pypinyin/contrib/tone_convert.pyi @@ -10,19 +10,25 @@ def to_normal(pinyin: Text, v_to_u: bool = ...) -> Text: ... def to_tone(pinyin: Text) -> Text: ... -def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... -def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... def to_initials(pinyin: Text, strict: bool = ...) -> Text: ... def to_finals(pinyin: Text, strict: bool = ..., v_to_u: bool = ...) -> Text: ... +def to_finals_tone(pinyin: Text, strict: bool = ...) -> Text: ... + +def to_finals_tone2(pinyin: Text, strict: bool = ..., v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... + +def to_finals_tone3(pinyin: Text, strict: bool = ..., v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... + def tone_to_normal(tone: Text, v_to_u: bool = ...) -> Text: ... -def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... -def tone_to_tone3(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def tone_to_tone3(tone: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... def tone2_to_normal(tone2: Text, v_to_u: bool = ...) -> Text: ... @@ -36,7 +42,7 @@ def tone3_to_tone(tone3: Text) -> Text: ... def tone3_to_tone2(tone3: Text, v_to_u: bool = ...) -> Text: ... -def _improve_tone3(tone3: Text, neutral_tone_with_5: bool = ...) -> Text: ... +def _improve_tone3(tone3: Text, neutral_tone_with_five: bool = ...) -> Text: ... def _get_number_from_pinyin(pinyin: Text) -> Optional[int]: ... diff --git a/pypinyin/style/__init__.py b/pypinyin/style/__init__.py index 6bd17f5..a483408 100644 --- a/pypinyin/style/__init__.py +++ b/pypinyin/style/__init__.py @@ -25,7 +25,11 @@ def convert(pinyin, style, strict, default=None, **kwargs): def register(style, func=None): - """注册一个拼音风格实现 + """注册一个拼音风格实现。 + 自定义的函数应当使用 ``**kwargs`` 来兼容后续可能会新增的关键字参数, + 当前默认会传递如下参数: + + * ``strict`` :: diff --git a/pypinyin/style/_tone_convert.py b/pypinyin/style/_tone_convert.py index 6e5fedc..cdbea82 100644 --- a/pypinyin/style/_tone_convert.py +++ b/pypinyin/style/_tone_convert.py @@ -8,7 +8,7 @@ from pypinyin.style.tone import converter from pypinyin.style._utils import ( get_initials, replace_symbol_to_no_symbol, - get_finals + get_finals, replace_symbol_to_number ) _re_number = re.compile(r'\d') @@ -71,7 +71,7 @@ def to_tone(pinyin): return s -def to_tone2(pinyin, v_to_u=False, neutral_tone_with_5=False): +def to_tone2(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs): """将 :py:attr:`~pypinyin.Style.TONE` 或 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 @@ -80,7 +80,10 @@ def to_tone2(pinyin, v_to_u=False, neutral_tone_with_5=False): :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, 当为 False 时结果中将使用 ``v`` 表示 ``ü`` - :param neutral_tone_with_5: 是否使用 ``5`` 标识轻声 + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数,当传入 + ``neutral_tone_with_5`` 参数时, + 将覆盖 ``neutral_tone_with_five`` 的值。 :return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 Usage:: @@ -92,20 +95,22 @@ def to_tone2(pinyin, v_to_u=False, neutral_tone_with_5=False): 'zho1ng' >>> to_tone2('shang') 'shang' - >>> to_tone2('shang', neutral_tone_with_5=True) + >>> to_tone2('shang', neutral_tone_with_five=True) 'sha5ng' >>> to_tone2('lüè') 'lve4' >>> to_tone2('lüè', v_to_u=True) 'lüe4' """ + if kwargs.get('neutral_tone_with_5', None) is not None: + neutral_tone_with_five = kwargs['neutral_tone_with_5'] s = tone_to_tone3( - pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5) + pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five) s = tone3_to_tone2(s) return _fix_v_u(pinyin, s, v_to_u) -def to_tone3(pinyin, v_to_u=False, neutral_tone_with_5=False): +def to_tone3(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs): """将 :py:attr:`~pypinyin.Style.TONE` 或 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音转换为 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 @@ -114,7 +119,10 @@ def to_tone3(pinyin, v_to_u=False, neutral_tone_with_5=False): :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, 当为 False 时结果中将使用 ``v`` 表示 ``ü`` - :param neutral_tone_with_5: 是否使用 ``5`` 标识轻声 + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数,当传入 + ``neutral_tone_with_5`` 参数时, + 将覆盖 ``neutral_tone_with_five`` 的值。 :return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 Usage:: @@ -126,15 +134,17 @@ def to_tone3(pinyin, v_to_u=False, neutral_tone_with_5=False): 'zhong1' >>> to_tone3('shang') 'shang' - >>> to_tone3('shang', neutral_tone_with_5=True) + >>> to_tone3('shang', neutral_tone_with_five=True) 'shang5' >>> to_tone3('lüè') 'lve4' >>> to_tone3('lüè', v_to_u=True) 'lüe4' """ + if kwargs.get('neutral_tone_with_5', None) is not None: + neutral_tone_with_five = kwargs['neutral_tone_with_5'] s = tone_to_tone2( - pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5) + pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five) s = tone2_to_tone3(s) return _fix_v_u(pinyin, s, v_to_u) @@ -194,6 +204,106 @@ def to_finals(pinyin, strict=True, v_to_u=False): return finals +def to_finals_tone(pinyin, strict=True): + """将 :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为 + :py:attr:`~pypinyin.Style.FINALS_TONE` 风格的拼音 + + :param pinyin: :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 + :param strict: 返回结果是否严格遵照《汉语拼音方案》来处理声母和韵母, + 详见 :ref:`strict` + :return: :py:attr:`~pypinyin.Style.FINALS_TONE` 风格的拼音 + + Usage:: + + >>> from pypinyin.contrib.tone_convert import to_finals_tone + >>> to_finals_tone('zhōng') + 'ōng' + + """ + finals = to_finals_tone2(pinyin, strict=strict) + + finals = tone2_to_tone(finals) + + return finals + + +def to_finals_tone2(pinyin, strict=True, v_to_u=False, + neutral_tone_with_five=False): + """将 :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为 + :py:attr:`~pypinyin.Style.FINALS_TONE2` 风格的拼音 + + :param pinyin: :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 + :param strict: 返回结果是否严格遵照《汉语拼音方案》来处理声母和韵母, + 详见 :ref:`strict` + :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, + 当为 False 时结果中将使用 ``v`` 表示 ``ü`` + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :return: :py:attr:`~pypinyin.Style.FINALS_TONE2` 风格的拼音 + + Usage:: + + >>> from pypinyin.contrib.tone_convert import to_finals_tone2 + >>> to_finals_tone2('zhōng') + 'o1ng' + + """ + finals = to_finals_tone3(pinyin, strict=strict, v_to_u=v_to_u, + neutral_tone_with_five=neutral_tone_with_five) + + finals = tone3_to_tone2(finals, v_to_u=v_to_u) + + return finals + + +def to_finals_tone3(pinyin, strict=True, v_to_u=False, + neutral_tone_with_five=False): + """将 :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为 + :py:attr:`~pypinyin.Style.FINALS_TONE3` 风格的拼音 + + :param pinyin: :py:attr:`~pypinyin.Style.TONE`、 + :py:attr:`~pypinyin.Style.TONE2` 或 + :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 + :param strict: 返回结果是否严格遵照《汉语拼音方案》来处理声母和韵母, + 详见 :ref:`strict` + :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, + 当为 False 时结果中将使用 ``v`` 表示 ``ü`` + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :return: :py:attr:`~pypinyin.Style.FINALS_TONE3` 风格的拼音 + + Usage:: + + >>> from pypinyin.contrib.tone_convert import to_finals_tone3 + >>> to_finals_tone3('zhōng') + 'ong1' + + """ + finals = to_finals(pinyin, strict=strict, v_to_u=v_to_u) + if not finals: + return finals + + numbers = _re_number.findall(replace_symbol_to_number(pinyin)) + if not numbers: + if neutral_tone_with_five: + numbers = ['5'] + else: + return finals + + number = numbers[0] + finals = finals + number + + return finals + + def tone_to_normal(tone, v_to_u=False): """将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为 :py:attr:`~pypinyin.Style.NORMAL` 风格的拼音 @@ -218,14 +328,17 @@ def tone_to_normal(tone, v_to_u=False): return _v_to_u(s, v_to_u) -def tone_to_tone2(tone, v_to_u=False, neutral_tone_with_5=False): +def tone_to_tone2(tone, v_to_u=False, neutral_tone_with_five=False, **kwargs): """将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 :param tone: :py:attr:`~pypinyin.Style.TONE` 风格的拼音 :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, 当为 False 时结果中将使用 ``v`` 表示 ``ü`` - :param neutral_tone_with_5: 是否使用 ``5`` 标识轻声 + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数,当传入 + ``neutral_tone_with_5`` 参数时, + 将覆盖 ``neutral_tone_with_five`` 的值。 :return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音 Usage:: @@ -242,20 +355,25 @@ def tone_to_tone2(tone, v_to_u=False, neutral_tone_with_5=False): >>> tone_to_tone2('lüè', v_to_u=True) 'lüe4' """ + if kwargs.get('neutral_tone_with_5', None) is not None: + neutral_tone_with_five = kwargs['neutral_tone_with_5'] tone3 = tone_to_tone3( - tone, v_to_u=v_to_u, neutral_tone_with_5=neutral_tone_with_5) + tone, v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five) s = tone3_to_tone2(tone3) return _v_to_u(s, v_to_u) -def tone_to_tone3(tone, v_to_u=False, neutral_tone_with_5=False): +def tone_to_tone3(tone, v_to_u=False, neutral_tone_with_five=False, **kwargs): """将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 :param tone: :py:attr:`~pypinyin.Style.TONE` 风格的拼音 :param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``, 当为 False 时结果中将使用 ``v`` 表示 ``ü`` - :param neutral_tone_with_5: 是否使用 ``5`` 标识轻声 + :param neutral_tone_with_five: 是否使用 ``5`` 标识轻声 + :param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数,当传入 + ``neutral_tone_with_5`` 参数时, + 将覆盖 ``neutral_tone_with_five`` 的值。 :return: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音 Usage:: @@ -265,15 +383,17 @@ def tone_to_tone3(tone, v_to_u=False, neutral_tone_with_5=False): 'zhong1' >>> tone_to_tone3('shang') 'shang' - >>> tone_to_tone3('shang', neutral_tone_with_5=True) + >>> tone_to_tone3('shang', neutral_tone_with_five=True) 'shang5' >>> tone_to_tone3('lüè') 'lve4' >>> tone_to_tone3('lüè', v_to_u=True) 'lüe4' """ + if kwargs.get('neutral_tone_with_5', None) is not None: + neutral_tone_with_five = kwargs['neutral_tone_with_5'] tone3 = converter.to_tone3(tone) - s = _improve_tone3(tone3, neutral_tone_with_5=neutral_tone_with_5) + s = _improve_tone3(tone3, neutral_tone_with_five=neutral_tone_with_five) return _v_to_u(s, v_to_u) @@ -423,9 +543,9 @@ def tone3_to_tone2(tone3, v_to_u=False): return _fix_v_u(tone3, s, v_to_u=v_to_u) -def _improve_tone3(tone3, neutral_tone_with_5=False): +def _improve_tone3(tone3, neutral_tone_with_five=False): number = _get_number_from_pinyin(tone3) - if number is None and neutral_tone_with_5: + if number is None and neutral_tone_with_five: tone3 = '{}5'.format(tone3) return tone3 diff --git a/pypinyin/style/_tone_convert.pyi b/pypinyin/style/_tone_convert.pyi index 4ada714..5a1e7de 100644 --- a/pypinyin/style/_tone_convert.pyi +++ b/pypinyin/style/_tone_convert.pyi @@ -10,19 +10,25 @@ def to_normal(pinyin: Text, v_to_u: bool = ...) -> Text: ... def to_tone(pinyin: Text) -> Text: ... -def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def to_tone2(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... -def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def to_tone3(pinyin: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... def to_initials(pinyin: Text, strict: bool = ...) -> Text: ... def to_finals(pinyin: Text, strict: bool = ..., v_to_u: bool = ...) -> Text: ... +def to_finals_tone(pinyin: Text, strict: bool = ...) -> Text: ... + +def to_finals_tone2(pinyin: Text, strict: bool = ..., v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... + +def to_finals_tone3(pinyin: Text, strict: bool = ..., v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... + def tone_to_normal(tone: Text, v_to_u: bool = ...) -> Text: ... -def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def tone_to_tone2(tone: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... -def tone_to_tone3(tone: Text, v_to_u: bool = ..., neutral_tone_with_5: bool = ...) -> Text: ... +def tone_to_tone3(tone: Text, v_to_u: bool = ..., neutral_tone_with_five: bool = ...) -> Text: ... def tone2_to_normal(tone2: Text, v_to_u: bool = ...) -> Text: ... @@ -36,7 +42,7 @@ def tone3_to_tone(tone3: Text) -> Text: ... def tone3_to_tone2(tone3: Text, v_to_u: bool = ...) -> Text: ... -def _improve_tone3(tone3: Text, neutral_tone_with_5: bool = ...) -> Text: ... +def _improve_tone3(tone3: Text, neutral_tone_with_five: bool = ...) -> Text: ... def _get_number_from_pinyin(pinyin: Text) -> Optional[int]: ... diff --git a/pypinyin/style/finals.py b/pypinyin/style/finals.py index bf0ebc7..fc5e08e 100644 --- a/pypinyin/style/finals.py +++ b/pypinyin/style/finals.py @@ -10,11 +10,9 @@ from pypinyin.constants import Style from pypinyin.style import register -from pypinyin.style._constants import RE_NUMBER from pypinyin.style._tone_convert import ( - tone3_to_tone2, tone2_to_tone, to_finals + to_finals, to_finals_tone, to_finals_tone2, to_finals_tone3 ) -from pypinyin.style._utils import replace_symbol_to_number class FinalsConverter(object): @@ -24,34 +22,15 @@ def to_finals(self, pinyin, **kwargs): def to_finals_tone(self, pinyin, **kwargs): """声调在韵母头上""" - finals = self.to_finals_tone2(pinyin, **kwargs) - - finals = tone2_to_tone(finals) - - return finals + return to_finals_tone(pinyin, strict=kwargs.get('strict', True)) def to_finals_tone2(self, pinyin, **kwargs): """数字声调""" - finals = self.to_finals_tone3(pinyin, **kwargs) - - finals = tone3_to_tone2(finals) - - return finals + return to_finals_tone2(pinyin, strict=kwargs.get('strict', True)) def to_finals_tone3(self, pinyin, **kwargs): """数字声调""" - finals = self.to_finals(pinyin, **kwargs) - if not finals: - return finals - - numbers = RE_NUMBER.findall(replace_symbol_to_number(pinyin)) - if not numbers: - return finals - - number = numbers[0] - finals = finals + number - - return finals + return to_finals_tone3(pinyin, strict=kwargs.get('strict', True)) converter = FinalsConverter() diff --git a/tests/contrib/test_tone_convert.py b/tests/contrib/test_tone_convert.py index b4de04f..f3bac25 100644 --- a/tests/contrib/test_tone_convert.py +++ b/tests/contrib/test_tone_convert.py @@ -18,7 +18,10 @@ to_tone2, to_tone3, to_initials, - to_finals + to_finals, + to_finals_tone, + to_finals_tone2, + to_finals_tone3, ) @@ -66,15 +69,20 @@ def test_tone_tone2(pinyin, result): assert to_tone2(pinyin) == result -@mark.parametrize('pinyin,neutral_tone_with_5,result', [ +@mark.parametrize('pinyin,neutral_tone_with_five,result', [ ['shang', False, 'shang'], ['shang', True, 'sha5ng'], ]) -def test_tone_tone2_with_neutral_tone_with_5( - pinyin, neutral_tone_with_5, result): +def test_tone_tone2_with_neutral_tone_with_five( + pinyin, neutral_tone_with_five, result): assert tone_to_tone2( - pinyin, neutral_tone_with_5=neutral_tone_with_5) == result - assert to_tone2(pinyin, neutral_tone_with_5=neutral_tone_with_5) == result + pinyin, neutral_tone_with_five=neutral_tone_with_five) == result + assert tone_to_tone2( + pinyin, neutral_tone_with_5=neutral_tone_with_five) == result + assert to_tone2(pinyin, + neutral_tone_with_five=neutral_tone_with_five) == result + assert to_tone2(pinyin, + neutral_tone_with_5=neutral_tone_with_five) == result assert tone2_to_tone(result) == pinyin assert to_tone(result) == pinyin @@ -107,16 +115,20 @@ def test_tone_tone3(pinyin, result): assert to_tone3(pinyin) == result -@mark.parametrize('pinyin,neutral_tone_with_5,result', [ +@mark.parametrize('pinyin,neutral_tone_with_five,result', [ ['shang', False, 'shang'], ['shang', True, 'shang5'], ]) -def test_tone_tone3_with_neutral_tone_with_5( - pinyin, neutral_tone_with_5, result): +def test_tone_tone3_with_neutral_tone_with_five( + pinyin, neutral_tone_with_five, result): + assert tone_to_tone3( + pinyin, neutral_tone_with_five=neutral_tone_with_five) == result assert tone_to_tone3( - pinyin, neutral_tone_with_5=neutral_tone_with_5) == result + pinyin, neutral_tone_with_5=neutral_tone_with_five) == result + assert to_tone3( + pinyin, neutral_tone_with_five=neutral_tone_with_five) == result assert to_tone3( - pinyin, neutral_tone_with_5=neutral_tone_with_5) == result + pinyin, neutral_tone_with_5=neutral_tone_with_five) == result assert tone3_to_tone(result) == pinyin assert to_tone(result) == pinyin @@ -280,6 +292,57 @@ def test_to_finals(pinyin, strict, v_to_u, result): assert to_finals(pinyin, strict=strict, v_to_u=v_to_u) == result +@mark.parametrize('pinyin,strict,result', [ + ['zhōng', True, 'ōng'], + ['zho1ng', True, 'ōng'], + ['zhong1', True, 'ōng'], + ['zhōng', False, 'ōng'], + ['yū', True, 'ǖ'], + ['yu1', True, 'ǖ'], + ['yū', False, 'ū'], +]) +def test_to_finals_tone(pinyin, strict, result): + assert to_finals_tone(pinyin, strict=strict) == result + + +@mark.parametrize('pinyin,strict,v_to_u,neutral_tone_with_five,result', [ + ['zhōng', True, False, False, 'o1ng'], + ['zhong1', True, False, False, 'o1ng'], + ['zho1ng', True, False, False, 'o1ng'], + ['zhōng', False, False, False, 'o1ng'], + ['zhong', False, False, True, 'o5ng'], + ['yū', True, False, False, 'v1'], + ['yu1', True, False, False, 'v1'], + ['yū', True, True, False, 'ü1'], + ['yū', False, False, False, 'u1'], + ['yū', False, True, False, 'u1'], +]) +def test_to_finals_tone2(pinyin, strict, v_to_u, + neutral_tone_with_five, result): + assert to_finals_tone2(pinyin, strict=strict, v_to_u=v_to_u, + neutral_tone_with_five=neutral_tone_with_five + ) == result + + +@mark.parametrize('pinyin,strict,v_to_u,neutral_tone_with_five,result', [ + ['zhōng', True, False, False, 'ong1'], + ['zhong1', True, False, False, 'ong1'], + ['zho1ng', True, False, False, 'ong1'], + ['zhōng', False, False, False, 'ong1'], + ['zhong', False, False, True, 'ong5'], + ['yū', True, False, False, 'v1'], + ['yu1', True, False, False, 'v1'], + ['yū', True, True, False, 'ü1'], + ['yū', False, False, False, 'u1'], + ['yū', False, True, False, 'u1'], +]) +def test_to_finals_tone3(pinyin, strict, v_to_u, neutral_tone_with_five, + result): + assert to_finals_tone3(pinyin, strict=strict, v_to_u=v_to_u, + neutral_tone_with_five=neutral_tone_with_five + ) == result + + # 所有拼音转换为 tone2 或 tone3 风格后,都可以再转换回原始的拼音 def test_tone_to_tone2_tone3_to_tone(): pinyin_set = set()