From e2f9570f15727f6825805421f4156a54497845ac Mon Sep 17 00:00:00 2001 From: Florent Ravenel Date: Wed, 23 Aug 2023 10:04:08 +0200 Subject: [PATCH 1/2] feat(Python): Add Get emojis from text --- Python/Python_Get_emojis_from_text.ipynb | 252 +++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 Python/Python_Get_emojis_from_text.ipynb diff --git a/Python/Python_Get_emojis_from_text.ipynb b/Python/Python_Get_emojis_from_text.ipynb new file mode 100644 index 0000000000..c3482d7c3c --- /dev/null +++ b/Python/Python_Get_emojis_from_text.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3fb35caa-2ca9-48a1-ae3e-1622152bcf58", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Naas\"" + ] + }, + { + "cell_type": "markdown", + "id": "403870b3-c415-4834-8e44-5c56e9cd5455", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Python - Get emojis from text" + ] + }, + { + "cell_type": "markdown", + "id": "1057a62b-5e25-473c-94bd-e5db02d873ce", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #python #text #emoji #nlp #string #library" + ] + }, + { + "cell_type": "markdown", + "id": "b2eae274-85d5-4723-8cfb-8ed19ff7438d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)" + ] + }, + { + "cell_type": "markdown", + "id": "a0ba4889-ee7f-43b4-a393-82fec597f371", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2023-08-23 (Created: 2023-08-23)" + ] + }, + { + "cell_type": "markdown", + "id": "ffeb11a8-69b0-4efe-a0ca-acce99538f7c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Description:** This notebook will show how to get emojis from text using Python. It is usefull for organizations that need to extract emojis from text strings." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2171a915-fb45-4b64-b07e-92b34cca6b1e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**References:**\n- [Python String - Emoji](https://www.w3schools.com/python/python_strings.asp)\n- [Python Regular Expression - Emoji](https://www.w3schools.com/python/python_regex.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "42f2c06a-731e-48ca-b8d5-319a9b2e3756", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "efb4a62f-27c7-4b0e-b927-b3e12869db2d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e349a7d-9aea-4836-829f-4f8c0d708ceb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": "import re", + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "d400aaf1-a047-47df-a3e8-05443ce1c7ef", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n- `text`: Text string to extract emojis from" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00b3fcaf-3c38-48af-89b4-e3ff560585d7", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": "text = \"This is a text with emojis \ud83d\ude0a\ud83d\ude0a\ud83d\ude0a\"", + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "c9b3fb14-756f-4e4c-8a38-c0fdfd26de2e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "95661cc9-1dee-4af5-94ed-63e6e2cc46d6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Get emojis from text" + ] + }, + { + "cell_type": "markdown", + "id": "fefced9c-8b09-4146-a1ce-d90bf2cfba8e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "This function will extract all emojis from a text string using regular expression." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99c20c70-3d65-4e93-b76d-7ca857939cad", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": "def get_emojis(text):\n emoji_list = re.findall(r\"[\\U0001F600-\\U0001F650]\", text)\n return emoji_list", + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "7c20d481-01f6-4fc2-aa1e-09ff5d2a047d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "9c4304f9-9a13-4a70-adbe-e4e3551db1a6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d230b44d-f6b4-4676-a5ba-08a545790b10", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": "emojis = get_emojis(text)\nprint(emojis)", + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "e9fd37ad-33e9-4cc6-97c5-ae71c2375b36", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From 635dade164ddbb80ee95b08fe3e707f8a3d49401 Mon Sep 17 00:00:00 2001 From: Florent Ravenel Date: Wed, 23 Aug 2023 10:12:16 +0200 Subject: [PATCH 2/2] feat: update function and description --- Python/Python_Get_emojis_from_text.ipynb | 47 ++++++++++++++++++------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/Python/Python_Get_emojis_from_text.ipynb 
b/Python/Python_Get_emojis_from_text.ipynb index c3482d7c3c..564b6cdf7f 100644 --- a/Python/Python_Get_emojis_from_text.ipynb +++ b/Python/Python_Get_emojis_from_text.ipynb @@ -63,7 +63,7 @@ "tags": [] }, "source": [ - "**Description:** This notebook will show how to get emojis from text using Python. It is usefull for organizations that need to extract emojis from text strings." + "**Description:** This notebook will show how to get emojis from text using `emoji` and `regex` libraries." ] }, { @@ -74,7 +74,9 @@ "tags": [] }, "source": [ - "**References:**\n- [Python String - Emoji](https://www.w3schools.com/python/python_strings.asp)\n- [Python Regular Expression - Emoji](https://www.w3schools.com/python/python_regex.asp)" + "**References:**\n", + "- [Python String - Emoji](https://www.w3schools.com/python/python_strings.asp)\n", + "- [Python Regular Expression - Emoji](https://www.w3schools.com/python/python_regex.asp)" ] }, { @@ -107,8 +109,15 @@ "papermill": {}, "tags": [] }, - "source": "import re", - "outputs": [] + "outputs": [], + "source": [ + "try:\n", + "    import emoji\n", + "except ImportError:\n", + "    %pip install emoji==1.6.3\n", + "    import emoji\n", + "import regex" + ] }, { "cell_type": "markdown", @@ -118,7 +127,8 @@ "tags": [] }, "source": [ - "### Setup variables\n- `text`: Text string to extract emojis from" + "### Setup variables\n", + "- `text`: Text string to extract emojis from" ] }, { @@ -129,8 +139,10 @@ "papermill": {}, "tags": [] }, - "source": "text = \"This is a text with emojis \ud83d\ude0a\ud83d\ude0a\ud83d\ude0a\"", - "outputs": [] + "outputs": [], + "source": [ + "text = \"This is a text with emojis ✅😊😊\"" + ] }, { "cell_type": "markdown", @@ -173,8 +185,16 @@ "papermill": {}, "tags": [] }, - "source": "def get_emojis(text):\n emoji_list = re.findall(r\"[\\U0001F600-\\U0001F650]\", text)\n return emoji_list", - "outputs": [] + "outputs": [], + "source": [
+ "def get_emojis(text):\n", + "    \"\"\"Return the emoji grapheme clusters found in `text`, in order of appearance.\"\"\"\n", + "    # emoji>=2.0 removed UNICODE_EMOJI in favour of EMOJI_DATA; accept either.\n", + "    emoji_table = getattr(emoji, \"EMOJI_DATA\", None) or emoji.UNICODE_EMOJI[\"en\"]\n", + "    # \\X matches one extended grapheme cluster, so ZWJ/modifier sequences stay whole.\n", + "    clusters = regex.findall(r\"\\X\", text)\n", + "    return [cluster for cluster in clusters if any(char in emoji_table for char in cluster)]" + ] }, { "cell_type": "markdown", @@ -206,8 +226,11 @@ "papermill": {}, "tags": [] }, - "source": "emojis = get_emojis(text)\nprint(emojis)", - "outputs": [] + "outputs": [], + "source": [ + "emojis = get_emojis(text)\n", + "print(emojis)" + ] }, { "cell_type": "markdown", @@ -249,4 +272,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +}