diff --git a/Python/Python_Get_emojis_from_text.ipynb b/Python/Python_Get_emojis_from_text.ipynb new file mode 100644 index 0000000000..564b6cdf7f --- /dev/null +++ b/Python/Python_Get_emojis_from_text.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3fb35caa-2ca9-48a1-ae3e-1622152bcf58", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Naas\"" + ] + }, + { + "cell_type": "markdown", + "id": "403870b3-c415-4834-8e44-5c56e9cd5455", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Python - Get emojis from text" + ] + }, + { + "cell_type": "markdown", + "id": "1057a62b-5e25-473c-94bd-e5db02d873ce", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #python #text #emoji #nlp #string #library" + ] + }, + { + "cell_type": "markdown", + "id": "b2eae274-85d5-4723-8cfb-8ed19ff7438d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)" + ] + }, + { + "cell_type": "markdown", + "id": "a0ba4889-ee7f-43b4-a393-82fec597f371", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2023-08-23 (Created: 2023-08-23)" + ] + }, + { + "cell_type": "markdown", + "id": "ffeb11a8-69b0-4efe-a0ca-acce99538f7c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Description:** This notebook will show how to get emojis from text using `emoji` and `regex` libraries." + ] + }, + { + "cell_type": "markdown", + "id": "2171a915-fb45-4b64-b07e-92b34cca6b1e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**References:**\n", + "- [Python String - Emoji](https://www.w3schools.com/python/python_strings.asp)\n", + "- [Python Regular Expression - Emoji](https://www.w3schools.com/python/python_regex.asp)" + ] + }, + { + "cell_type": "markdown", + "id": "42f2c06a-731e-48ca-b8d5-319a9b2e3756", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "efb4a62f-27c7-4b0e-b927-b3e12869db2d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e349a7d-9aea-4836-829f-4f8c0d708ceb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "try:\n", + " import emoji\n", + "except:\n", + " !pip install emoji==1.6.3\n", + " import emoji\n", + "import regex" + ] + }, + { + "cell_type": "markdown", + "id": "d400aaf1-a047-47df-a3e8-05443ce1c7ef", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- `text`: Text string to extract emojis from" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00b3fcaf-3c38-48af-89b4-e3ff560585d7", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "text = \"This is a text with emojis ✅😊😊\"" + ] + }, + { + "cell_type": "markdown", + "id": "c9b3fb14-756f-4e4c-8a38-c0fdfd26de2e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "95661cc9-1dee-4af5-94ed-63e6e2cc46d6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Get emojis from text" + ] + }, + { + "cell_type": "markdown", + "id": "fefced9c-8b09-4146-a1ce-d90bf2cfba8e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "This function will extract all emojis from a text string using regular expression." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99c20c70-3d65-4e93-b76d-7ca857939cad", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_emojis(text):\n", + " emoji_list = []\n", + " data = regex.findall(r\"\\X\", text)\n", + " for word in data:\n", + " if any(char in emoji.UNICODE_EMOJI[\"en\"] for char in word):\n", + " emoji_list.append(word)\n", + " return emoji_list" + ] + }, + { + "cell_type": "markdown", + "id": "7c20d481-01f6-4fc2-aa1e-09ff5d2a047d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "9c4304f9-9a13-4a70-adbe-e4e3551db1a6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d230b44d-f6b4-4676-a5ba-08a545790b10", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "emojis = get_emojis(text)\n", + "print(emojis)" + ] + }, + { + "cell_type": "markdown", + "id": "e9fd37ad-33e9-4cc6-97c5-ae71c2375b36", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}