diff --git a/tutorials/20-structured-data.ipynb b/tutorials/20-structured-data.ipynb new file mode 100644 index 000000000..8f4745bf2 --- /dev/null +++ b/tutorials/20-structured-data.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Import dependencies\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install --quiet \"evadb[document,notebook]\"\n", + "import evadb\n", + "cursor = evadb.connect().cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Set your OpenAI key as an environment variable\n", + "import os\n", + "#os.environ['OPENAI_API_KEY'] = ''\n", + "open_ai_key = os.environ.get(\"OPENAI_API_KEY\", \"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set up the ExtractColumns UDF implemented in evadb/functions/extract_columns.py\n", + "cursor.query(\"\"\"CREATE FUNCTION IF NOT EXISTS ExtractColumns\n", + " IMPL '../evadb/functions/extract_columns.py';\n", + " \"\"\").execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []\n" + ] + } + ], + "source": [ + "# delete the table if it already exists\n", + "cursor.query(\"\"\"DROP TABLE IF EXISTS InputUnstructured\n", + " \"\"\").execute()\n", + "\n", + "# create the table specifying the type of the input_rows column\n", + "cursor.query(\"\"\"CREATE 
TABLE IF NOT EXISTS InputUnstructured (\n", + " input_rows TEXT)\n", + " \"\"\").execute()\n", + "\n", + "table = cursor.query(\"SELECT * FROM InputUnstructured;\").df()\n", + "print(table)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "input_rows_list = [\"The touch screen on my tablet stopped working for no reason.\",\n", + "# \"Why does my computer take so long to start up? It's been like this for weeks.\",\n", + "# \"My phone battery dies too quickly. I just bought it!\",\n", + " \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n", + " \"The software update completely messed up my computer. Now nothing works properly.\"]\n", + "\n", + "for input_row in input_rows_list:\n", + " cursor.query(f\"\"\"INSERT INTO InputUnstructured (input_rows) VALUES (\"{input_row}\")\"\"\").execute()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " _row_id input_rows\n", + "0 1 The touch screen on my tablet stopped working ...\n", + "1 2 My headphones won't connect to my phone anymor...\n", + "2 3 The software update completely messed up my co...\n" + ] + } + ], + "source": [ + "table = cursor.query(\"SELECT * FROM InputUnstructured;\").df()\n", + "print(table)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"Issue Category\": \"hardware\",\n", + " \"Raw Issue String\": \"The touch screen on my tablet stopped working for no reason.\",\n", + " \"Issue Component\": \"touch screen\"\n", + "}\n", + "{\n", + " \"Issue Category\": \"hardware\",\n", + " \"Raw Issue String\": \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n", + " \"Issue Component\": \"headphones\"\n", + "}\n", + "{\n", + " 
\"Issue Category\": \"software\",\n", + " \"Raw Issue String\": \"The software update completely messed up my computer. Now nothing works properly.\",\n", + " \"Issue Component\": \"computer\"\n", + "}\n" + ] + } + ], + "source": [ + "table = cursor.query(\"SELECT ExtractColumns(input_rows) FROM InputUnstructured;\").df()\n", + "\n", + "for _, row in table.iterrows():\n", + " print(row['response'])\n", + "#print(table.iloc[1]['response'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}