-
Notifications
You must be signed in to change notification settings - Fork 265
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding notebook for structured data conversion
new file: 20-structured-data.ipynb
- Loading branch information
Showing
1 changed file
with
198 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import dependencies\n", | ||
"import os\n", | ||
"import json" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Note: you may need to restart the kernel to use updated packages.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Install EvaDB with the document and notebook extras into the kernel's environment\n", | ||
"%pip install --quiet \"evadb[document,notebook]\"\n", | ||
"import evadb\n", | ||
"\n", | ||
"# Every query in this notebook is issued through this cursor\n", | ||
"connection = evadb.connect()\n", | ||
"cursor = connection.cursor()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# The OpenAI key is read from the OPENAI_API_KEY environment variable.\n", | ||
"# Export it before starting Jupyter, or uncomment the next line and fill it in\n", | ||
"# (never commit a real key). `os` is already imported in the first cell.\n", | ||
"#os.environ['OPENAI_API_KEY'] = ''\n", | ||
"open_ai_key = os.environ.get(\"OPENAI_API_KEY\", \"\")\n", | ||
"\n", | ||
"# An empty key is most likely a setup mistake, so report it here rather than\n", | ||
"# silently carrying an empty string forward.\n", | ||
"if not open_ai_key:\n", | ||
"    print(\"WARNING: OPENAI_API_KEY is not set.\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"<evadb.models.storage.batch.Batch at 0x7f97cc872950>" | ||
] | ||
}, | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Register the ExtractColumns UDF implemented in ../evadb/functions/extract_columns.py\n", | ||
"# NOTE(review): the IMPL path is relative, so it presumably depends on the directory\n", | ||
"# the notebook is run from - confirm.\n", | ||
"create_function_query = \"\"\"CREATE FUNCTION IF NOT EXISTS ExtractColumns\n", | ||
" IMPL '../evadb/functions/extract_columns.py';\n", | ||
" \"\"\"\n", | ||
"cursor.query(create_function_query).execute()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Empty DataFrame\n", | ||
"Columns: []\n", | ||
"Index: []\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Start from a clean slate: remove any InputUnstructured table left over from an earlier run\n", | ||
"drop_table_query = \"\"\"DROP TABLE IF EXISTS InputUnstructured\n", | ||
" \"\"\"\n", | ||
"cursor.query(drop_table_query).execute()\n", | ||
"\n", | ||
"# Recreate the table with a single TEXT column, input_rows, for the unstructured input\n", | ||
"create_table_query = \"\"\"CREATE TABLE IF NOT EXISTS InputUnstructured (\n", | ||
" input_rows TEXT)\n", | ||
" \"\"\"\n", | ||
"cursor.query(create_table_query).execute()\n", | ||
"\n", | ||
"# Show the freshly created table, which has no rows yet\n", | ||
"table = cursor.query(\"SELECT * FROM InputUnstructured;\").df()\n", | ||
"print(table)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 23, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Sample free-form issue reports that are inserted into InputUnstructured\n", | ||
"input_rows_list = [\n", | ||
"    \"The touch screen on my tablet stopped working for no reason.\",\n", | ||
"    # Extra samples, left disabled; uncomment to insert them as well\n", | ||
"    # \"Why does my computer take so long to start up? It's been like this for weeks.\",\n", | ||
"    # \"My phone battery dies too quickly. I just bought it!\",\n", | ||
"    \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n", | ||
"    \"The software update completely messed up my computer. Now nothing works properly.\",\n", | ||
"]\n", | ||
"\n", | ||
"# Each value is spliced into a double-quoted SQL literal below, so an embedded double quote\n", | ||
"# would end the literal early and produce a malformed statement. Validate every row before\n", | ||
"# inserting anything so a bad row cannot leave the table half-filled.\n", | ||
"bad_rows = [row for row in input_rows_list if '\"' in row]\n", | ||
"if bad_rows:\n", | ||
"    raise ValueError(f\"input rows must not contain double quotes: {bad_rows!r}\")\n", | ||
"\n", | ||
"for input_row in input_rows_list:\n", | ||
"    cursor.query(f\"\"\"INSERT INTO InputUnstructured (input_rows) VALUES (\"{input_row}\")\"\"\").execute()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" _row_id input_rows\n", | ||
"0 1 The touch screen on my tablet stopped working ...\n", | ||
"1 2 My headphones won't connect to my phone anymor...\n", | ||
"2 3 The software update completely messed up my co...\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Read the table back to confirm that the sample rows were inserted\n", | ||
"select_all_query = \"SELECT * FROM InputUnstructured;\"\n", | ||
"table = cursor.query(select_all_query).df()\n", | ||
"print(table)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 25, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"{\n", | ||
" \"Issue Category\": \"hardware\",\n", | ||
" \"Raw Issue String\": \"The touch screen on my tablet stopped working for no reason.\",\n", | ||
" \"Issue Component\": \"touch screen\"\n", | ||
"}\n", | ||
"{\n", | ||
" \"Issue Category\": \"hardware\",\n", | ||
" \"Raw Issue String\": \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n", | ||
" \"Issue Component\": \"headphones\"\n", | ||
"}\n", | ||
"{\n", | ||
" \"Issue Category\": \"software\",\n", | ||
" \"Raw Issue String\": \"The software update completely messed up my computer. Now nothing works properly.\",\n", | ||
" \"Issue Component\": \"computer\"\n", | ||
"}\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Apply ExtractColumns to every stored row and fetch the result as a DataFrame\n", | ||
"table = cursor.query(\"SELECT ExtractColumns(input_rows) FROM InputUnstructured;\").df()\n", | ||
"\n", | ||
"# Print the text in the 'response' column, one entry per input row\n", | ||
"for response in table['response']:\n", | ||
"    print(response)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "env", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |