Skip to content

Commit

Permalink
Adding notebook for structured data conversion
Browse files Browse the repository at this point in the history
	new file:   20-structured-data.ipynb
  • Loading branch information
hershd23 committed Nov 3, 2023
1 parent 26dc5bb commit 38f52e3
Showing 1 changed file with 198 additions and 0 deletions.
198 changes: 198 additions & 0 deletions tutorials/20-structured-data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import dependencies\n",
"import os\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --quiet \"evadb[document,notebook]\"\n",
"import evadb\n",
"cursor = evadb.connect().cursor()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Set your OpenAI key as an environment variable\n",
"import os\n",
"#os.environ['OPENAI_API_KEY'] = ''\n",
"open_ai_key = os.environ.get(\"OPENAI_API_KEY\", \"\")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<evadb.models.storage.batch.Batch at 0x7f97cc872950>"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# set up the extract columns UDF available at functions/extract_columns.py\n",
"cursor.query(\"\"\"CREATE FUNCTION IF NOT EXISTS ExtractColumns\n",
" IMPL '../evadb/functions/extract_columns.py';\n",
" \"\"\").execute()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []\n"
]
}
],
"source": [
"# # delete the table if it already exists\n",
"cursor.query(\"\"\"DROP TABLE IF EXISTS InputUnstructured\n",
" \"\"\").execute()\n",
"\n",
"# create the table specifying the type of the prompt column\n",
"cursor.query(\"\"\"CREATE TABLE IF NOT EXISTS InputUnstructured (\n",
" input_rows TEXT)\n",
" \"\"\").execute()\n",
"\n",
"table = cursor.query(\"SELECT * FROM InputUnstructured;\").df()\n",
"print(table)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"input_rows_list = [\"The touch screen on my tablet stopped working for no reason.\",\n",
"# \"Why does my computer take so long to start up? It's been like this for weeks.\",\n",
"# \"My phone battery dies too quickly. I just bought it!\",\n",
" \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n",
" \"The software update completely messed up my computer. Now nothing works properly.\"]\n",
"\n",
"for input_row in input_rows_list:\n",
" cursor.query(f\"\"\"INSERT INTO InputUnstructured (input_rows) VALUES (\"{input_row}\")\"\"\").execute()\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" _row_id input_rows\n",
"0 1 The touch screen on my tablet stopped working ...\n",
"1 2 My headphones won't connect to my phone anymor...\n",
"2 3 The software update completely messed up my co...\n"
]
}
],
"source": [
"table = cursor.query(\"SELECT * FROM InputUnstructured;\").df()\n",
"print(table)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"Issue Category\": \"hardware\",\n",
" \"Raw Issue String\": \"The touch screen on my tablet stopped working for no reason.\",\n",
" \"Issue Component\": \"touch screen\"\n",
"}\n",
"{\n",
" \"Issue Category\": \"hardware\",\n",
" \"Raw Issue String\": \"My headphones won't connect to my phone anymore, even though they used to work just fine.\",\n",
" \"Issue Component\": \"headphones\"\n",
"}\n",
"{\n",
" \"Issue Category\": \"software\",\n",
" \"Raw Issue String\": \"The software update completely messed up my computer. Now nothing works properly.\",\n",
" \"Issue Component\": \"computer\"\n",
"}\n"
]
}
],
"source": [
"table = cursor.query(\"SELECT ExtractColumns(input_rows) FROM InputUnstructured;\").df()\n",
"\n",
"for _, row in table.iterrows():\n",
" print(row['response'])\n",
"#print(table.iloc[1]['response'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 38f52e3

Please sign in to comment.