Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH deploy build #136

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add notebooks examples
cccs-eric committed Nov 3, 2023
commit ac4c22765c69c5671f5bd35d076fa911405fbcd5
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c9b06840",
"metadata": {},
"source": [
"# Configuration and Usage"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -15,7 +23,7 @@
},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
"from pyspark.sql import SparkSession\n"
]
},
{
@@ -63,7 +71,7 @@
"outputs": [],
"source": [
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = 'all'"
"InteractiveShell.ast_node_interactivity = 'all'\n"
]
},
{
@@ -82,7 +90,7 @@
},
"outputs": [],
"source": [
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
]
},
{
@@ -101,7 +109,7 @@
"outputs": [],
"source": [
"%config SparkSql.cacheTTL=3600\n",
"%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\""
"%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\"\n"
]
},
{
@@ -143,7 +151,7 @@
"source": [
"df = spark.read.json(\"file:/path/to/contacts.json\")\n",
"df.createOrReplaceTempView(\"CONTACTS_TABLE\")\n",
"df.printSchema()"
"df.printSchema()\n"
]
},
{
@@ -179,7 +187,7 @@
"source": [
"df = spark.read.json(\"file:/path/to/conversations.json\")\n",
"df.createOrReplaceTempView(\"MESSAGES_TABLE\")\n",
"df.printSchema()"
"df.printSchema()\n"
]
},
{
@@ -207,7 +215,7 @@
}
],
"source": [
"%sparksql --refresh all"
"%sparksql --refresh all\n"
]
},
{
@@ -280,7 +288,7 @@
}
],
"source": [
"%sparksql SHOW TABLES"
"%sparksql SHOW TABLES\n"
]
},
{
@@ -563,7 +571,7 @@
"SELECT\n",
" *\n",
"FROM\n",
" contacts_table AS con"
" contacts_table AS con\n"
]
},
{
@@ -601,7 +609,7 @@
"source": [
"%%sparksql --view the_exploded_table --output skip\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n"
@@ -678,7 +686,7 @@
}
],
"source": [
"%sparksql SHOW TABLES"
"%sparksql SHOW TABLES\n"
]
},
{
@@ -1073,7 +1081,7 @@
"source": [
"%%sparksql --dataframe the_exploded_dataframe --output skip\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n"
@@ -1116,7 +1124,7 @@
}
],
"source": [
"the_exploded_dataframe.select('phoneNumber').show()"
"the_exploded_dataframe.select('phoneNumber').show()\n"
]
},
{
@@ -1173,7 +1181,7 @@
" contacts_table AS con\n",
"--end-sparksql\n",
"'''\n",
"print(sql)"
"print(sql)\n"
]
},
{
@@ -1187,13 +1195,13 @@
"sql = '''\n",
"--start-sparksql\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n",
"--end-sparksql\n",
"'''\n",
"print(sql)"
"print(sql)\n"
]
},
{
@@ -1225,7 +1233,7 @@
}
],
"source": [
"spark.sql(sql).show()"
"spark.sql(sql).show()\n"
]
},
{
@@ -1290,7 +1298,7 @@
}
],
"source": [
"%%sparksql?"
"%%sparksql?\n"
]
},
{
@@ -1389,7 +1397,7 @@
" TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
"FROM\n",
"RANGE\n",
" (1, 400000, 1, 100) \n",
" (1, 400000, 1, 100)\n",
"UNION\n",
"SELECT\n",
" id,\n",
@@ -1401,7 +1409,7 @@
" TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
"FROM\n",
"RANGE\n",
" (1, 40000, 1, 100) \n"
" (1, 40000, 1, 100)\n"
]
},
{
@@ -1484,14 +1492,6 @@
" contacts_table AS con\n",
" INNER JOIN messages_table AS mes ON mes.`first Name` = con.`first Name`\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39fbee24-f69b-474a-903d-bf38d170ee0d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
24 changes: 8 additions & 16 deletions docs/example/SparkDataframe.ipynb
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
"import ipywidgets as widgets\n",
"out = widgets.Output()\n",
"with out:\n",
" spark = SparkSession.builder.getOrCreate()"
" spark = SparkSession.builder.getOrCreate()\n"
]
},
{
@@ -37,7 +37,7 @@
],
"source": [
"df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
"df"
"df\n"
]
},
{
@@ -51,7 +51,7 @@
"source": [
"from jupyterlab_sql_editor.ipython.sparkdf import register_display\n",
"from jupyterlab_sql_editor.outputters.outputters import _display_results\n",
"register_display()"
"register_display()\n"
]
},
{
@@ -114,7 +114,7 @@
"source": [
"# change default display behaviour\n",
"df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
"df"
"df\n"
]
},
{
@@ -126,7 +126,7 @@
},
"outputs": [],
"source": [
"pdf = df.limit(1).toPandas()"
"pdf = df.limit(1).toPandas()\n"
]
},
{
@@ -156,7 +156,7 @@
],
"source": [
"# _display_results lets you configure the output\n",
"_display_results(pdf, output=\"html\", show_nonprinting=False)"
"_display_results(pdf, output=\"html\", show_nonprinting=False)\n"
]
},
{
@@ -181,7 +181,7 @@
}
],
"source": [
"_display_results(pdf, output=\"text\")"
"_display_results(pdf, output=\"text\")\n"
]
},
{
@@ -210,16 +210,8 @@
],
"source": [
"df = spark.read.json(\"file:/path/to/contacts.json\")\n",
"_display_results(pdf, output=\"json\")"
"_display_results(pdf, output=\"json\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1447b3e6-955b-4269-bc04-6395a9673036",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
20 changes: 10 additions & 10 deletions docs/example/SparkSQLEscapeControlChars.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8ba62d82",
"metadata": {},
"source": [
"# Escaping Control Characters"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -13,7 +21,7 @@
"\n",
"spark = SparkSession.builder.getOrCreate()\n",
"\n",
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
]
},
{
@@ -53,7 +61,7 @@
" '\\\\\\\\t' AS two_backslash_and_t,\n",
" '\\\\\\\\\\t' AS two_backslash_and_tab\n",
"--end-sparksql\n",
"''').show()"
"''').show()\n"
]
},
{
@@ -191,14 +199,6 @@
" '\\\\\\\\t' AS two_backslash_and_t,\n",
" '\\\\\\\\\\t' AS two_backslash_and_tab\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "383afdca",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
10 changes: 1 addition & 9 deletions docs/example/SparkSyntaxDemo.ipynb
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
"id": "2eba0bf1",
"metadata": {},
"source": [
"# Spark Syntax Demo Notebook"
"# Spark Syntax Demo"
]
},
{
@@ -86,14 +86,6 @@
"\n",
"spark.sql(sql).show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb776ce4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Loading