Add notebook examples

CybercentreCanada · cccs-eric · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023
commit ac4c22765c69c5671f5bd35d076fa911405fbcd5
diff --git a/docs/example/Spark.ipynb → docs/example/SparkConfigurationUsage.ipynb b/docs/example/Spark.ipynb → docs/example/SparkConfigurationUsage.ipynb
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c9b06840",
+   "metadata": {},
+   "source": [
+    "# Configuration and Usage"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -15,7 +23,7 @@
    },
    "outputs": [],
    "source": [
-    "from pyspark.sql import SparkSession"
+    "from pyspark.sql import SparkSession\n"
    ]
   },
   {
@@ -63,7 +71,7 @@
    "outputs": [],
    "source": [
     "from IPython.core.interactiveshell import InteractiveShell\n",
-    "InteractiveShell.ast_node_interactivity = 'all'"
+    "InteractiveShell.ast_node_interactivity = 'all'\n"
    ]
   },
   {
@@ -82,7 +90,7 @@
    },
    "outputs": [],
    "source": [
-    "%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
+    "%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
    ]
   },
   {
@@ -101,7 +109,7 @@
    "outputs": [],
    "source": [
     "%config SparkSql.cacheTTL=3600\n",
-    "%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\""
+    "%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\"\n"
    ]
   },
   {
@@ -143,7 +151,7 @@
    "source": [
     "df = spark.read.json(\"file:/path/to/contacts.json\")\n",
     "df.createOrReplaceTempView(\"CONTACTS_TABLE\")\n",
-    "df.printSchema()"
+    "df.printSchema()\n"
    ]
   },
   {
@@ -179,7 +187,7 @@
    "source": [
     "df = spark.read.json(\"file:/path/to/conversations.json\")\n",
     "df.createOrReplaceTempView(\"MESSAGES_TABLE\")\n",
-    "df.printSchema()"
+    "df.printSchema()\n"
    ]
   },
   {
@@ -207,7 +215,7 @@
     }
    ],
    "source": [
-    "%sparksql --refresh all"
+    "%sparksql --refresh all\n"
    ]
   },
   {
@@ -280,7 +288,7 @@
     }
    ],
    "source": [
-    "%sparksql SHOW TABLES"
+    "%sparksql SHOW TABLES\n"
    ]
   },
   {
@@ -563,7 +571,7 @@
     "SELECT\n",
     "    *\n",
     "FROM\n",
-    "    contacts_table AS con"
+    "    contacts_table AS con\n"
    ]
   },
   {
@@ -601,7 +609,7 @@
    "source": [
     "%%sparksql --view the_exploded_table --output skip\n",
     "SELECT\n",
-    "    *, \n",
+    "    *,\n",
     "    explode(con.phoneNumbers) as phoneNumber\n",
     "FROM\n",
     "    contacts_table AS con\n"
@@ -678,7 +686,7 @@
     }
    ],
    "source": [
-    "%sparksql SHOW TABLES"
+    "%sparksql SHOW TABLES\n"
    ]
   },
   {
@@ -1073,7 +1081,7 @@
    "source": [
     "%%sparksql --dataframe the_exploded_dataframe --output skip\n",
     "SELECT\n",
-    "    *, \n",
+    "    *,\n",
     "    explode(con.phoneNumbers) as phoneNumber\n",
     "FROM\n",
     "    contacts_table AS con\n"
@@ -1116,7 +1124,7 @@
     }
    ],
    "source": [
-    "the_exploded_dataframe.select('phoneNumber').show()"
+    "the_exploded_dataframe.select('phoneNumber').show()\n"
    ]
   },
   {
@@ -1173,7 +1181,7 @@
     "    contacts_table AS con\n",
     "--end-sparksql\n",
     "'''\n",
-    "print(sql)"
+    "print(sql)\n"
    ]
   },
   {
@@ -1187,13 +1195,13 @@
     "sql = '''\n",
     "--start-sparksql\n",
     "SELECT\n",
-    "    *, \n",
+    "    *,\n",
     "    explode(con.phoneNumbers) as phoneNumber\n",
     "FROM\n",
     "    contacts_table AS con\n",
     "--end-sparksql\n",
     "'''\n",
-    "print(sql)"
+    "print(sql)\n"
    ]
   },
   {
@@ -1225,7 +1233,7 @@
     }
    ],
    "source": [
-    "spark.sql(sql).show()"
+    "spark.sql(sql).show()\n"
    ]
   },
   {
@@ -1290,7 +1298,7 @@
     }
    ],
    "source": [
-    "%%sparksql?"
+    "%%sparksql?\n"
    ]
   },
   {
@@ -1389,7 +1397,7 @@
     "    TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
     "FROM\n",
     "RANGE\n",
-    "    (1, 400000, 1, 100) \n",
+    "    (1, 400000, 1, 100)\n",
     "UNION\n",
     "SELECT\n",
     "    id,\n",
@@ -1401,7 +1409,7 @@
     "    TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
     "FROM\n",
     "RANGE\n",
-    "    (1, 40000, 1, 100) \n"
+    "    (1, 40000, 1, 100)\n"
    ]
   },
   {
@@ -1484,14 +1492,6 @@
     "    contacts_table AS con\n",
     "    INNER JOIN messages_table AS mes ON mes.`first Name` = con.`first Name`\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "39fbee24-f69b-474a-903d-bf38d170ee0d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/example/SparkDataframe.ipynb b/docs/example/SparkDataframe.ipynb
@@ -13,7 +13,7 @@
     "import ipywidgets as widgets\n",
     "out = widgets.Output()\n",
     "with out:\n",
-    "    spark = SparkSession.builder.getOrCreate()"
+    "    spark = SparkSession.builder.getOrCreate()\n"
    ]
   },
   {
@@ -37,7 +37,7 @@
    ],
    "source": [
     "df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
-    "df"
+    "df\n"
    ]
   },
   {
@@ -51,7 +51,7 @@
    "source": [
     "from jupyterlab_sql_editor.ipython.sparkdf import register_display\n",
     "from jupyterlab_sql_editor.outputters.outputters import _display_results\n",
-    "register_display()"
+    "register_display()\n"
    ]
   },
   {
@@ -114,7 +114,7 @@
    "source": [
     "# change default display behaviour\n",
     "df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
-    "df"
+    "df\n"
    ]
   },
   {
@@ -126,7 +126,7 @@
    },
    "outputs": [],
    "source": [
-    "pdf = df.limit(1).toPandas()"
+    "pdf = df.limit(1).toPandas()\n"
    ]
   },
   {
@@ -156,7 +156,7 @@
    ],
    "source": [
     "# _display_results lets you configure the output\n",
-    "_display_results(pdf, output=\"html\", show_nonprinting=False)"
+    "_display_results(pdf, output=\"html\", show_nonprinting=False)\n"
    ]
   },
   {
@@ -181,7 +181,7 @@
     }
    ],
    "source": [
-    "_display_results(pdf, output=\"text\")"
+    "_display_results(pdf, output=\"text\")\n"
    ]
   },
   {
@@ -210,16 +210,8 @@
    ],
    "source": [
     "df = spark.read.json(\"file:/path/to/contacts.json\")\n",
-    "_display_results(pdf, output=\"json\")"
+    "_display_results(pdf, output=\"json\")\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1447b3e6-955b-4269-bc04-6395a9673036",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/example/SparkSQLEscapeControlChars.ipynb b/docs/example/SparkSQLEscapeControlChars.ipynb
@@ -1,5 +1,13 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8ba62d82",
+   "metadata": {},
+   "source": [
+    "# Escaping Control Characters"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -13,7 +21,7 @@
     "\n",
     "spark = SparkSession.builder.getOrCreate()\n",
     "\n",
-    "%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
+    "%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
    ]
   },
   {
@@ -53,7 +61,7 @@
     "    '\\\\\\\\t' AS two_backslash_and_t,\n",
     "    '\\\\\\\\\\t' AS two_backslash_and_tab\n",
     "--end-sparksql\n",
-    "''').show()"
+    "''').show()\n"
    ]
   },
   {
@@ -191,14 +199,6 @@
     "    '\\\\\\\\t' AS two_backslash_and_t,\n",
     "    '\\\\\\\\\\t' AS two_backslash_and_tab\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "383afdca",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/example/SparkSyntaxDemo.ipynb b/docs/example/SparkSyntaxDemo.ipynb
@@ -5,7 +5,7 @@
    "id": "2eba0bf1",
    "metadata": {},
    "source": [
-    "# Spark Syntax Demo Notebook"
+    "# Spark Syntax Demo"
    ]
   },
   {
@@ -86,14 +86,6 @@
     "\n",
     "spark.sql(sql).show()\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cb776ce4",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {