diff --git a/.gitignore b/.gitignore index 8bebb32e..5e145cee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ /venv/ -**/.venv/ -**/.DS_Store /node_modules/ /packages/*.whl /dist/ diff --git a/examples/index.json b/examples/index.json index 8c00bcbf..dfde82c6 100644 --- a/examples/index.json +++ b/examples/index.json @@ -10,9 +10,12 @@ "category": "Featured", "apps": [ "cpuinfo", + "orbit", "regularization", + "wordle", "plotly", - "ipyleaflet" + "ipyleaflet", + "camera" ] }, { @@ -24,11 +27,45 @@ "file_download", "insert_ui", "input_update", + "modules", "extra_packages", + "static_content", "fetch", "ipywidgets" ] }, + { + "category": "Inputs", + "apps": [ + "input_text", + "input_numeric", + "input_slider", + "input_checkbox", + "input_switch", + "input_checkbox_group", + "input_select", + "input_radio", + "input_text_area", + "input_date", + "input_date_range", + "input_password" + ] + }, + { + "category": "Outputs", + "apps": [ + "output_text", + "output_text_verbatim", + "output_ui", + "output_plot", + "output_table", + "output_data_frame_grid" + ] + }, + { + "category": "Layout", + "apps": ["shinyswatch", "layout_sidebar", "layout_two_column"] + }, { "category": "Reactivity", "apps": [ @@ -38,17 +75,6 @@ "reactive_value" ] }, - { - "category": "Shiny Core", - "apps": [ - "modules", - "plot_interact_basic", - "plot_interact_exclude", - "orbit", - "wordle", - "static_content" - ] - }, { "category": "Interactive plots", "apps": [ diff --git a/examples/python/app_with_plot/app.py b/examples/python/app_with_plot/app.py index 9d666e8f..959af30f 100644 --- a/examples/python/app_with_plot/app.py +++ b/examples/python/app_with_plot/app.py @@ -1,14 +1,26 @@ import matplotlib.pyplot as plt import numpy as np -from shiny import render -from shiny.express import ui, input +from shiny import App, render, ui -with ui.sidebar(): - ui.input_slider("n", "N", 0, 100, 20) +app_ui = ui.page_fluid( + ui.layout_sidebar( + ui.panel_sidebar( + ui.input_slider("n", "N", 0, 100, 20), + ), + ui.panel_main( + ui.output_plot("histogram"), + ), + ), +) -@render.plot(alt="A histogram") -def histogram(): - np.random.seed(19680801) - x = 100 + 15 * np.random.randn(437) - plt.hist(x, input.n(), density=True) +def server(input, output, session): + @output + @render.plot(alt="A histogram") + def histogram(): + np.random.seed(19680801) + x = 100 + 15 * np.random.randn(437) + plt.hist(x, input.n(), density=True) + + +app = App(app_ui, server, debug=True) diff --git a/examples/python/basic_app/app.py b/examples/python/basic_app/app.py index 6e04a762..f0fc6350 100644 --- a/examples/python/basic_app/app.py +++ b/examples/python/basic_app/app.py @@ -1,10 +1,16 @@ -from shiny import render -from shiny.express import ui, input +from shiny import App, render, ui +app_ui = ui.page_fluid( + ui.input_slider("n", "N", 0, 100, 20), + ui.output_text_verbatim("txt"), +) -ui.input_slider("n", "N", 0, 100, 20), +def server(input, output, session): + @output + @render.text + def txt(): + return f"n*2 is {input.n() * 2}" -@render.text -def txt(): - return f"n*2 is {input.n() * 2}" + +app = App(app_ui, server) diff --git a/examples/python/cpuinfo/app.py b/examples/python/cpuinfo/app.py index cb338b46..2b9e58db 100644 --- a/examples/python/cpuinfo/app.py +++ b/examples/python/cpuinfo/app.py @@ -3,16 +3,20 @@ if "pyodide" in sys.modules: # psutil doesn't work on pyodide--use fake data instead from fakepsutil import cpu_count, cpu_percent + + shinylive_message = "Note: the CPU data is simulated when running in Shinylive." 
else: from psutil import cpu_count, cpu_percent + shinylive_message = "" + +from math import ceil + import matplotlib +import matplotlib.pyplot as plt import numpy as np import pandas as pd -from helpers import plot_cpu - -from shiny import reactive, render -from shiny.express import input, ui, output +from shiny import App, reactive, render, ui # The agg matplotlib backend seems to be a little more efficient than the default when # running on macOS, and also gives more consistent results across operating systems @@ -23,9 +27,83 @@ # secs between samples SAMPLE_PERIOD = 1 + ncpu = cpu_count(logical=True) -ui.page_opts(fillable=True) +app_ui = ui.page_fluid( + ui.tags.style( + """ + /* Don't apply fade effect, it's constantly recalculating */ + .recalculating { + opacity: 1; + } + tbody > tr:last-child { + /*border: 3px solid var(--bs-dark);*/ + box-shadow: + 0 0 2px 1px #fff, /* inner white */ + 0 0 4px 2px #0ff, /* middle cyan */ + 0 0 5px 3px #00f; /* outer blue */ + } + #table table { + table-layout: fixed; + width: %s; + font-size: 0.8em; + } + th, td { + text-align: center; + } + """ + % f"{ncpu*4}em" + ), + ui.h3("CPU Usage %", class_="mt-2"), + ui.layout_sidebar( + ui.panel_sidebar( + ui.input_select( + "cmap", + "Colormap", + { + "inferno": "inferno", + "viridis": "viridis", + "copper": "copper", + "prism": "prism (not recommended)", + }, + ), + ui.p(ui.input_action_button("reset", "Clear history", class_="btn-sm")), + ui.input_switch("hold", "Freeze output", value=False), + shinylive_message, + class_="mb-3", + ), + ui.panel_main( + ui.div( + {"class": "card mb-3"}, + ui.div( + {"class": "card-body"}, + ui.h5({"class": "card-title mt-0"}, "Graphs"), + ui.output_plot("plot", height=f"{ncpu * 40}px"), + ), + ui.div( + {"class": "card-footer"}, + ui.input_numeric("sample_count", "Number of samples per graph", 50), + ), + ), + ui.div( + {"class": "card"}, + ui.div( + {"class": "card-body"}, + ui.h5({"class": "card-title m-0"}, "Heatmap"), + ), + ui.div( + {"class": "card-body overflow-auto pt-0"}, + ui.output_table("table"), + ), + ui.div( + {"class": "card-footer"}, + ui.input_numeric("table_rows", "Rows to display", 5), + ), + ), + ), + ), +) @reactive.Calc @@ -34,95 +112,127 @@ def cpu_current(): return cpu_percent(percpu=True) -cpu_history = reactive.Value(None) - +def server(input, output, session): + cpu_history = reactive.Value(None) -@reactive.Calc -def cpu_history_with_hold(): - # If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a - # regular read - if not input.hold(): - return cpu_history() - else: - # Even if frozen, we still want to respond to input.reset() - input.reset() - with reactive.isolate(): + @reactive.Calc + def cpu_history_with_hold(): + # If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a + # regular read + if not input.hold(): return cpu_history() - - -@reactive.Effect -def collect_cpu_samples(): - """cpu_percent() reports just the current CPU usage sample; this Effect gathers - them up and stores them in the cpu_history reactive value, in a numpy 2D array - (rows are CPUs, columns are time).""" - - new_data = np.vstack(cpu_current()) - with reactive.isolate(): - if cpu_history() is None: - cpu_history.set(new_data) else: - combined_data = np.hstack([cpu_history(), new_data]) - # Throw away extra data so we don't consume unbounded amounts of memory - if combined_data.shape[1] > MAX_SAMPLES: - combined_data = combined_data[:, -MAX_SAMPLES:] - cpu_history.set(combined_data) - - -@reactive.Effect(priority=100) 
-@reactive.event(input.reset) -def reset_history(): - cpu_history.set(None) - - -ui.tags.style( - """ - /* Don't apply fade effect, it's constantly recalculating */ - .recalculating { - opacity: 1; - } - """ -) - -with ui.sidebar(): - ui.input_select( - "cmap", - "Colormap", - { - "inferno": "inferno", - "viridis": "viridis", - "copper": "copper", - "prism": "prism (not recommended)", - }, - ), - ui.input_action_button("reset", "Clear history", class_="btn-sm") - ui.input_switch("hold", "Freeze output", value=False) - -with ui.card(): - with ui.navset_bar(title="CPU %"): - with ui.nav_panel(title="Graphs"): - ui.input_numeric("sample_count", "Number of samples per graph", 50) - - @render.plot - def plot(): - return plot_cpu( - cpu_history_with_hold(), input.sample_count(), ncpu, input.cmap() - ) - - with ui.nav_panel(title="Heatmap"): - ui.input_numeric("table_rows", "Rows to display", 15) - - @output(suspend_when_hidden=False) - @render.table - def table(): - history = cpu_history_with_hold() - latest = pd.DataFrame(history).transpose().tail(input.table_rows()) - if latest.shape[0] == 0: - return latest - return ( - latest.style.format(precision=0) - .hide(axis="index") - .set_table_attributes( - 'class="dataframe shiny-table table table-borderless font-monospace"' - ) - .background_gradient(cmap=input.cmap(), vmin=0, vmax=100) - ) + # Even if frozen, we still want to respond to input.reset() + input.reset() + with reactive.isolate(): + return cpu_history() + + @reactive.Effect + def collect_cpu_samples(): + """cpu_percent() reports just the current CPU usage sample; this Effect gathers + them up and stores them in the cpu_history reactive value, in a numpy 2D array + (rows are CPUs, columns are time).""" + + new_data = np.vstack(cpu_current()) + with reactive.isolate(): + if cpu_history() is None: + cpu_history.set(new_data) + else: + combined_data = np.hstack([cpu_history(), new_data]) + # Throw away extra data so we don't consume unbounded amounts of memory + if combined_data.shape[1] > MAX_SAMPLES: + combined_data = combined_data[:, -MAX_SAMPLES:] + cpu_history.set(combined_data) + + @reactive.Effect(priority=100) + @reactive.event(input.reset) + def reset_history(): + cpu_history.set(None) + + @output + @render.plot + def plot(): + history = cpu_history_with_hold() + + if history is None: + history = np.array([]) + history.shape = (ncpu, 0) + + nsamples = input.sample_count() + + # Throw away samples too old to fit on the plot + if history.shape[1] > nsamples: + history = history[:, -nsamples:] + + ncols = 2 + nrows = int(ceil(ncpu / ncols)) + fig, axeses = plt.subplots( + nrows=nrows, + ncols=ncols, + squeeze=False, + ) + for i in range(0, ncols * nrows): + row = i // ncols + col = i % ncols + axes = axeses[row, col] + if i >= len(history): + axes.set_visible(False) + continue + data = history[i] + axes.yaxis.set_label_position("right") + axes.yaxis.tick_right() + axes.set_xlim(-(nsamples - 1), 0) + axes.set_ylim(0, 100) + + assert len(data) <= nsamples + + # Set up an array of x-values that will right-align the data relative to the + # plotting area + x = np.arange(0, len(data)) + x = np.flip(-x) + + # Color bars by cmap + color = plt.get_cmap(input.cmap())(data / 100) + axes.bar(x, data, color=color, linewidth=0, width=1.0) + + axes.set_yticks([25, 50, 75]) + for ytl in axes.get_yticklabels(): + if col == ncols - 1 or i == ncpu - 1 or True: + ytl.set_fontsize(7) + else: + ytl.set_visible(False) + hide_ticks(axes.yaxis) + for xtl in axes.get_xticklabels(): + xtl.set_visible(False) 
+ hide_ticks(axes.xaxis) + axes.grid(True, linewidth=0.25) + + return fig + + @output + @render.table + def table(): + history = cpu_history_with_hold() + latest = pd.DataFrame(history).transpose().tail(input.table_rows()) + if latest.shape[0] == 0: + return latest + return ( + latest.style.format(precision=0) + .hide(axis="index") + .set_table_attributes( + 'class="dataframe shiny-table table table-borderless font-monospace"' + ) + .background_gradient(cmap=input.cmap(), vmin=0, vmax=100) + ) + + +def hide_ticks(axis): + for ticks in [axis.get_major_ticks(), axis.get_minor_ticks()]: + for tick in ticks: + tick.tick1line.set_visible(False) + tick.tick2line.set_visible(False) + tick.label1.set_visible(False) + tick.label2.set_visible(False) + + +app = App(app_ui, server) diff --git a/examples/python/cpuinfo/helpers.py b/examples/python/cpuinfo/helpers.py deleted file mode 100644 index 00b65fc7..00000000 --- a/examples/python/cpuinfo/helpers.py +++ /dev/null @@ -1,69 +0,0 @@ -from math import ceil - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - - -def hide_ticks(axis): - for ticks in [axis.get_major_ticks(), axis.get_minor_ticks()]: - for tick in ticks: - tick.tick1line.set_visible(False) - tick.tick2line.set_visible(False) - tick.label1.set_visible(False) - tick.label2.set_visible(False) - - -def plot_cpu(history, nsamples, ncpu, cmap): - if history is None: - history = np.array([]) - history.shape = (ncpu, 0) - - # Throw away samples too old to fit on the plot - if history.shape[1] > nsamples: - history = history[:, -nsamples:] - - ncols = 2 - nrows = int(ceil(ncpu / ncols)) - fig, axeses = plt.subplots( - nrows=nrows, - ncols=ncols, - squeeze=False, - ) - for i in range(0, ncols * nrows): - row = i // ncols - col = i % ncols - axes = axeses[row, col] - if i >= len(history): - axes.set_visible(False) - continue - data = history[i] - axes.yaxis.set_label_position("right") - axes.yaxis.tick_right() - axes.set_xlim(-(nsamples - 1), 0) - axes.set_ylim(0, 100) - - assert len(data) <= nsamples - - # Set up an array of x-values that will right-align the data relative to the - # plotting area - x = np.arange(0, len(data)) - x = np.flip(-x) - - # Color bars by cmap - color = plt.get_cmap(cmap)(data / 100) - axes.bar(x, data, color=color, linewidth=0, width=1.0) - - axes.set_yticks([25, 50, 75]) - for ytl in axes.get_yticklabels(): - if col == ncols - 1 or i == ncpu - 1 or True: - ytl.set_fontsize(7) - else: - ytl.set_visible(False) - hide_ticks(axes.yaxis) - for xtl in axes.get_xticklabels(): - xtl.set_visible(False) - hide_ticks(axes.xaxis) - axes.grid(True, linewidth=0.25) - - return fig diff --git a/examples/python/file_download/app.py b/examples/python/file_download/app.py index c68ec25c..34635e79 100644 --- a/examples/python/file_download/app.py +++ b/examples/python/file_download/app.py @@ -5,51 +5,71 @@ import matplotlib.pyplot as plt import numpy as np -from shiny.express import ui, input -from shiny import render - -with ui.layout_columns(): - with ui.card(): - ui.card_header("Download a pre-existing file, using its existing name on disk.") - - @render.download(label="Download CSV", filename="mtcars.csv") - def download1(): - # This is the simplest case. The implementation simply returns the path to a - # file on disk. 
- path = Path(__file__).parent / "mtcars.csv" - return str(path) - - with ui.card(): - ui.card_header("Download a PNG that is generated dynamically.") - ui.input_text("title", "Plot title", "Random scatter plot") - ui.input_slider("num_points", "Number of data points", 1, 100, 50) - - @render.download(label="Download PNG", filename="image.png") - def download2(): - # Another way to implement a file download is by yielding bytes; either all at - # once, like in this case, or by yielding multiple times. When using this - # approach, you should pass a filename argument to @session.download, which - # determines what the browser will name the downloaded file. - x = np.random.uniform(size=input.num_points()) - y = np.random.uniform(size=input.num_points()) - plt.figure() - plt.scatter(x, y) - plt.title(input.title()) - with io.BytesIO() as buf: - plt.savefig(buf, format="png") - yield buf.getvalue() - - with ui.card(): - ui.card_header("Download a file with name that is generated dynamically.") - - @render.download( - label="Dynamic file name", - filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv", - ) - async def download3(): - # This version uses a function to generate the filename. It also yields data - # multiple times. - await asyncio.sleep(0.25) - yield "one,two,three\n" - yield "新,1,2\n" - yield "型,4,5\n" +from shiny import App, ui + + +# A card component wrapper. +def ui_card(title, *args): + return ( + ui.div( + {"class": "card mb-4"}, + ui.div(title, class_="card-header"), + ui.div({"class": "card-body"}, *args), + ), + ) + + +app_ui = ui.page_fluid( + ui_card( + "Download a pre-existing file, using its existing name on disk.", + ui.download_button("download1", "Download CSV"), + ), + ui_card( + "Download a PNG that is generated dynamically.", + ui.input_text("title", "Plot title", "Random scatter plot"), + ui.input_slider("num_points", "Number of data points", 1, 100, 50), + ui.download_button("download2", "Download PNG"), + ), + ui_card( + "Download a file with name that is generated dynamically.", + ui.download_button("download3", "Download CSV"), + ), +) + + +def server(input, output, session): + @session.download() + def download1(): + # This is the simplest case. The implementation simply returns the path to a + # file on disk. + path = Path(__file__).parent / "mtcars.csv" + return str(path) + + @session.download(filename="image.png") + def download2(): + # Another way to implement a file download is by yielding bytes; either all at + # once, like in this case, or by yielding multiple times. When using this + # approach, you should pass a filename argument to @session.download, which + # determines what the browser will name the downloaded file. + x = np.random.uniform(size=input.num_points()) + y = np.random.uniform(size=input.num_points()) + plt.figure() + plt.scatter(x, y) + plt.title(input.title()) + with io.BytesIO() as buf: + plt.savefig(buf, format="png") + yield buf.getvalue() + + @session.download( + filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv" + ) + async def download3(): + # This version uses a function to generate the filename. It also yields data + # multiple times. 
+ await asyncio.sleep(0.25) + yield "one,two,three\n" + yield "新,1,2\n" + yield "型,4,5\n" + + +app = App(app_ui, server) diff --git a/examples/python/file_upload/app.py b/examples/python/file_upload/app.py index 14fade80..8b279894 100644 --- a/examples/python/file_upload/app.py +++ b/examples/python/file_upload/app.py @@ -2,13 +2,57 @@ from math import ceil from typing import List -from shiny import render -from shiny.express import ui, input +from shiny import App, render, ui +app_ui = ui.page_fluid( + ui.input_file("file1", "Choose a file to upload:", multiple=True), + ui.input_radio_buttons("type", "Type:", ["Binary", "Text"]), + ui.output_text_verbatim("file_content"), +) -MAX_SIZE = 50000 -ui.input_file("file1", "Choose a file to upload:", multiple=True), -ui.input_radio_buttons("type", "Type:", ["Text", "Binary"]), + +def server(input, output, session): + MAX_SIZE = 50000 + + @output + @render.text + def file_content(): + file_infos = input.file1() + if not file_infos: + return + + # file_infos is a list of dicts; each dict represents one file. Example: + # [ + # { + # 'name': 'data.csv', + # 'size': 2601, + # 'type': 'text/csv', + # 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv' + # } + # ] + out_str = "" + for file_info in file_infos: + out_str += ( + "=" * 47 + + "\n" + + file_info["name"] + + "\nMIME type: " + + str(mimetypes.guess_type(file_info["name"])[0]) + ) + if file_info["size"] > MAX_SIZE: + out_str += f"\nTruncating at {MAX_SIZE} bytes." + + out_str += "\n" + "=" * 47 + "\n" + + if input.type() == "Text": + with open(file_info["datapath"], "r") as f: + out_str += f.read(MAX_SIZE) + else: + with open(file_info["datapath"], "rb") as f: + data = f.read(MAX_SIZE) + out_str += format_hexdump(data) + + return out_str def format_hexdump(data: bytes) -> str: @@ -29,41 +73,4 @@ def group_into_blocks(x: List[str], blocksize: int): ] -@render.text -def file_content(): - file_infos = input.file1() - if not file_infos: - return - - # file_infos is a list of dicts; each dict represents one file. Example: - # [ - # { - # 'name': 'data.csv', - # 'size': 2601, - # 'type': 'text/csv', - # 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv' - # } - # ] - out_str = "" - for file_info in file_infos: - out_str += ( - "=" * 47 - + "\n" - + file_info["name"] - + "\nMIME type: " - + str(mimetypes.guess_type(file_info["name"])[0]) - ) - if file_info["size"] > MAX_SIZE: - out_str += f"\nTruncating at {MAX_SIZE} bytes." 
- - out_str += "\n" + "=" * 47 + "\n" - - if input.type() == "Text": - with open(file_info["datapath"], "r") as f: - out_str += f.read(MAX_SIZE) - else: - with open(file_info["datapath"], "rb") as f: - data = f.read(MAX_SIZE) - out_str += format_hexdump(data) - - return out_str +app = App(app_ui, server) diff --git a/examples/python/multiple_source_files/app.py b/examples/python/multiple_source_files/app.py index 4f056560..32884c04 100644 --- a/examples/python/multiple_source_files/app.py +++ b/examples/python/multiple_source_files/app.py @@ -1,11 +1,18 @@ from shiny import App, render, ui -from shiny.express import ui, input from utils import square -ui.input_slider("n", "N", 0, 100, 20), +app_ui = ui.page_fluid( + ui.input_slider("n", "N", 0, 100, 20), + ui.output_text_verbatim("txt"), +) -@render.text -def txt(): - val = square(input.n()) - return f"{input.n()} squared is {val}" +def server(input, output, session): + @output + @render.text + def txt(): + val = square(input.n()) + return f"{input.n()} squared is {val}" + + +app = App(app_ui, server, debug=True) diff --git a/examples/python/read_local_csv_file/app.py b/examples/python/read_local_csv_file/app.py index 5b5834b2..e02ebb05 100644 --- a/examples/python/read_local_csv_file/app.py +++ b/examples/python/read_local_csv_file/app.py @@ -1,13 +1,22 @@ from pathlib import Path import pandas -from shiny import render +from shiny import App, render, ui -# We need to import something from express to activate express mode -import shiny.express +app_ui = ui.page_fluid( + ui.output_table("table"), +) -@render.table -def data_frame(): - infile = Path(__file__).parent / "mtcars.csv" - return pandas.read_csv(infile) +def server(input, output, session): + @output + @render.table + def table(): + infile = Path(__file__).parent / "mtcars.csv" + df = pandas.read_csv(infile) + # Use the DataFrame's to_html() function to convert it to an HTML table, and + # then wrap with ui.HTML() so Shiny knows to treat it as raw HTML. 
+ return df + + +app = App(app_ui, server) diff --git a/examples/python/regularization/app.py b/examples/python/regularization/app.py index da824cf3..2576cad8 100644 --- a/examples/python/regularization/app.py +++ b/examples/python/regularization/app.py @@ -8,49 +8,202 @@ # Import custom Python Functions from local file from compare import compare, sim_data -from shiny import reactive, render -from shiny.express import ui, input +from shiny import App, reactive, render, ui # data nsims = 100 sim = [sim_data(n=1000) for i in range(0, nsims)] -# app - -ui.tags.script( - src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" -), -ui.tags.script("if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);") - - -@reactive.Calc -def models(): - sim_alpha = [compare(df, alpha=input.a()) for df in sim] - sim_alpha = pd.concat(sim_alpha) - - return sim_alpha - - -with ui.div(class_="col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"): - ui.h3("How Does Regularization Strength Affect Coefficient Estimates?"), - -with ui.div(class_="col-md-78 col-lg-5 py-4 mx-auto"): - ui.input_slider( - "a", - "Select a Regularization Strength:", - min=0.000000001, - max=1, - value=0.1, - step=0.01, - width="100%", - ) - ui.p( - {"class": "pt-4 small"}, - "(Each time you change the slider input, the simulation will take some time to run.)", - ) - -with ui.div(class_="col-lg-11 py-5 mx-auto"): +# app +app_ui = ui.page_fixed( + # add head that allows LaTeX to be displayed via MathJax + ui.head_content( + ui.tags.script( + src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" + ), + ui.tags.script( + "if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);" + ), + ), + ui.column( + 10, + {"class": "col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"}, + # Title + ui.h1("How Does Regularization Strength Affect Coefficient Estimates?"), + # input slider + ), + ui.column( + 10, + {"class": "col-md-78 col-lg-5 py-4 mx-auto"}, + # Title + ui.input_slider( + "a", + "Select a Regularization Strength:", + 0.000000001, + 1, + 0.1, + step=0.01, + width="100%", + ), + ui.p( + {"class": "pt-4 small"}, + "(Each time you change the slider input, the simulation will take some time to run.)", + ), + ), + ui.column( + 12, + {"class": "col-lg-11 py-5 mx-auto"}, + # output plot + ui.output_plot("plot"), + ), + # Explanation and Explore text row with two equal-width columns + ui.row( + ui.column( + 10, + {"class": "col-lg-6 py-5 mx-auto"}, + ui.h4("Explanation"), + ui.p( + """ + When we train Machine Learning models like linear regressions, logistic + regressions, or neural networks, we do so by defining a loss function + and minimizing that loss function. A loss function is a metric for + measuring how your model is performing where lower is better. For + example, Mean Squared Error is a loss function that measures the squared + distance (on average) between a model's guesses and the true values.""" + ), + # LaTeX + ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2$$"), + ui.p( + """ + Regularization works by adding a penalty to the loss function in order + to penalize large model parameters. In Linear Regression, the penalty + increases when the size of the coefficients increases. Because the loss + function is made up of two things: the original loss function (the MSE, + here) and the penalty, predictors must 'pull their weight' by reducing + the MSE enough to be 'worth' the penalty. 
This causes small, unimportant + predictors to have small or zero coefficients.""" + ), + ui.p( + """ + LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO + adds a penalty to the loss function by taking the absolute value of each + parameter/coefficient, and adding them all together. Ridge adds a + penalty to the loss function by taking the square of each + parameter/coefficient, and adding them all together.""" + ), + # LaTeX + ui.p( + "$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$" + ), + ui.p( + "$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$" + ), + ui.p( + """ + When using regularization, we must choose the regularization strength + (see slider above) which is a number that scales how harshly we + penalize. If we multiply the penalty by 0, that's the same as not having + a penalty at all. But if we multiply the penalty by 500, that would + penalize the parameters a lot more.""" + ), + ui.p("$$\\lambda \\text{ is the regularization strength.}$$"), + ), + ), + ui.row( + ui.column( + 10, + {"class": "col-lg-6 py-5 mx-auto"}, + ui.h4("Explore"), + ui.h5("Comparing LASSO, Ridge, and Linear Regression"), + ui.p( + """ + With the slider at 0.1 (the default) look at the boxplot at the top of + the page. This shows the coefficients from 1000 simulated data sets. For + each data set the 'vowels' (A, E, I, O, U, Y, W) do have some + relationship with the outcome (X) that our model is predicting. A has + the largest effect then E, I, O, U, Y and finally W has the smallest + effect on X. The Consonants (B,C,D,G,H,J,K) have absolutely no effect on + X.""" + ), + ui.p("Look at the Graph and ask yourself these questions:"), + ui.tags.ul( + ui.tags.li( + """ + Which model (Linear, LASSO, Ridge) tends to have the highest + coefficients? What does this tell you about the various + penalties each model has?""" + ), + ui.tags.li( + """ + What happens to the LASSO coefficients for the Consonant + predictors (B-K) which have no real effect on X?""" + ), + ui.tags.li( + """ + The Linear and Ridge Coefficients look similar for the + Consonants (B-K) but what's slightly different between them? + What does that tell you about what Ridge penalties do?""" + ), + ui.tags.li( + """ + Are the larger effects (A-I) affected differently than the + smaller effects (O-W) when you increase the Regularization + Strength?""" + ), + ), + ui.h5("Comparing Different Regularization Strengths"), + ui.p( + """ + Now, using the slider at the top of the page, change the Regularization + Strength. Try values that are very low, moderate, and very high.""" + ), + ui.p("Look at the Graph and ask yourself these questions:"), + ui.tags.ul( + ui.tags.li( + """ + What happens to the LASSO and Ridge models when the Regularization + Strength is almost 0?""" + ), + ui.tags.li( + """ + What happens to the LASSO model's coefficients when the + Regularization Strength is very high?""" + ), + ui.tags.li( + """ + Do the Linear Regression coefficients change when you change + Regularization Strength? 
(if so, why, if not, why not?)""" + ), + ), + ), + ), + # output plots separated by real effects (vowels), and zero-effects (consonants) + ui.column( + 12, + {"class": "col-lg-11 py-5 mx-auto text-center"}, + ui.h2("Plots Separated by Vowels and Consonants"), + ), + ui.column( + 12, + {"class": "col-lg-11 mb-5 pb-5 mx-auto"}, + ui.output_plot("plotVOWELS"), + ui.output_plot("plotCONSONANTS"), + ), +) + + +def server(input, output, session): + # reactive Calc that runs LASSO, Ridge, and Linear models on generated data + @reactive.Calc + def models(): + sim_alpha = [compare(df, alpha=input.a()) for df in sim] + sim_alpha = pd.concat(sim_alpha) + + return sim_alpha + + # output plot of all simulation coefficients + @output @render.plot() def plot(): # get data from reactive Calc @@ -85,95 +238,8 @@ def plot(): ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig - -with ui.div(class_="col-lg-6 py-5 mx-auto"): - ui.markdown( - """ - ### Explanation - - When we train Machine Learning models like linear regressions, logistic - regressions, or neural networks, we do so by defining a loss function - and minimizing that loss function. A loss function is a metric for - measuring how your model is performing where lower is better. For - example, Mean Squared Error is a loss function that measures the squared - distance (on average) between a model's guesses and the true values. - """ - ) - # LaTeX - ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2$$") - ui.p( - """ - Regularization works by adding a penalty to the loss function in order - to penalize large model parameters. In Linear Regression, the penalty - increases when the size of the coefficients increases. Because the loss - function is made up of two things: the original loss function (the MSE, - here) and the penalty, predictors must 'pull their weight' by reducing - the MSE enough to be 'worth' the penalty. This causes small, unimportant - predictors to have small or zero coefficients. - - LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO - adds a penalty to the loss function by taking the absolute value of each - parameter/coefficient, and adding them all together. Ridge adds a - penalty to the loss function by taking the square of each - parameter/coefficient, and adding them all together.""" - ) - # LaTeX - ui.p( - "$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$" - ) - ui.p( - "$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$" - ) - ui.p( - """ - When using regularization, we must choose the regularization strength - (see slider above) which is a number that scales how harshly we - penalize. If we multiply the penalty by 0, that's the same as not having - a penalty at all. But if we multiply the penalty by 500, that would - penalize the parameters a lot more.""" - ) - ui.p("$$\\lambda \\text{ is the regularization strength.}$$") - - -with ui.div(class_="col-lg-6 py-5 mx-auto"): - ui.markdown( - """ - ### Explore - - #### Comparing LASSO, Ridge, and Linear Regression - With the slider at 0.1 (the default) look at the boxplot at the top of the page. This shows the - coefficients from 1000 simulated data sets. For each data set the 'vowels' (A, E, I, O, U, Y, W) - do have some relationship with the outcome (X) that our model is predicting. A has the largest - effect then E, I, O, U, Y and finally W has the smallest effect on X. 
The Consonants (B,C,D,G,H,J,K) - have absolutely no effect on X. - - Look at the Graph and ask yourself these questions: - - Which model (Linear, LASSO, Ridge) tends to have the highest coefficients? What does this tell - you about the various penalties each model has? - - What happens to the LASSO coefficients for the Consonant predictors (B-K) which have no real - effect on X? - - The Linear and Ridge Coefficients look similar for the Consonants (B-K) but what's slightly - different between them? What does that tell you about what Ridge penalties do? - - Are the larger effects (A-I) affected differently than the smaller effects (O-W) when you increase - the Regularization Strength? - - #### Comparing Different Regularization Strengths - Now, using the slider at the top of the page, change the Regularization Strength. Try values that - are very low, moderate, and very high. - - Look at the Graph and ask yourself these questions: - - What happens to the LASSO and Ridge models when the Regularization Strength is almost 0? - - What happens to the LASSO model's coefficients when the Regularization Strength is very high? - - Do the Linear Regression coefficients change when you change Regularization Strength? (if so, why, - if not, why not?) - """ - ) - -with ui.div(class_="col-lg-11 py-5 mx-auto text-center"): - ui.h2("Plots Separated by Vowels and Consonants") - -with ui.div(class_="col-lg-11 mb-5 pb-5 mx-auto"): - + # output plot of all simulation coefficients (vowels only) + @output @render.plot() def plotVOWELS(): # get data from reactive Calc @@ -195,6 +261,8 @@ def plotVOWELS(): ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig + # output plot of all simulation coefficients (consonants only) + @output @render.plot() def plotCONSONANTS(): # get data from reactive Calc @@ -218,3 +286,6 @@ def plotCONSONANTS(): tt = "CONSONANT Coefficient Estimates when alpha = " + str(input.a()) ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig + + +app = App(app_ui, server)