diff --git a/.gitignore b/.gitignore index 5e145cee..8bebb32e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /venv/ +**/.venv/ +**/.DS_Store /node_modules/ /packages/*.whl /dist/ diff --git a/examples/index.json b/examples/index.json index dfde82c6..8c00bcbf 100644 --- a/examples/index.json +++ b/examples/index.json @@ -10,12 +10,9 @@ "category": "Featured", "apps": [ "cpuinfo", - "orbit", "regularization", - "wordle", "plotly", - "ipyleaflet", - "camera" + "ipyleaflet" ] }, { @@ -27,45 +24,11 @@ "file_download", "insert_ui", "input_update", - "modules", "extra_packages", - "static_content", "fetch", "ipywidgets" ] }, - { - "category": "Inputs", - "apps": [ - "input_text", - "input_numeric", - "input_slider", - "input_checkbox", - "input_switch", - "input_checkbox_group", - "input_select", - "input_radio", - "input_text_area", - "input_date", - "input_date_range", - "input_password" - ] - }, - { - "category": "Outputs", - "apps": [ - "output_text", - "output_text_verbatim", - "output_ui", - "output_plot", - "output_table", - "output_data_frame_grid" - ] - }, - { - "category": "Layout", - "apps": ["shinyswatch", "layout_sidebar", "layout_two_column"] - }, { "category": "Reactivity", "apps": [ @@ -75,6 +38,17 @@ "reactive_value" ] }, + { + "category": "Shiny Core", + "apps": [ + "modules", + "plot_interact_basic", + "plot_interact_exclude", + "orbit", + "wordle", + "static_content" + ] + }, { "category": "Interactive plots", "apps": [ diff --git a/examples/python/app_with_plot/app.py b/examples/python/app_with_plot/app.py index 959af30f..9d666e8f 100644 --- a/examples/python/app_with_plot/app.py +++ b/examples/python/app_with_plot/app.py @@ -1,26 +1,14 @@ import matplotlib.pyplot as plt import numpy as np -from shiny import App, render, ui +from shiny import render +from shiny.express import ui, input -app_ui = ui.page_fluid( - ui.layout_sidebar( - ui.panel_sidebar( - ui.input_slider("n", "N", 0, 100, 20), - ), - ui.panel_main( - ui.output_plot("histogram"), - ), - ), -) +with ui.sidebar(): + ui.input_slider("n", "N", 0, 100, 20) -def server(input, output, session): - @output - @render.plot(alt="A histogram") - def histogram(): - np.random.seed(19680801) - x = 100 + 15 * np.random.randn(437) - plt.hist(x, input.n(), density=True) - - -app = App(app_ui, server, debug=True) +@render.plot(alt="A histogram") +def histogram(): + np.random.seed(19680801) + x = 100 + 15 * np.random.randn(437) + plt.hist(x, input.n(), density=True) diff --git a/examples/python/basic_app/app.py b/examples/python/basic_app/app.py index f0fc6350..6e04a762 100644 --- a/examples/python/basic_app/app.py +++ b/examples/python/basic_app/app.py @@ -1,16 +1,10 @@ -from shiny import App, render, ui +from shiny import render +from shiny.express import ui, input -app_ui = ui.page_fluid( - ui.input_slider("n", "N", 0, 100, 20), - ui.output_text_verbatim("txt"), -) +ui.input_slider("n", "N", 0, 100, 20), -def server(input, output, session): - @output - @render.text - def txt(): - return f"n*2 is {input.n() * 2}" - -app = App(app_ui, server) +@render.text +def txt(): + return f"n*2 is {input.n() * 2}" diff --git a/examples/python/cpuinfo/app.py b/examples/python/cpuinfo/app.py index 2b9e58db..cb338b46 100644 --- a/examples/python/cpuinfo/app.py +++ b/examples/python/cpuinfo/app.py @@ -3,20 +3,16 @@ if "pyodide" in sys.modules: # psutil doesn't work on pyodide--use fake data instead from fakepsutil import cpu_count, cpu_percent - - shinylive_message = "Note: the CPU data is simulated when running in Shinylive." 
else: from psutil import cpu_count, cpu_percent - shinylive_message = "" - -from math import ceil - import matplotlib -import matplotlib.pyplot as plt import numpy as np import pandas as pd -from shiny import App, reactive, render, ui +from helpers import plot_cpu + +from shiny import reactive, render +from shiny.express import input, ui, output # The agg matplotlib backend seems to be a little more efficient than the default when # running on macOS, and also gives more consistent results across operating systems @@ -27,83 +23,9 @@ # secs between samples SAMPLE_PERIOD = 1 - ncpu = cpu_count(logical=True) -app_ui = ui.page_fluid( - ui.tags.style( - """ - /* Don't apply fade effect, it's constantly recalculating */ - .recalculating { - opacity: 1; - } - tbody > tr:last-child { - /*border: 3px solid var(--bs-dark);*/ - box-shadow: - 0 0 2px 1px #fff, /* inner white */ - 0 0 4px 2px #0ff, /* middle cyan */ - 0 0 5px 3px #00f; /* outer blue */ - } - #table table { - table-layout: fixed; - width: %s; - font-size: 0.8em; - } - th, td { - text-align: center; - } - """ - % f"{ncpu*4}em" - ), - ui.h3("CPU Usage %", class_="mt-2"), - ui.layout_sidebar( - ui.panel_sidebar( - ui.input_select( - "cmap", - "Colormap", - { - "inferno": "inferno", - "viridis": "viridis", - "copper": "copper", - "prism": "prism (not recommended)", - }, - ), - ui.p(ui.input_action_button("reset", "Clear history", class_="btn-sm")), - ui.input_switch("hold", "Freeze output", value=False), - shinylive_message, - class_="mb-3", - ), - ui.panel_main( - ui.div( - {"class": "card mb-3"}, - ui.div( - {"class": "card-body"}, - ui.h5({"class": "card-title mt-0"}, "Graphs"), - ui.output_plot("plot", height=f"{ncpu * 40}px"), - ), - ui.div( - {"class": "card-footer"}, - ui.input_numeric("sample_count", "Number of samples per graph", 50), - ), - ), - ui.div( - {"class": "card"}, - ui.div( - {"class": "card-body"}, - ui.h5({"class": "card-title m-0"}, "Heatmap"), - ), - ui.div( - {"class": "card-body overflow-auto pt-0"}, - ui.output_table("table"), - ), - ui.div( - {"class": "card-footer"}, - ui.input_numeric("table_rows", "Rows to display", 5), - ), - ), - ), - ), -) +ui.page_opts(fillable=True) @reactive.Calc @@ -112,127 +34,95 @@ def cpu_current(): return cpu_percent(percpu=True) -def server(input, output, session): - cpu_history = reactive.Value(None) +cpu_history = reactive.Value(None) + - @reactive.Calc - def cpu_history_with_hold(): - # If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a - # regular read - if not input.hold(): +@reactive.Calc +def cpu_history_with_hold(): + # If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a + # regular read + if not input.hold(): + return cpu_history() + else: + # Even if frozen, we still want to respond to input.reset() + input.reset() + with reactive.isolate(): return cpu_history() + + +@reactive.Effect +def collect_cpu_samples(): + """cpu_percent() reports just the current CPU usage sample; this Effect gathers + them up and stores them in the cpu_history reactive value, in a numpy 2D array + (rows are CPUs, columns are time).""" + + new_data = np.vstack(cpu_current()) + with reactive.isolate(): + if cpu_history() is None: + cpu_history.set(new_data) else: - # Even if frozen, we still want to respond to input.reset() - input.reset() - with reactive.isolate(): - return cpu_history() - - @reactive.Effect - def collect_cpu_samples(): - """cpu_percent() reports just the current CPU usage sample; this Effect gathers - them up and stores them in 
the cpu_history reactive value, in a numpy 2D array - (rows are CPUs, columns are time).""" - - new_data = np.vstack(cpu_current()) - with reactive.isolate(): - if cpu_history() is None: - cpu_history.set(new_data) - else: - combined_data = np.hstack([cpu_history(), new_data]) - # Throw away extra data so we don't consume unbounded amounts of memory - if combined_data.shape[1] > MAX_SAMPLES: - combined_data = combined_data[:, -MAX_SAMPLES:] - cpu_history.set(combined_data) - - @reactive.Effect(priority=100) - @reactive.event(input.reset) - def reset_history(): - cpu_history.set(None) - - @output - @render.plot - def plot(): - history = cpu_history_with_hold() - - if history is None: - history = np.array([]) - history.shape = (ncpu, 0) - - nsamples = input.sample_count() - - # Throw away samples too old to fit on the plot - if history.shape[1] > nsamples: - history = history[:, -nsamples:] - - ncols = 2 - nrows = int(ceil(ncpu / ncols)) - fig, axeses = plt.subplots( - nrows=nrows, - ncols=ncols, - squeeze=False, - ) - for i in range(0, ncols * nrows): - row = i // ncols - col = i % ncols - axes = axeses[row, col] - if i >= len(history): - axes.set_visible(False) - continue - data = history[i] - axes.yaxis.set_label_position("right") - axes.yaxis.tick_right() - axes.set_xlim(-(nsamples - 1), 0) - axes.set_ylim(0, 100) - - assert len(data) <= nsamples - - # Set up an array of x-values that will right-align the data relative to the - # plotting area - x = np.arange(0, len(data)) - x = np.flip(-x) - - # Color bars by cmap - color = plt.get_cmap(input.cmap())(data / 100) - axes.bar(x, data, color=color, linewidth=0, width=1.0) - - axes.set_yticks([25, 50, 75]) - for ytl in axes.get_yticklabels(): - if col == ncols - 1 or i == ncpu - 1 or True: - ytl.set_fontsize(7) - else: - ytl.set_visible(False) - hide_ticks(axes.yaxis) - for xtl in axes.get_xticklabels(): - xtl.set_visible(False) - hide_ticks(axes.xaxis) - axes.grid(True, linewidth=0.25) - - return fig - - @output - @render.table - def table(): - history = cpu_history_with_hold() - latest = pd.DataFrame(history).transpose().tail(input.table_rows()) - if latest.shape[0] == 0: - return latest - return ( - latest.style.format(precision=0) - .hide(axis="index") - .set_table_attributes( - 'class="dataframe shiny-table table table-borderless font-monospace"' - ) - .background_gradient(cmap=input.cmap(), vmin=0, vmax=100) - ) - - -def hide_ticks(axis): - for ticks in [axis.get_major_ticks(), axis.get_minor_ticks()]: - for tick in ticks: - tick.tick1line.set_visible(False) - tick.tick2line.set_visible(False) - tick.label1.set_visible(False) - tick.label2.set_visible(False) - - -app = App(app_ui, server) + combined_data = np.hstack([cpu_history(), new_data]) + # Throw away extra data so we don't consume unbounded amounts of memory + if combined_data.shape[1] > MAX_SAMPLES: + combined_data = combined_data[:, -MAX_SAMPLES:] + cpu_history.set(combined_data) + + +@reactive.Effect(priority=100) +@reactive.event(input.reset) +def reset_history(): + cpu_history.set(None) + + +ui.tags.style( + """ + /* Don't apply fade effect, it's constantly recalculating */ + .recalculating { + opacity: 1; + } + """ +) + +with ui.sidebar(): + ui.input_select( + "cmap", + "Colormap", + { + "inferno": "inferno", + "viridis": "viridis", + "copper": "copper", + "prism": "prism (not recommended)", + }, + ), + ui.input_action_button("reset", "Clear history", class_="btn-sm") + ui.input_switch("hold", "Freeze output", value=False) + +with ui.card(): + with ui.navset_bar(title="CPU 
%"): + with ui.nav_panel(title="Graphs"): + ui.input_numeric("sample_count", "Number of samples per graph", 50) + + @render.plot + def plot(): + return plot_cpu( + cpu_history_with_hold(), input.sample_count(), ncpu, input.cmap() + ) + + with ui.nav_panel(title="Heatmap"): + ui.input_numeric("table_rows", "Rows to display", 15) + + @output(suspend_when_hidden=False) + @render.table + def table(): + history = cpu_history_with_hold() + latest = pd.DataFrame(history).transpose().tail(input.table_rows()) + if latest.shape[0] == 0: + return latest + return ( + latest.style.format(precision=0) + .hide(axis="index") + .set_table_attributes( + 'class="dataframe shiny-table table table-borderless font-monospace"' + ) + .background_gradient(cmap=input.cmap(), vmin=0, vmax=100) + ) diff --git a/examples/python/cpuinfo/helpers.py b/examples/python/cpuinfo/helpers.py new file mode 100644 index 00000000..00b65fc7 --- /dev/null +++ b/examples/python/cpuinfo/helpers.py @@ -0,0 +1,69 @@ +from math import ceil + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + + +def hide_ticks(axis): + for ticks in [axis.get_major_ticks(), axis.get_minor_ticks()]: + for tick in ticks: + tick.tick1line.set_visible(False) + tick.tick2line.set_visible(False) + tick.label1.set_visible(False) + tick.label2.set_visible(False) + + +def plot_cpu(history, nsamples, ncpu, cmap): + if history is None: + history = np.array([]) + history.shape = (ncpu, 0) + + # Throw away samples too old to fit on the plot + if history.shape[1] > nsamples: + history = history[:, -nsamples:] + + ncols = 2 + nrows = int(ceil(ncpu / ncols)) + fig, axeses = plt.subplots( + nrows=nrows, + ncols=ncols, + squeeze=False, + ) + for i in range(0, ncols * nrows): + row = i // ncols + col = i % ncols + axes = axeses[row, col] + if i >= len(history): + axes.set_visible(False) + continue + data = history[i] + axes.yaxis.set_label_position("right") + axes.yaxis.tick_right() + axes.set_xlim(-(nsamples - 1), 0) + axes.set_ylim(0, 100) + + assert len(data) <= nsamples + + # Set up an array of x-values that will right-align the data relative to the + # plotting area + x = np.arange(0, len(data)) + x = np.flip(-x) + + # Color bars by cmap + color = plt.get_cmap(cmap)(data / 100) + axes.bar(x, data, color=color, linewidth=0, width=1.0) + + axes.set_yticks([25, 50, 75]) + for ytl in axes.get_yticklabels(): + if col == ncols - 1 or i == ncpu - 1 or True: + ytl.set_fontsize(7) + else: + ytl.set_visible(False) + hide_ticks(axes.yaxis) + for xtl in axes.get_xticklabels(): + xtl.set_visible(False) + hide_ticks(axes.xaxis) + axes.grid(True, linewidth=0.25) + + return fig diff --git a/examples/python/file_download/app.py b/examples/python/file_download/app.py index 34635e79..c68ec25c 100644 --- a/examples/python/file_download/app.py +++ b/examples/python/file_download/app.py @@ -5,71 +5,51 @@ import matplotlib.pyplot as plt import numpy as np -from shiny import App, ui - - -# A card component wrapper. 
-def ui_card(title, *args): - return ( - ui.div( - {"class": "card mb-4"}, - ui.div(title, class_="card-header"), - ui.div({"class": "card-body"}, *args), - ), - ) - - -app_ui = ui.page_fluid( - ui_card( - "Download a pre-existing file, using its existing name on disk.", - ui.download_button("download1", "Download CSV"), - ), - ui_card( - "Download a PNG that is generated dynamically.", - ui.input_text("title", "Plot title", "Random scatter plot"), - ui.input_slider("num_points", "Number of data points", 1, 100, 50), - ui.download_button("download2", "Download PNG"), - ), - ui_card( - "Download a file with name that is generated dynamically.", - ui.download_button("download3", "Download CSV"), - ), -) - - -def server(input, output, session): - @session.download() - def download1(): - # This is the simplest case. The implementation simply returns the path to a - # file on disk. - path = Path(__file__).parent / "mtcars.csv" - return str(path) - - @session.download(filename="image.png") - def download2(): - # Another way to implement a file download is by yielding bytes; either all at - # once, like in this case, or by yielding multiple times. When using this - # approach, you should pass a filename argument to @session.download, which - # determines what the browser will name the downloaded file. - x = np.random.uniform(size=input.num_points()) - y = np.random.uniform(size=input.num_points()) - plt.figure() - plt.scatter(x, y) - plt.title(input.title()) - with io.BytesIO() as buf: - plt.savefig(buf, format="png") - yield buf.getvalue() - - @session.download( - filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv" - ) - async def download3(): - # This version uses a function to generate the filename. It also yields data - # multiple times. - await asyncio.sleep(0.25) - yield "one,two,three\n" - yield "新,1,2\n" - yield "型,4,5\n" - - -app = App(app_ui, server) +from shiny.express import ui, input +from shiny import render + +with ui.layout_columns(): + with ui.card(): + ui.card_header("Download a pre-existing file, using its existing name on disk.") + + @render.download(label="Download CSV", filename="mtcars.csv") + def download1(): + # This is the simplest case. The implementation simply returns the path to a + # file on disk. + path = Path(__file__).parent / "mtcars.csv" + return str(path) + + with ui.card(): + ui.card_header("Download a PNG that is generated dynamically.") + ui.input_text("title", "Plot title", "Random scatter plot") + ui.input_slider("num_points", "Number of data points", 1, 100, 50) + + @render.download(label="Download PNG", filename="image.png") + def download2(): + # Another way to implement a file download is by yielding bytes; either all at + # once, like in this case, or by yielding multiple times. When using this + # approach, you should pass a filename argument to @session.download, which + # determines what the browser will name the downloaded file. + x = np.random.uniform(size=input.num_points()) + y = np.random.uniform(size=input.num_points()) + plt.figure() + plt.scatter(x, y) + plt.title(input.title()) + with io.BytesIO() as buf: + plt.savefig(buf, format="png") + yield buf.getvalue() + + with ui.card(): + ui.card_header("Download a file with name that is generated dynamically.") + + @render.download( + label="Dynamic file name", + filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv", + ) + async def download3(): + # This version uses a function to generate the filename. 
It also yields data + # multiple times. + await asyncio.sleep(0.25) + yield "one,two,three\n" + yield "新,1,2\n" + yield "型,4,5\n" diff --git a/examples/python/file_upload/app.py b/examples/python/file_upload/app.py index 8b279894..14fade80 100644 --- a/examples/python/file_upload/app.py +++ b/examples/python/file_upload/app.py @@ -2,57 +2,13 @@ from math import ceil from typing import List -from shiny import App, render, ui +from shiny import render +from shiny.express import ui, input -app_ui = ui.page_fluid( - ui.input_file("file1", "Choose a file to upload:", multiple=True), - ui.input_radio_buttons("type", "Type:", ["Binary", "Text"]), - ui.output_text_verbatim("file_content"), -) - -def server(input, output, session): - MAX_SIZE = 50000 - - @output - @render.text - def file_content(): - file_infos = input.file1() - if not file_infos: - return - - # file_infos is a list of dicts; each dict represents one file. Example: - # [ - # { - # 'name': 'data.csv', - # 'size': 2601, - # 'type': 'text/csv', - # 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv' - # } - # ] - out_str = "" - for file_info in file_infos: - out_str += ( - "=" * 47 - + "\n" - + file_info["name"] - + "\nMIME type: " - + str(mimetypes.guess_type(file_info["name"])[0]) - ) - if file_info["size"] > MAX_SIZE: - out_str += f"\nTruncating at {MAX_SIZE} bytes." - - out_str += "\n" + "=" * 47 + "\n" - - if input.type() == "Text": - with open(file_info["datapath"], "r") as f: - out_str += f.read(MAX_SIZE) - else: - with open(file_info["datapath"], "rb") as f: - data = f.read(MAX_SIZE) - out_str += format_hexdump(data) - - return out_str +MAX_SIZE = 50000 +ui.input_file("file1", "Choose a file to upload:", multiple=True), +ui.input_radio_buttons("type", "Type:", ["Text", "Binary"]), def format_hexdump(data: bytes) -> str: @@ -73,4 +29,41 @@ def group_into_blocks(x: List[str], blocksize: int): ] -app = App(app_ui, server) +@render.text +def file_content(): + file_infos = input.file1() + if not file_infos: + return + + # file_infos is a list of dicts; each dict represents one file. Example: + # [ + # { + # 'name': 'data.csv', + # 'size': 2601, + # 'type': 'text/csv', + # 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv' + # } + # ] + out_str = "" + for file_info in file_infos: + out_str += ( + "=" * 47 + + "\n" + + file_info["name"] + + "\nMIME type: " + + str(mimetypes.guess_type(file_info["name"])[0]) + ) + if file_info["size"] > MAX_SIZE: + out_str += f"\nTruncating at {MAX_SIZE} bytes." 
+ + out_str += "\n" + "=" * 47 + "\n" + + if input.type() == "Text": + with open(file_info["datapath"], "r") as f: + out_str += f.read(MAX_SIZE) + else: + with open(file_info["datapath"], "rb") as f: + data = f.read(MAX_SIZE) + out_str += format_hexdump(data) + + return out_str diff --git a/examples/python/multiple_source_files/app.py b/examples/python/multiple_source_files/app.py index 32884c04..4f056560 100644 --- a/examples/python/multiple_source_files/app.py +++ b/examples/python/multiple_source_files/app.py @@ -1,18 +1,11 @@ from shiny import App, render, ui +from shiny.express import ui, input from utils import square -app_ui = ui.page_fluid( - ui.input_slider("n", "N", 0, 100, 20), - ui.output_text_verbatim("txt"), -) +ui.input_slider("n", "N", 0, 100, 20), -def server(input, output, session): - @output - @render.text - def txt(): - val = square(input.n()) - return f"{input.n()} squared is {val}" - - -app = App(app_ui, server, debug=True) +@render.text +def txt(): + val = square(input.n()) + return f"{input.n()} squared is {val}" diff --git a/examples/python/read_local_csv_file/app.py b/examples/python/read_local_csv_file/app.py index e02ebb05..5b5834b2 100644 --- a/examples/python/read_local_csv_file/app.py +++ b/examples/python/read_local_csv_file/app.py @@ -1,22 +1,13 @@ from pathlib import Path import pandas -from shiny import App, render, ui +from shiny import render -app_ui = ui.page_fluid( - ui.output_table("table"), -) +# We need to import something from express to activate express mode +import shiny.express -def server(input, output, session): - @output - @render.table - def table(): - infile = Path(__file__).parent / "mtcars.csv" - df = pandas.read_csv(infile) - # Use the DataFrame's to_html() function to convert it to an HTML table, and - # then wrap with ui.HTML() so Shiny knows to treat it as raw HTML. 
- return df - - -app = App(app_ui, server) +@render.table +def data_frame(): + infile = Path(__file__).parent / "mtcars.csv" + return pandas.read_csv(infile) diff --git a/examples/python/regularization/app.py b/examples/python/regularization/app.py index 2576cad8..da824cf3 100644 --- a/examples/python/regularization/app.py +++ b/examples/python/regularization/app.py @@ -8,202 +8,49 @@ # Import custom Python Functions from local file from compare import compare, sim_data -from shiny import App, reactive, render, ui +from shiny import reactive, render +from shiny.express import ui, input # data nsims = 100 sim = [sim_data(n=1000) for i in range(0, nsims)] - # app -app_ui = ui.page_fixed( - # add head that allows LaTeX to be displayed via MathJax - ui.head_content( - ui.tags.script( - src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" - ), - ui.tags.script( - "if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);" - ), - ), - ui.column( - 10, - {"class": "col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"}, - # Title - ui.h1("How Does Regularization Strength Affect Coefficient Estimates?"), - # input slider - ), - ui.column( - 10, - {"class": "col-md-78 col-lg-5 py-4 mx-auto"}, - # Title - ui.input_slider( - "a", - "Select a Regularization Strength:", - 0.000000001, - 1, - 0.1, - step=0.01, - width="100%", - ), - ui.p( - {"class": "pt-4 small"}, - "(Each time you change the slider input, the simulation will take some time to run.)", - ), - ), - ui.column( - 12, - {"class": "col-lg-11 py-5 mx-auto"}, - # output plot - ui.output_plot("plot"), - ), - # Explanation and Explore text row with two equal-width columns - ui.row( - ui.column( - 10, - {"class": "col-lg-6 py-5 mx-auto"}, - ui.h4("Explanation"), - ui.p( - """ - When we train Machine Learning models like linear regressions, logistic - regressions, or neural networks, we do so by defining a loss function - and minimizing that loss function. A loss function is a metric for - measuring how your model is performing where lower is better. For - example, Mean Squared Error is a loss function that measures the squared - distance (on average) between a model's guesses and the true values.""" - ), - # LaTeX - ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2$$"), - ui.p( - """ - Regularization works by adding a penalty to the loss function in order - to penalize large model parameters. In Linear Regression, the penalty - increases when the size of the coefficients increases. Because the loss - function is made up of two things: the original loss function (the MSE, - here) and the penalty, predictors must 'pull their weight' by reducing - the MSE enough to be 'worth' the penalty. This causes small, unimportant - predictors to have small or zero coefficients.""" - ), - ui.p( - """ - LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO - adds a penalty to the loss function by taking the absolute value of each - parameter/coefficient, and adding them all together. 
Ridge adds a - penalty to the loss function by taking the square of each - parameter/coefficient, and adding them all together.""" - ), - # LaTeX - ui.p( - "$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$" - ), - ui.p( - "$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$" - ), - ui.p( - """ - When using regularization, we must choose the regularization strength - (see slider above) which is a number that scales how harshly we - penalize. If we multiply the penalty by 0, that's the same as not having - a penalty at all. But if we multiply the penalty by 500, that would - penalize the parameters a lot more.""" - ), - ui.p("$$\\lambda \\text{ is the regularization strength.}$$"), - ), - ), - ui.row( - ui.column( - 10, - {"class": "col-lg-6 py-5 mx-auto"}, - ui.h4("Explore"), - ui.h5("Comparing LASSO, Ridge, and Linear Regression"), - ui.p( - """ - With the slider at 0.1 (the default) look at the boxplot at the top of - the page. This shows the coefficients from 1000 simulated data sets. For - each data set the 'vowels' (A, E, I, O, U, Y, W) do have some - relationship with the outcome (X) that our model is predicting. A has - the largest effect then E, I, O, U, Y and finally W has the smallest - effect on X. The Consonants (B,C,D,G,H,J,K) have absolutely no effect on - X.""" - ), - ui.p("Look at the Graph and ask yourself these questions:"), - ui.tags.ul( - ui.tags.li( - """ - Which model (Linear, LASSO, Ridge) tends to have the highest - coefficients? What does this tell you about the various - penalties each model has?""" - ), - ui.tags.li( - """ - What happens to the LASSO coefficients for the Consonant - predictors (B-K) which have no real effect on X?""" - ), - ui.tags.li( - """ - The Linear and Ridge Coefficients look similar for the - Consonants (B-K) but what's slightly different between them? - What does that tell you about what Ridge penalties do?""" - ), - ui.tags.li( - """ - Are the larger effects (A-I) affected differently than the - smaller effects (O-W) when you increase the Regularization - Strength?""" - ), - ), - ui.h5("Comparing Different Regularization Strengths"), - ui.p( - """ - Now, using the slider at the top of the page, change the Regularization - Strength. Try values that are very low, moderate, and very high.""" - ), - ui.p("Look at the Graph and ask yourself these questions:"), - ui.tags.ul( - ui.tags.li( - """ - What happens to the LASSO and Ridge models when the Regularization - Strength is almost 0?""" - ), - ui.tags.li( - """ - What happens to the LASSO model's coefficients when the - Regularization Strength is very high?""" - ), - ui.tags.li( - """ - Do the Linear Regression coefficients change when you change - Regularization Strength? 
(if so, why, if not, why not?)""" - ), - ), - ), - ), - # output plots separated by real effects (vowels), and zero-effects (consonants) - ui.column( - 12, - {"class": "col-lg-11 py-5 mx-auto text-center"}, - ui.h2("Plots Separated by Vowels and Consonants"), - ), - ui.column( - 12, - {"class": "col-lg-11 mb-5 pb-5 mx-auto"}, - ui.output_plot("plotVOWELS"), - ui.output_plot("plotCONSONANTS"), - ), -) - - -def server(input, output, session): - # reactive Calc that runs LASSO, Ridge, and Linear models on generated data - @reactive.Calc - def models(): - sim_alpha = [compare(df, alpha=input.a()) for df in sim] - sim_alpha = pd.concat(sim_alpha) - - return sim_alpha - - # output plot of all simulation coefficients - @output + +ui.tags.script( + src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" +), +ui.tags.script("if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);") + + +@reactive.Calc +def models(): + sim_alpha = [compare(df, alpha=input.a()) for df in sim] + sim_alpha = pd.concat(sim_alpha) + + return sim_alpha + + +with ui.div(class_="col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"): + ui.h3("How Does Regularization Strength Affect Coefficient Estimates?"), + +with ui.div(class_="col-md-78 col-lg-5 py-4 mx-auto"): + ui.input_slider( + "a", + "Select a Regularization Strength:", + min=0.000000001, + max=1, + value=0.1, + step=0.01, + width="100%", + ) + ui.p( + {"class": "pt-4 small"}, + "(Each time you change the slider input, the simulation will take some time to run.)", + ) + +with ui.div(class_="col-lg-11 py-5 mx-auto"): + @render.plot() def plot(): # get data from reactive Calc @@ -238,8 +85,95 @@ def plot(): ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig - # output plot of all simulation coefficients (vowels only) - @output + +with ui.div(class_="col-lg-6 py-5 mx-auto"): + ui.markdown( + """ + ### Explanation + + When we train Machine Learning models like linear regressions, logistic + regressions, or neural networks, we do so by defining a loss function + and minimizing that loss function. A loss function is a metric for + measuring how your model is performing where lower is better. For + example, Mean Squared Error is a loss function that measures the squared + distance (on average) between a model's guesses and the true values. + """ + ) + # LaTeX + ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2$$") + ui.p( + """ + Regularization works by adding a penalty to the loss function in order + to penalize large model parameters. In Linear Regression, the penalty + increases when the size of the coefficients increases. Because the loss + function is made up of two things: the original loss function (the MSE, + here) and the penalty, predictors must 'pull their weight' by reducing + the MSE enough to be 'worth' the penalty. This causes small, unimportant + predictors to have small or zero coefficients. + + LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO + adds a penalty to the loss function by taking the absolute value of each + parameter/coefficient, and adding them all together. 
Ridge adds a + penalty to the loss function by taking the square of each + parameter/coefficient, and adding them all together.""" + ) + # LaTeX + ui.p( + "$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$" + ) + ui.p( + "$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$" + ) + ui.p( + """ + When using regularization, we must choose the regularization strength + (see slider above) which is a number that scales how harshly we + penalize. If we multiply the penalty by 0, that's the same as not having + a penalty at all. But if we multiply the penalty by 500, that would + penalize the parameters a lot more.""" + ) + ui.p("$$\\lambda \\text{ is the regularization strength.}$$") + + +with ui.div(class_="col-lg-6 py-5 mx-auto"): + ui.markdown( + """ + ### Explore + + #### Comparing LASSO, Ridge, and Linear Regression + With the slider at 0.1 (the default) look at the boxplot at the top of the page. This shows the + coefficients from 1000 simulated data sets. For each data set the 'vowels' (A, E, I, O, U, Y, W) + do have some relationship with the outcome (X) that our model is predicting. A has the largest + effect then E, I, O, U, Y and finally W has the smallest effect on X. The Consonants (B,C,D,G,H,J,K) + have absolutely no effect on X. + + Look at the Graph and ask yourself these questions: + - Which model (Linear, LASSO, Ridge) tends to have the highest coefficients? What does this tell + you about the various penalties each model has? + - What happens to the LASSO coefficients for the Consonant predictors (B-K) which have no real + effect on X? + - The Linear and Ridge Coefficients look similar for the Consonants (B-K) but what's slightly + different between them? What does that tell you about what Ridge penalties do? + - Are the larger effects (A-I) affected differently than the smaller effects (O-W) when you increase + the Regularization Strength? + + #### Comparing Different Regularization Strengths + Now, using the slider at the top of the page, change the Regularization Strength. Try values that + are very low, moderate, and very high. + + Look at the Graph and ask yourself these questions: + - What happens to the LASSO and Ridge models when the Regularization Strength is almost 0? + - What happens to the LASSO model's coefficients when the Regularization Strength is very high? + - Do the Linear Regression coefficients change when you change Regularization Strength? (if so, why, + if not, why not?) + """ + ) + +with ui.div(class_="col-lg-11 py-5 mx-auto text-center"): + ui.h2("Plots Separated by Vowels and Consonants") + +with ui.div(class_="col-lg-11 mb-5 pb-5 mx-auto"): + @render.plot() def plotVOWELS(): # get data from reactive Calc @@ -261,8 +195,6 @@ def plotVOWELS(): ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig - # output plot of all simulation coefficients (consonants only) - @output @render.plot() def plotCONSONANTS(): # get data from reactive Calc @@ -286,6 +218,3 @@ def plotCONSONANTS(): tt = "CONSONANT Coefficient Estimates when alpha = " + str(input.a()) ax2.set(xlabel="", ylabel="Coefficient Value", title=tt) return fig - - -app = App(app_ui, server)
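
Note on the pattern applied throughout this diff: each example app is converted from Shiny Core (an explicit `app_ui` object, a `server(input, output, session)` function, and a final `App(app_ui, server)` call) to Shiny Express, where UI calls and `@render.*` functions live at the top level of `app.py` and imports come from `shiny.express`. A minimal before/after sketch of that conversion, assuming a shiny version that provides the `shiny.express` module (the identifiers below are illustrative, not taken verbatim from this diff):

# Shiny Express style: no app_ui/server split and no App() object.
from shiny import render
from shiny.express import input, ui

# Top-level UI calls emit components directly into the page.
ui.input_slider("n", "N", min=0, max=100, value=20)

@render.text
def result():
    # Render functions read inputs reactively, exactly as they do in Core apps.
    return f"n*2 is {input.n() * 2}"

As in the converted examples above, sidebars, cards, and nav panels become `with ui.sidebar(): ...` / `with ui.card(): ...` context managers, and downloads move from `@session.download(...)` to `@render.download(label=..., filename=...)`.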