From 6cfd72731a441f44ed00b488a4cbb7de9c0e8441 Mon Sep 17 00:00:00 2001 From: Nics-Github Date: Wed, 8 Nov 2023 09:44:11 -0500 Subject: [PATCH] Data masking --- .../day_25solutions_function_writing.qmd | 29 ++++++---- .../day_25solutions_function_writing.html | 58 ++++++++++--------- docs/index.html | 10 ++-- docs/search.json | 17 ++++-- index.qmd | 23 ++++---- 5 files changed, 76 insertions(+), 61 deletions(-) diff --git a/course-materials/in-class-activies/day_25solutions_function_writing.qmd b/course-materials/in-class-activies/day_25solutions_function_writing.qmd index 3218337..8b927e4 100644 --- a/course-materials/in-class-activies/day_25solutions_function_writing.qmd +++ b/course-materials/in-class-activies/day_25solutions_function_writing.qmd @@ -24,7 +24,7 @@ The `congress_age` dataset contains the age of members of the United States cong library(tidyverse) library(babynames) -view(babynames) +#view(babynames) ``` ## Exercise 1 @@ -33,10 +33,10 @@ Write a function called `count_name` that, when given a name (e.g.,`Angelica`, ` ```{r} # Write your code below -count_name <- function(nombre = "Nicholas"){ +count_name <- function(data = babynames , nombre = "Nicholas"){ if(is.element(nombre, babynames$name)) { - babynames |> - filter(name == nombre ) |> + data |> + filter(name == nombre) |> group_by(name,year) |> reframe(year= year, n = sum(n)) @@ -49,13 +49,14 @@ count_name <- function(nombre = "Nicholas"){ ``` + ## Bonus! The `count_name` function should return one row per year that matches (and generate an error message if there are no matches). Run the function once with the argument `Ezekiel` and once with `Ezze`. ```{r} # Paste the code from Exercise 1 below and adjust it -count_name("Ezekiel") +count_name(data=babynames, nombre = "Ezekiel") #count_name("Ezze") ``` @@ -67,10 +68,14 @@ Challenge: Add an `else if` after your `if` and before your `else` to catch an e ```{r} # Write your code below -grab_name <- function(nombre ="Nicholas",ano = 1984){ + +library(tidyverse) +library(babynames) + +grab_name <- function(data = babynames, nombre ,ano ){ if(is.element(nombre,babynames$name)) { - babynames |> + data |> filter(name == nombre & year == ano) |> pull(name,year) } @@ -81,7 +86,7 @@ grab_name <- function(nombre ="Nicholas",ano = 1984){ stop("Name and year not found") } } -grab_name() +grab_name( nombre = "Samantha", ano = 1950) ``` @@ -93,13 +98,13 @@ Write a function called `count_name_graph()` that will use the function `countna ```{r} # Write your code below -count_name_graph <- function(nombre = "Nicholas"){ - count_name( nombre )|> +count_name_graph <- function(data, nombre = "Nicholas"){ + count_name(data, nombre )|> ggplot(aes(x=year, y=n))+ geom_line()+ labs(title = paste("Babies named", nombre)) } -count_name_graph("Nicholas") +count_name_graph(data=babynames, nombre = "Nicholas") ``` # Generalizing Functions for Data Frames @@ -131,5 +136,5 @@ summary_prop <- function(data, condition){ summary_prop(data=congress_age, condition = age >= 25 & age <= 55) -summary_prop(data=count_name(), condition = n>100) +summary_prop(data=count_name(data = babynames, nombre = "Ezekiel"), condition = n>100) ``` diff --git a/docs/course-materials/in-class-activies/day_25solutions_function_writing.html b/docs/course-materials/in-class-activies/day_25solutions_function_writing.html index eb777d4..c78362d 100644 --- a/docs/course-materials/in-class-activies/day_25solutions_function_writing.html +++ b/docs/course-materials/in-class-activies/day_25solutions_function_writing.html @@ -199,17 +199,17 @@

User-defined Functions

library(tidyverse)
 library(babynames)
 
-view(babynames)
+#view(babynames)

Exercise 1

Write a function called count_name that, when given a name (e.g.,Angelica, Ezekiel, or Riley) as an argument, returns the total number of births by year from the babynames data frame in the babynames package that match that name.

# Write your code below
-count_name <- function(nombre = "Nicholas"){
+count_name <- function(data = babynames , nombre = "Nicholas"){
   if(is.element(nombre, babynames$name)) {
-    babynames |>
-    filter(name ==  nombre ) |>
+    data |>
+    filter(name == nombre) |>
     group_by(name,year) |>
     reframe(year= year, n = sum(n)) 
       
@@ -225,7 +225,7 @@ 

Bonus!

The count_name function should return one row per year that matches (and generate an error message if there are no matches). Run the function once with the argument Ezekiel and once with Ezze.

# Paste the code from Exercise 1 below and adjust it
-count_name("Ezekiel")
+count_name(data=babynames, nombre = "Ezekiel")
# A tibble: 141 × 3
    name     year     n
@@ -251,24 +251,28 @@ 

Exercise 2

Challenge: Add an else if after your if and before your else to catch an error if people choose a year less than 1880 (outside of the range of our data).

# Write your code below
-grab_name <- function(nombre ="Nicholas",ano = 1984){
-
-  if(is.element(nombre,babynames$name))  {
-    babynames |>
-    filter(name == nombre & year == ano) |>
-    pull(name,year)
-  }
-  else if(ano < 1880){
-    stop("Choose a year after 1880")
-  }
-  else{
-    stop("Name and year not found")
-  }
-}
-grab_name()
+ +library(tidyverse) +library(babynames) + +grab_name <- function(data = babynames, nombre ,ano ){ + + if(is.element(nombre,babynames$name)) { + data |> + filter(name == nombre & year == ano) |> + pull(name,year) + } + else if(ano < 1880){ + stop("Choose a year after 1880") + } + else{ + stop("Name and year not found") + } +} +grab_name( nombre = "Samantha", ano = 1950)
-
      1984       1984 
-"Nicholas" "Nicholas" 
+
      1950 
+"Samantha" 
@@ -280,13 +284,13 @@

Exercise 3

Write a function called count_name_graph() that will use the function countname() to make a line graph that plots the year and the number of babies in a given year. The graph’s title should be “the”Babies Named (name of baby)” . The paste() function in r will help with your title, use it to put two strings together. Label your x and y axes.

# Write your code below
-count_name_graph <- function(nombre = "Nicholas"){
-  count_name( nombre )|>
+count_name_graph <- function(data, nombre = "Nicholas"){
+  count_name(data, nombre )|>
     ggplot(aes(x=year, y=n))+
     geom_line()+
     labs(title = paste("Babies named", nombre))
 }
-count_name_graph("Nicholas")
+count_name_graph(data=babynames, nombre = "Nicholas")

@@ -323,12 +327,12 @@

Exercise 4

<int> <dbl> 1 10657 0.572 -
summary_prop(data=count_name(), condition = n>100)
+
summary_prop(data=count_name(data = babynames, nombre = "Ezekiel"), condition = n>100)
# A tibble: 1 × 2
   number_with_condition prop_w_condition
                   <int>            <dbl>
-1                   201            0.901
+1 44 0.312
diff --git a/docs/index.html b/docs/index.html index 464d797..15a7cd3 100644 --- a/docs/index.html +++ b/docs/index.html @@ -253,7 +253,7 @@

Schedule

Day 26 Iteration

-

Do we need more class time

+

Do we need more class time?

-

Solution to Monday’s in class activity

+

Solution to Monday’s in class activity

Lecture: across() and map()

In class activity

More on masking

diff --git a/docs/search.json b/docs/search.json index 4851b1f..7ee3fac 100644 --- a/docs/search.json +++ b/docs/search.json @@ -480,35 +480,35 @@ "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-1", "title": "Lec 20: In-class Exercise: Write Functions", "section": "Exercise 1", - "text": "Exercise 1\nWrite a function called count_name that, when given a name (e.g.,Angelica, Ezekiel, or Riley) as an argument, returns the total number of births by year from the babynames data frame in the babynames package that match that name.\n\n# Write your code below\ncount_name <- function(nombre = \"Nicholas\"){\n if(is.element(nombre, babynames$name)) {\n babynames |>\n filter(name == nombre ) |>\n group_by(name,year) |>\n reframe(year= year, n = sum(n)) \n \n }\n else{\n stop(\"Name not found\")\n }\n}" + "text": "Exercise 1\nWrite a function called count_name that, when given a name (e.g.,Angelica, Ezekiel, or Riley) as an argument, returns the total number of births by year from the babynames data frame in the babynames package that match that name.\n\n# Write your code below\ncount_name <- function(data = babynames , nombre = \"Nicholas\"){\n if(is.element(nombre, babynames$name)) {\n data |>\n filter(name == nombre) |>\n group_by(name,year) |>\n reframe(year= year, n = sum(n)) \n \n }\n else{\n stop(\"Name not found\")\n }\n}" }, { "objectID": "course-materials/in-class-activies/day_25solutions_function_writing.html#bonus", "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#bonus", "title": "Lec 20: In-class Exercise: Write Functions", "section": "Bonus!", - "text": "Bonus!\nThe count_name function should return one row per year that matches (and generate an error message if there are no matches). Run the function once with the argument Ezekiel and once with Ezze.\n\n# Paste the code from Exercise 1 below and adjust it\ncount_name(\"Ezekiel\")\n\n# A tibble: 141 × 3\n name year n\n <chr> <dbl> <int>\n 1 Ezekiel 1880 16\n 2 Ezekiel 1881 22\n 3 Ezekiel 1882 11\n 4 Ezekiel 1883 14\n 5 Ezekiel 1884 13\n 6 Ezekiel 1885 10\n 7 Ezekiel 1886 17\n 8 Ezekiel 1887 11\n 9 Ezekiel 1888 16\n10 Ezekiel 1889 14\n# ℹ 131 more rows\n\n#count_name(\"Ezze\")" + "text": "Bonus!\nThe count_name function should return one row per year that matches (and generate an error message if there are no matches). Run the function once with the argument Ezekiel and once with Ezze.\n\n# Paste the code from Exercise 1 below and adjust it\ncount_name(data=babynames, nombre = \"Ezekiel\")\n\n# A tibble: 141 × 3\n name year n\n <chr> <dbl> <int>\n 1 Ezekiel 1880 16\n 2 Ezekiel 1881 22\n 3 Ezekiel 1882 11\n 4 Ezekiel 1883 14\n 5 Ezekiel 1884 13\n 6 Ezekiel 1885 10\n 7 Ezekiel 1886 17\n 8 Ezekiel 1887 11\n 9 Ezekiel 1888 16\n10 Ezekiel 1889 14\n# ℹ 131 more rows\n\n#count_name(\"Ezze\")" }, { "objectID": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-2", "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-2", "title": "Lec 20: In-class Exercise: Write Functions", "section": "Exercise 2", - "text": "Exercise 2\nWrite a function called grab_name that, when given a name and a year as an argument, returns the rows from the babynames data frame in the babynames package that match that name for that year (and returns an error if that name and year combination does not match any rows). Run the function once with the arguments Ezekiel and 1883 and once with Ezze and 1883.\nChallenge: Add an else if after your if and before your else to catch an error if people choose a year less than 1880 (outside of the range of our data).\n\n# Write your code below\ngrab_name <- function(nombre =\"Nicholas\",ano = 1984){\n\n if(is.element(nombre,babynames$name)) {\n babynames |>\n filter(name == nombre & year == ano) |>\n pull(name,year)\n }\n else if(ano < 1880){\n stop(\"Choose a year after 1880\")\n }\n else{\n stop(\"Name and year not found\")\n }\n}\ngrab_name()\n\n 1984 1984 \n\"Nicholas\" \"Nicholas\"" + "text": "Exercise 2\nWrite a function called grab_name that, when given a name and a year as an argument, returns the rows from the babynames data frame in the babynames package that match that name for that year (and returns an error if that name and year combination does not match any rows). Run the function once with the arguments Ezekiel and 1883 and once with Ezze and 1883.\nChallenge: Add an else if after your if and before your else to catch an error if people choose a year less than 1880 (outside of the range of our data).\n\n# Write your code below\n\nlibrary(tidyverse)\nlibrary(babynames)\n\ngrab_name <- function(data = babynames, nombre ,ano ){\n\n if(is.element(nombre,babynames$name)) {\n data |>\n filter(name == nombre & year == ano) |>\n pull(name,year)\n }\n else if(ano < 1880){\n stop(\"Choose a year after 1880\")\n }\n else{\n stop(\"Name and year not found\")\n }\n}\ngrab_name( nombre = \"Samantha\", ano = 1950)\n\n 1950 \n\"Samantha\"" }, { "objectID": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-3", "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-3", "title": "Lec 20: In-class Exercise: Write Functions", "section": "Exercise 3", - "text": "Exercise 3\nWrite a function called count_name_graph() that will use the function countname() to make a line graph that plots the year and the number of babies in a given year. The graph’s title should be “the”Babies Named (name of baby)” . The paste() function in r will help with your title, use it to put two strings together. Label your x and y axes.\n\n# Write your code below\ncount_name_graph <- function(nombre = \"Nicholas\"){\n count_name( nombre )|>\n ggplot(aes(x=year, y=n))+\n geom_line()+\n labs(title = paste(\"Babies named\", nombre))\n}\ncount_name_graph(\"Nicholas\")" + "text": "Exercise 3\nWrite a function called count_name_graph() that will use the function countname() to make a line graph that plots the year and the number of babies in a given year. The graph’s title should be “the”Babies Named (name of baby)” . The paste() function in r will help with your title, use it to put two strings together. Label your x and y axes.\n\n# Write your code below\ncount_name_graph <- function(data, nombre = \"Nicholas\"){\n count_name(data, nombre )|>\n ggplot(aes(x=year, y=n))+\n geom_line()+\n labs(title = paste(\"Babies named\", nombre))\n}\ncount_name_graph(data=babynames, nombre = \"Nicholas\")" }, { "objectID": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-4", "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-4", "title": "Lec 20: In-class Exercise: Write Functions", "section": "Exercise 4", - "text": "Exercise 4\nWrite a function called summary_prop that will take a data frame as an argument, filter to a condition, and compute the proportion of that condition.\nRun the function with the arguments congress_age and age >= 25 & age <= 55 .\nYou can use your function on count_name(\"Ezekiel\"), n>100. (Note: You probably used summarize() for the code for exercise 1, that sometimes returns grouped data. This code will work better if you add a pipe to an ungroup(). Alternatively you can just use reframe() instead of summarize()).\n\n# Write your code below\nsummary_prop <- function(data, condition){\n data |> \n filter( {{condition}} )|>\n summarise(\n number_with_condition = n(),\n prop_w_condition = number_with_condition/nrow(data)\n )\n}\n \nsummary_prop(data=congress_age, condition = age >= 25 & age <= 55)\n\n# A tibble: 1 × 2\n number_with_condition prop_w_condition\n <int> <dbl>\n1 10657 0.572\n\nsummary_prop(data=count_name(), condition = n>100)\n\n# A tibble: 1 × 2\n number_with_condition prop_w_condition\n <int> <dbl>\n1 201 0.901" + "text": "Exercise 4\nWrite a function called summary_prop that will take a data frame as an argument, filter to a condition, and compute the proportion of that condition.\nRun the function with the arguments congress_age and age >= 25 & age <= 55 .\nYou can use your function on count_name(\"Ezekiel\"), n>100. (Note: You probably used summarize() for the code for exercise 1, that sometimes returns grouped data. This code will work better if you add a pipe to an ungroup(). Alternatively you can just use reframe() instead of summarize()).\n\n# Write your code below\nsummary_prop <- function(data, condition){\n data |> \n filter( {{condition}} )|>\n summarise(\n number_with_condition = n(),\n prop_w_condition = number_with_condition/nrow(data)\n )\n}\n \nsummary_prop(data=congress_age, condition = age >= 25 & age <= 55)\n\n# A tibble: 1 × 2\n number_with_condition prop_w_condition\n <int> <dbl>\n1 10657 0.572\n\nsummary_prop(data=count_name(data = babynames, nombre = \"Ezekiel\"), condition = n>100)\n\n# A tibble: 1 × 2\n number_with_condition prop_w_condition\n <int> <dbl>\n1 44 0.312" }, { "objectID": "course-materials/lectures/Day_26_across_maps.html#across-and-map", @@ -586,5 +586,12 @@ "title": "SDS 192 Fall '23", "section": "", "text": "Source code to generate the course webpage for Smith College: Introduction to Data Science https://nics-github.github.io/SDS192/. Most of the content is in either:\n\n_quarto.yml: Set theme of webpage along with links in navigational bar.\nindex.qmd: A single Moodle-style page that lists all announcements and lectures notes in reverse-chronological order.\nPS.qmd: All problem sets/homeworks.\nprojects.qmd: Details on the mini-projects and term project.\nsyllabus.qmd: Course info/description, topics, materials, evaluation, and expectations.\n\n\n\nThis webpage is built/compiled using R Markdown Websites. To compile this webpage for yourself, do the following:\n\nGet the contents of this directory/repository:\n\nIf you are not familiar with GitHub, click the green “Clone or download” button on the top-right -> Download ZIP -> Unzip SDS192-master.zip.\nIf you are familiar with GitHub, clone this repository.\n\nDouble-click the SDS192.Rproj to open RStudio.\nIf you haven’t already, install the following R packages:\n\nrmarkdown and devtools\nAt the top of index.Rmd: all CRAN R packages listed .\nAt the top of index.Rmd: the emo and patchwork packages must be installed from GitHub using the devtools::install_github() function.\n\nGo to the “Build” pane of RStudio -> More -> Configure Build Tools… -> Ensure that “Project build tools” is set to “Webpage”.\nClick “Build Website”.\nThe website will display in the Viewer pane. The resulting index.html file and all other files for the webpage will be saved in the docs/ folder.\n\n\n\n\nTo publish/deploy this webpage and make it viewable on the web, you need to either:\n\nCopy the contents of the docs/ folder to your personal webpage or whatever domain hosting service you use.\nUse GitHub pages as I do. RStudio’s R Markdown Websites page gives instructions on how.\n\n\n\n\nThe format for this site was borrowed from Albert Kim’s SDS 192 course\nIf not created by me or referred to in some other way the activities and projects for this course are from:\n\nThe above mentioned website\nData Science in a box.\nBen Baumer’s SDS 192 course page" + }, + { + "objectID": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-1-masked-data", + "href": "course-materials/in-class-activies/day_25solutions_function_writing.html#exercise-1-masked-data", + "title": "Lec 20: In-class Exercise: Write Functions", + "section": "Exercise 1 masked data", + "text": "Exercise 1 masked data\nWrite a function called count_name that, when given a name (e.g.,Angelica, Ezekiel, or Riley) as an argument, returns the total number of births by year from the babynames data frame in the babynames package that match that name.\n\n# Write your code below\ncount_name <- function(data , nombre = \"Nicholas\"){\n if(is.element(nombre, babynames$name)) {\n data |>\n ## Why do I not need to mask {{nombre}}?\n filter(name == nombre) |>\n group_by(name,year) |>\n reframe(year= year, n = sum(n)) \n \n }\n else{\n stop(\"Name not found\")\n }\n}" } ] \ No newline at end of file diff --git a/index.qmd b/index.qmd index 000aae6..6be839c 100644 --- a/index.qmd +++ b/index.qmd @@ -64,25 +64,25 @@ h1{font-weight: 400;} # Day 26 Iteration -[Do we need more class time](https://forms.gle/239mzry4vtcMCfsv9) +[Do we need more class time](https://forms.gle/239mzry4vtcMCfsv9)? ::: {.callout-tip collapse="true"} # Reminder Project 2 due Friday -Project 2 notes: +Project 2 notes: -- Project 2 due Friday +- Project 2 due Friday -- Render your project +- **Render** your project -- Comment your code in fec_analysis.qmd +- **Comment** your code in `fec_analysis.qmd` - Make your code readable with spaces and short lines -- filter( == ) vs filter( %in% ) +- `filter( == )` vs `filter( %in% )` ::: -[Solution to Monday's in class activity](./course-materials/in-class-activies/day_25solutions_writing_functions.qmd) +[Solution to Monday's in class activity](./course-materials/in-class-activies/day_25solutions_writing_functions.html) [Lecture: `across()` and `map()`](course-materials/lectures/Day_26_across_maps.qmd) @@ -90,7 +90,6 @@ Project 2 notes: [More on masking](https://rlang.r-lib.org/reference/topic-data-mask.html) - # Day 25 Function Writing [lecture](course-materials/lectures/functions.qmd) @@ -101,9 +100,9 @@ Project 2 notes: # Day 23 Project 2 -# Day 22 Project 2 +# Day 22 Project 2 -Reminder lab-5 is due Wednesday. +Reminder lab-5 is due Wednesday. [Introduce Project 2](https://moodle.smith.edu/) @@ -111,7 +110,7 @@ Time to get familiar with `fec20` # Day 21 Lab 5 Cleaning -You can find lab 5 by way of [moodle](https://moodle.smith.edu/). +You can find lab 5 by way of [moodle](https://moodle.smith.edu/). # Day 20 Cleaning @@ -141,7 +140,7 @@ planes |> inner_join(flights, by = "tailnum") |> ``` -#### 4b The number of planes included in planes that flew from NYC is: +#### 4b The number of planes included in planes that flew from NYC is: ```{r solution-to-4b} #| echo: false