Moving MTH 190 to SDS 192

Nics-Github · Aug 28, 2023 · c6301d2 · c6301d2
1 parent 63fe0c0
commit c6301d2
Show file tree

Hide file tree

Showing 928 changed files with 196,986 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+
+/.quarto/
diff --git a/MTH_190_Intro_to_DS.Rproj b/MTH_190_Intro_to_DS.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/MY-MDSR-SOLUTIONS/Chapter 1 Problems/desktop.ini b/MY-MDSR-SOLUTIONS/Chapter 1 Problems/desktop.ini
diff --git a/MY-MDSR-SOLUTIONS/Chapter 2 Problems/Chapter 2 Problems.Rproj b/MY-MDSR-SOLUTIONS/Chapter 2 Problems/Chapter 2 Problems.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/MY-MDSR-SOLUTIONS/Chapter 2 Problems/Problem_1.Rmd b/MY-MDSR-SOLUTIONS/Chapter 2 Problems/Problem_1.Rmd
@@ -0,0 +1,23 @@
+---
+title: "2_7_1"
+author: "Schwab"
+date: "2/21/2022"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+library(mdsr)
+```
+
+Problem 1 (Easy): Consider the following data graphic.
+
+Q: The am variable takes the value 0 if the car has automatic transmission and 1 if the car has manual transmission. How could you differentiate the cars in the graphic based on their transmission type?
+
+A: set color = am
+
+```{r}
+# `am` is stored as a numeric 0/1 code. Wrapping it in factor() gives two
+# discrete colors (one per transmission type) instead of a continuous gradient.
+ggplot(mtcars, aes(x = disp, y = mpg, color = factor(am))) +
+  geom_point() +
+  labs(color = "am")
+```
diff --git a/MY-MDSR-SOLUTIONS/Chapter 2 Problems/desktop.ini b/MY-MDSR-SOLUTIONS/Chapter 2 Problems/desktop.ini
diff --git a/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Chapter 3 Problems.Rproj b/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Chapter 3 Problems.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Chapter 3 extended exmple.Rmd b/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Chapter 3 extended exmple.Rmd
@@ -0,0 +1,96 @@
+---
+title: 'Extended example: Historical Baby Names'
+author: "Schwab"
+date: "3/6/2022"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+library(mdsr)
+# Data set returned by make_babynames_dist(); the chunks below read its n, sex,
+# name, est_alive_today, and age_today columns.
+BabynamesDist <- make_babynames_dist()
+# Local alias for Hmisc's wtd.quantile so it can be called without attaching
+# the Hmisc package.
+wtd_quantile <- Hmisc::wtd.quantile
+```
+
+```{r}
+# Build a table of the 25 female names with the largest estimated number alive,
+# with the first-quartile, median, and third-quartile age for each name.
+com_fem <- BabynamesDist %>%
+  filter(n > 100, sex == "F") %>%
+  group_by(name) %>%
+  # The former `wgt` column was removed: summarize() keeps only the grouping
+  # key and the columns it creates, so `wgt` never reached the result.
+  summarize(
+    N = n(),
+    est_num_alive = sum(est_alive_today),
+    # Quantiles of age_today weighted by est_alive_today;
+    # probs = 1:3 / 4 is 0.25, 0.50, 0.75.
+    quantiles = list(
+      wtd_quantile(
+        age_today, est_alive_today, probs = 1:3 / 4, na.rm = TRUE
+      )
+    )
+  ) %>%
+  # Pair each of the three quantile values with a column name, then spread
+  # them so every name ends up on a single row.
+  mutate(measures = list(c("q1_age", "median_age", "q3_age"))) %>%
+  unnest(cols = c(quantiles, measures)) %>%
+  pivot_wider(names_from = measures, values_from = quantiles) %>%
+  arrange(desc(est_num_alive)) %>%
+  head(25)
+```
+
+
+```{r}
+# Base plot: names on the x axis, ordered by decreasing median age, with the
+# median age on the y axis. No geoms are added yet.
+w_plot <- ggplot(
+  com_fem,
+  mapping = aes(x = reorder(name, -median_age), y = median_age)
+) +
+  labs(
+    x = NULL,
+    y = "Age (in years)",
+    title = "Median ages for females with the 25 most common names"
+  )
+
+```
+
+```{r}
+# Draw each name's interquartile range (q1_age to q3_age) as a thick,
+# translucent gold bar.
+# `linewidth` replaces `size`: ggplot2 3.4.0 deprecated `size` for the
+# thickness of line geoms, and using it there now raises a deprecation warning.
+w_plot <- w_plot +
+  geom_linerange(
+    aes(ymin = q1_age, ymax = q3_age),
+    color = "#f3d478",
+    linewidth = 4.5,
+    alpha = 0.8
+  )
+```
+
+```{r}
+# Mark each name's median age with a red-filled, white-outlined circle
+# (shape 21 supports separate fill and outline colors).
+w_plot <- w_plot +
+  geom_point(
+    fill = "#ed3324", 
+    color = "white", 
+    size = 2, 
+    shape = 21
+  )
+# Text annotations. The vertical-position column is named median_age so that
+# geom_text() below can reuse the plot's existing y = median_age mapping and
+# only has to supply x and label.
+context <- tribble(
+  ~median_age, ~x, ~label, 
+  65, 24, "median",
+  29, 16, "25th", 
+  48, 16, "75th percentile",
+)
+
+# Reference ages 15, 25, ..., 75, used for both the dotted guide lines and the
+# axis breaks.
+age_breaks <- 1:7 * 10 + 5
+
+# Final figure (printed, not assigned).
+w_plot + 
+  # Fixed-position circle at x = 24, y = 60, styled like the median markers;
+  # presumably a hand-built legend entry next to the "median" label.
+  geom_point(
+    aes(y = 60, x = 24), 
+    fill = "#ed3324", 
+    color = "white", 
+    size = 2, 
+    shape = 21
+  ) + 
+  geom_text(data = context, aes(x = x, label = label)) + 
+  # Triangles (shape 17) at x = 16 beside the "25th" / "75th percentile" labels.
+  geom_point(aes(y = 24, x = 16), shape = 17) + 
+  geom_point(aes(y = 56, x = 16), shape = 17) +
+  # Dotted (linetype 3) guide line at every age break.
+  geom_hline(
+    data = tibble(x = age_breaks), 
+    aes(yintercept = x), 
+    linetype = 3
+  ) +
+  scale_y_continuous(breaks = age_breaks) + 
+  # Swap the axes so the names run down the side and ages run across.
+  coord_flip()
+
+
+```
diff --git a/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_1.Rmd b/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_1.Rmd
@@ -0,0 +1,41 @@
+---
+title: "Problem_1"
+author: "Schwab"
+date: "2/21/2022"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+library(mdsr)
+library(babynames)
+```
+
+Problem 1 (Easy): Angelica Schuyler Church (1756–1814) was the daughter of New York Governor Philip Schuyler and sister of Elizabeth Schuyler Hamilton. Angelica, New York was named after her. Using the babynames package generate a plot of the reported proportion of babies born with the name Angelica over time and interpret the figure.
+
+```{r Angelica-name-plot}
+# filter() is the verb students need for most of the Chapter 3 problems.
+
+babynames %>%
+  filter(name == "Angelica") %>% # keep only the rows for the name Angelica
+  ggplot(aes(x = year, y = prop)) +
+  geom_point() # proportion by year; I think this is the plot the question asks for
+```
+
+
+
+```{r Angelica-baby-names-suming-gender}
+# This chunk uses verbs that are not covered until Chapter 4, but it shows how
+# to combine the male and female Angelica rows for each year.
+# NOTE(review): `prop` in babynames appears to be computed within each sex, so
+# the sum is not a share of all births that year; confirm this is what is wanted.
+
+Angelica <- babynames %>%
+  filter(name == "Angelica") %>% # keep only the rows for the name Angelica
+  group_by(year) %>% # one group per year (male and female rows together)
+  # summarise() returns one ungrouped row per year. The original mutate() kept
+  # a separate row for each sex, so years with both sexes were plotted twice
+  # at the same spot and the result stayed grouped.
+  summarise(total_gender_prop = sum(prop))
+
+Angelica %>%
+  ggplot(aes(x = year, y = total_gender_prop)) +
+  geom_point()
+```
+
diff --git a/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_10.Rmd b/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_10.Rmd
@@ -0,0 +1,127 @@
+---
+title: "Problem_10"
+author: "Schwab"
+date: "3/2/2022"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+library(mdsr)
+library(ggthemes)
+library(stringr)
+babynames_dist <- make_babynames_dist()
+options(scipen=1000)
+# Default fill for bars that do not map `fill`. The stray unnamed "blue" element
+# was removed: it was not attached to any aesthetic name.
+update_geom_defaults("bar", list(fill = "yellow"))
+```
+
+I'm not done with this problem and I'm stuck. How do I do this without the verbs? 
+
+Problem 10 (Hard): Use the `make_babynames_dist()` function in the mdsr package to recreate the “Deadest Names” graphic from FiveThirtyEight (https://fivethirtyeight.com/features/how-to-tell-someones-age-when-all-you-know-is-her-name).
+
+```{r}
+# Quick look at the columns and their types before working with the data.
+glimpse(babynames_dist)
+```
+
+```{r}
+# Show every row for the name "Sam" to see what one name's data looks like.
+filter(babynames_dist, name == "Sam")
+```
+
+```{r}
+# Percent of people named Sam (all rows for the name, both sexes) who are
+# estimated to have died: births minus estimated alive, over total births.
+babynames_dist %>%
+  # filter(sex == "M") %>%
+  group_by(name) %>%
+  summarise(percent_dead = 100 * sum(n - est_alive_today) / sum(n)) %>%
+  filter(name == "Sam")
+  
+```
+
+```{r}
+# Per sex and name: total births, estimated deaths (births minus the truncated
+# estimate of people alive), and the share dead; then show the rows for Mabel.
+# NOTE(review): percent_dead here is a fraction (dead / sum(n)); the other
+# chunks multiply by 100. The first summary column is unnamed, so it is
+# labelled `sum(n)` in the output.
+babynames_dist %>%
+  group_by(sex,name) %>%
+  summarise(sum(n),dead=sum(n)-sum((trunc(est_alive_today))),percent_dead=dead/sum(n)) %>%
+  filter(name=="Mabel")
+```
+
+Trying to filter by popular names
+
+```{r}
+
+# Exploratory attempt at picking the ten "deadest" names per sex from the data
+# rather than by hand. Both tables are rebuilt from hand-picked name lists in
+# the next chunk.
+head(babynames_dist)
+
+# Female table. largest_prop and most_total_births are totals over every row
+# of a name (computed before the sex filter, so they include both sexes);
+# `dead` is per row.
+# NOTE(review): summarise() below references largest_prop, most_total_births
+# and sex without aggregating them, so it can return several rows per name;
+# recent dplyr versions are believed to warn about this -- confirm.
+# NOTE(review): top_n(largest_prop, n = 10) ranks by largest_prop, not by
+# percent_dead -- confirm which ordering is intended.
+dead_fem_names <- babynames_dist %>%
+  group_by(name) %>%
+  mutate(largest_prop=sum(prop),most_total_births=sum(n), dead = n-trunc(est_alive_today)) %>%
+  filter(  sex == "F" ) %>% 
+  group_by(name) %>%
+  summarise(largest_prop, most_total_births,percent_dead = 100*(sum(dead)/sum(n)),sex )%>%
+  #distinct() %>%
+  #filter(name %in%  list("Mabel","Gertrude","Myrtle","Blanche","Beulah","Bessie","Pearl","Ethel","Minnie","Viola")) %>%
+  arrange(desc(percent_dead)) %>%
+  top_n(largest_prop,n=10) 
+  
+
+
+# Male table: percent dead per name, keeping the ten highest values
+# (with_ties = FALSE caps the result at exactly ten rows), then tagging sex.
+dead_male_names <- babynames_dist %>%
+  filter( sex == "M" ) %>% 
+  mutate(dead = n-trunc(est_alive_today)) %>%
+  group_by(name) %>%
+  summarise(percent_dead = 100*(sum(dead)/sum(n)) )%>%
+  
+  arrange(desc(percent_dead)) %>%
+  slice_max(percent_dead,n=10,with_ties=FALSE) %>%
+  mutate(sex="M")
+
+# Print both tables for inspection.
+dead_fem_names
+dead_male_names
+```
+
+```{r}
+# Hand-picked female names for the graphic. A character vector is the natural
+# right-hand side for %in%; the original list() relied on implicit coercion.
+list_of_fem_names <- c(
+  "Mabel", "Gertrude", "Myrtle", "Blanche", "Beulah",
+  "Bessie", "Pearl", "Ethel", "Minnie", "Viola"
+)
+
+dead_fem_names <- babynames_dist %>%
+  filter(name %in% list_of_fem_names, sex == "F") %>% # only the chosen female names
+  mutate(dead = n - trunc(est_alive_today)) %>% # births minus those estimated alive today
+  group_by(name) %>%
+  summarise(percent_dead = 100 * (sum(dead) / sum(n))) %>% # percent of each name who have died
+  arrange(desc(percent_dead)) %>%
+  mutate(sex = "F") # add the sex variable back for the fill color
+
+# The same steps, repeated for the hand-picked male names.
+list_of_male_names <- c(
+  "Elmer", "Clarence", "Harry", "Chester", "Willard",
+  "Herman", "Herbert", "Claude", "Fred", "Sam"
+)
+
+dead_male_names <- babynames_dist %>%
+  filter(name %in% list_of_male_names, sex == "M") %>%
+  mutate(dead = n - trunc(est_alive_today)) %>%
+  group_by(name) %>%
+  summarise(percent_dead = 100 * (sum(dead) / sum(n))) %>%
+  arrange(desc(percent_dead)) %>%
+  mutate(sex = "M")
+
+# Stack the two tables into one table of twenty names. bind_rows() states the
+# intent directly; the original full_join() matched on every shared column,
+# including the floating-point percent_dead, and only behaved like a row bind
+# because no rows happened to match.
+twenty_dead_names <- bind_rows(dead_fem_names, dead_male_names)
+
+# NOTE(review): check the date in the subtitle against the FiveThirtyEight
+# graphic being recreated.
+twenty_dead_names %>%
+  ggplot(aes(x = reorder(name, percent_dead), y = percent_dead, fill = sex)) + # highest percent ends up on top
+  geom_col(show.legend = FALSE) + # bar length is percent_dead itself; legend turned off
+  labs(
+    title = "Deadest Names",
+    subtitle = str_wrap("Estimated percentage of Americans with a given name born since 1900 who were dead as of Jan 1, 2010", 65) # str_wrap() wraps the subtitle
+  ) +
+  coord_flip(ylim = c(30, 80)) + # horizontal bars, zoomed to 30-80 percent
+  geom_text(aes(label = trunc(percent_dead)), hjust = -0.5) + # numbers at the end of the bars
+  theme_fivethirtyeight() + # FiveThirtyEight theme and font
+  theme(
+    axis.line = element_blank(),
+    panel.background = element_blank(),
+    axis.ticks = element_blank(),
+    panel.grid.major = element_blank(),
+    axis.text.x = element_blank()
+  ) + # turns off the extra numbers and axes
+  # Fill colors for female/male from the 538 palette found at
+  # https://www.color-hex.com/color-palette/13650
+  scale_fill_manual("legend", values = c("F" = "#e5ae38", "M" = "#30a2da"))
+ 
+  
+```
+
+
+
+
diff --git a/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_2.Rmd b/MY-MDSR-SOLUTIONS/Chapter 3 Problems/Problem_2.Rmd
@@ -0,0 +1,20 @@
+---
+title: "Problem 3.2"
+author: "Schwab"
+date: "2/21/2022"
+output: pdf_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+library(mdsr)
+library(nasaweather)
+```
+
+Problem 2 (Easy): Using data from the nasaweather package, create a scatterplot between wind and pressure, with color being used to distinguish the type of storm.
+
+```{r wind-v-pressure}
+# Scatterplot of wind against pressure, colored by storm type.
+# `storms` is qualified with nasaweather:: because dplyr (attached through
+# tidyverse) also ships a data set named `storms`; without the prefix, which
+# one is used depends on the order of the library() calls.
+ggplot(nasaweather::storms, aes(x = wind, y = pressure, color = type)) +
+  geom_point()
+```