diff --git a/docs/index.html b/docs/index.html index 2de540b..d1ce284 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,7 +7,7 @@ - + SDS 192 Fall ’23 - SDS 192: Intro to Data Science + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + + + +
+ +
+
+

S_problem

+
+ + + +
+ + + + +
+ + +
+ +
+
# dir <- usethis::use_zip("https://www.fec.gov/files/bulk-downloads/2020/pas220.zip", 
+#             destdir = tempdir(), cleanup = TRUE)
+library(tidyverse)  # attaches readr/dplyr (read_csv, cols, select, mutate, %>%) used by the chunks below
+
+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+✔ dplyr     1.1.3     ✔ readr     2.1.4
+✔ forcats   1.0.0     ✔ stringr   1.5.0
+✔ ggplot2   3.4.3     ✔ tibble    3.2.1
+✔ lubridate 1.9.2     ✔ tidyr     1.3.0
+✔ purrr     1.0.2     
+── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag()    masks stats::lag()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+
file_path <- fs::path(getwd(), "itpas2.txt")  # data file is looked up in the current working directory
+    exp_names <- read_csv("https://www.fec.gov/files/bulk-downloads/data_dictionaries/pas2_header_file.csv") %>%  # header-only CSV (0 data rows, 22 columns); only its column names are used
+      names() %>% 
+      tolower()  # lower-case the names so later code can refer to e.g. transaction_dt
+
+
Rows: 0 Columns: 22
+── Column specification ────────────────────────────────────────────────────────
+Delimiter: ","
+chr (22): CMTE_ID, AMNDT_IND, RPT_TP, TRANSACTION_PGI, IMAGE_NUM, TRANSACTIO...
+
+ℹ Use `spec()` to retrieve the full column specification for this data.
+ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+
+
    contributions <- vroom::vroom(file_path, col_names = exp_names,  # file has no usable header row of its own here; names come from exp_names
+        col_types = cols(employer = col_character(), occupation = col_character()),  # NOTE(review): both columns are typed here but dropped by select() below
+        
+        n_max = Inf, delim = "|") %>%  # no row limit; fields are pipe-delimited
+      select(-c(employer, occupation, image_num, memo_cd, memo_text, sub_id, file_num)) %>%  # drop these seven columns from the result
+      
+        mutate(transaction_dt = lubridate::mdy(transaction_dt))  # parse month-day-year values into Date
+
+ + + +
+ +
+ + + + + \ No newline at end of file