Skip to content
This repository has been archived by the owner on Jan 30, 2024. It is now read-only.

Commit

Permalink
fix weird dplyr bug
Browse files Browse the repository at this point in the history
  • Loading branch information
maxheld83 committed Sep 18, 2018
1 parent 93ab101 commit 420bda0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 15 deletions.
25 changes: 11 additions & 14 deletions index.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ editor_options:
</sub></small></p>
</div>


```{r, child="README.rmd"}
```

Expand Down Expand Up @@ -195,9 +196,10 @@ td_coms <- r_coms_by_line %>%
tidyr::separate_rows( # separate out special characters
word,
sep = "_"
) %>%
filter(
is.na(as.numeric(word)),
)
td_coms <- td_coms %>%
dplyr::filter(
is.na(suppressWarnings(as.numeric(word))),
str_length(word) > 1
) %>%
mutate(
Expand Down Expand Up @@ -238,7 +240,7 @@ Es verbleiben über **`r nrow(td_coms)`** Beobachtungen von **`r length(unique(t

```{r, fig.cap="Unigram der über 300 Mal genannten Wörter"}
count(x = td_coms, word, sort = TRUE) %>%
filter(n > 300) %>%
dplyr::filter(n > 300) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n)) +
geom_col() +
Expand All @@ -254,7 +256,7 @@ Gemessen an den Möglichkeiten neuer Technologien erscheint *mehr* Arbeit als un
```{r, fig.cap="Vergleich der Unigrams in Prozenten der drei größten Threads", fig.width=9, fig.height=9, warning=FALSE}
td_coms %>%
group_by(submission_title) %>%
filter(n() > 1900) %>%
dplyr::filter(n() > 1900) %>%
count(submission_title, word) %>%
mutate(proportion = n / sum(n)) %>%
select(-n) %>%
Expand Down Expand Up @@ -295,7 +297,7 @@ coms_bigram_graph <- r_coms_by_line %>%
unnest_tokens(bigram, body, token = "ngrams", n = 2) %>%
# put each in a column, so we can get rid of stop words
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(
dplyr::filter(
!word1 %in% c(stop_words$word, "http", "www", "https"),
!word2 %in% c(stop_words$word, "http", "www", "https"),
) %>%
Expand All @@ -308,7 +310,7 @@ coms_bigram_graph <- r_coms_by_line %>%
word2,
sep = "_"
) %>%
filter(
dplyr::filter(
is.na(suppressWarnings(as.numeric(word1))),
str_length(word1) > 1,
is.na(suppressWarnings(as.numeric(word2))),
Expand All @@ -329,7 +331,7 @@ coms_bigram_graph <- r_coms_by_line %>%
)
) %>%
count(word1, word2, sort = TRUE) %>%
filter(n > 20) %>%
dplyr::filter(n > 20) %>%
igraph::graph_from_data_frame()
library(ggraph)
Expand Down Expand Up @@ -359,13 +361,8 @@ Eine LDA zerlegt daher die Rohdaten in zwei Matrizen: Eine Matrix $Wörter \times Themen$
Somit handelt es sich bei der LDA um eine Dimensionsreduktion, also um einen Ansatz des *unsupervised learning*.

```{r}
td_coms %>%
tidy()
```



```


# Feelgood Management
Expand Down
2 changes: 1 addition & 1 deletion setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ library(knitr)
library(rmarkdown)
library(printr)
library(tidyverse)
library(dplyr)
library(magrittr)
library(tidytext)
library(scales)
library(dplyr)

# knitr setup ====
knitr::opts_chunk$set(
Expand Down

0 comments on commit 420bda0

Please sign in to comment.