-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda.py
52 lines (45 loc) · 1.8 KB
/
eda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# --- Data preparation -------------------------------------------------------
# Load the processed tweets and derive the columns the plots below rely on:
# `frequency` (words per tweet), `month` and `year`.
df = pd.read_csv("data/processed_tweets.csv")


def _word_count(text):
    """Return the number of whitespace-separated words in *text*.

    Missing values (NaN/None) count as zero words. Splitting on any run of
    whitespace (rather than on a single space) avoids counting the empty
    strings produced by consecutive spaces as words.
    """
    if pd.isna(text):
        return 0
    return len(str(text).split())


# NOTE(review): despite its name, `frequency` is the word count of each tweet,
# not the number of tweets -- confirm this is what the plots should show.
df["frequency"] = df["post_text"].apply(_word_count)

# Parse the timestamps in a single vectorized call instead of row by row, then
# keep only the calendar parts that are plotted.
created = pd.to_datetime(df["post_created"])
df["month"] = created.dt.month
df["year"] = created.dt.year

# Remove the columns that are not used below; the raw timestamp is no longer
# needed once month/year have been extracted.
df = df.drop(columns=["Unnamed: 0", "post_id", "user_id", "post_created"])
# --- Plots ------------------------------------------------------------------
# Bar plots of `frequency` and `followers`, grouped by month and by year.
def _save_barplot(data, x, y, title, path, figsize=(11, 5)):
    """Draw a seaborn bar plot of *y* grouped by *x* and save it to *path*.

    Args:
        data: DataFrame containing the columns named by *x* and *y*.
        x: Name of the column used for the categories on the x axis.
        y: Name of the column whose values are aggregated per category.
        title: Title shown above the plot.
        path: Output file path; its parent directory is created if missing.
        figsize: Figure size in inches as ``(width, height)``.
    """
    out_dir = os.path.dirname(path)
    if out_dir:
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)

    # A fresh figure per plot guarantees the requested size is applied; a
    # figure created earlier keeps its size even if rcParams change later.
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(data=data, x=x, y=y, ax=ax)
    ax.set(title=title)
    # despine only affects axes that already exist, so it has to run after
    # the plot has been drawn.
    sns.despine(ax=ax, left=True, bottom=True)
    fig.savefig(path)
    # Release the figure so nothing leaks into the next plot.
    plt.close(fig)


# Configure the theme once, in a single call: sns.set() resets style and
# context to their defaults, so calling it after set_style()/set_context()
# would silently discard the "whitegrid" style and "poster" context.
sns.set(
    context="poster",
    style="whitegrid",
    font_scale=0.5,
    rc={"grid.linewidth": 0.6},
)

# (x column, y column, title, output file) for each plot.
_PLOTS = (
    ("month", "frequency", "Frequency of tweets per month", "figure/freq_month.png"),
    ("month", "followers", "Followers per month", "figure/follower_month.png"),
    ("year", "frequency", "Frequency of tweets per year", "figure/freq_year.png"),
    ("year", "followers", "Followers per year", "figure/follower_year.png"),
)

for _x, _y, _title, _path in _PLOTS:
    _save_barplot(df, _x, _y, _title, _path)