-
Notifications
You must be signed in to change notification settings - Fork 0
/
decisionHesap.py
123 lines (97 loc) · 5.32 KB
/
decisionHesap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import tweepy
class decisionHesap():
def __init__(self):
self.tweets = []
self.tweetText = []
def DownloadData(self, keyword):
auth = tweepy.OAuthHandler('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', )
api = tweepy.API(auth)
limit = int(1)
# 1 bot 0 non bot
limit = tweepy.Cursor(api.user_timeline, screen_name=keyword, ).items(limit)
# create DataFrame
columns = ['screen_name', 'location', 'description', 'verified', 'follower', 'following', 'url']
data1 = []
for tweet in limit:
data1.append([tweet.user.screen_name, tweet.user.location,
tweet.user.description, tweet.user.verified, tweet.user.followers_count,
tweet.user.friends_count, tweet.user.url])
df1 = pd.DataFrame(data1, columns=columns)
df1.to_csv('data/veri.csv')
data1 = pd.read_csv('data/veri.csv')
condition = (data1.screen_name.str.contains("bot", case=False) == True) | (
data1.description.str.contains("bot", case=False) == True) | (data1.location.isnull()) | (
data1.verified == False)
data1['screen_name_binary'] = (data1.screen_name.str.contains("bot", case=False) == True)
data1['description_binary'] = (data1.description.str.contains("bot", case=False) == True)
data1['location_binary'] = (data1.location.isnull())
data1['verified_binary'] = (data1.verified == False)
print(data1)
data1.to_csv('data/sonuc.csv')
bots = pd.read_csv('data/bots_data.csv', encoding=('ISO-8859-1'))
nonbots = pd.read_csv('data/nonbots_data.csv', encoding=('ISO-8859-1'))
# Creating Bots identifying condition
# bots[bots.listedcount>10000]
condition = (bots.screen_name.str.contains("bot", case=False) == True) | (
bots.description.str.contains("bot", case=False) == True) | (bots.location.isnull()) | (
bots.verified == False)
bots['screen_name_binary'] = (bots.screen_name.str.contains("bot", case=False) == True)
bots['description_binary'] = (bots.description.str.contains("bot", case=False) == True)
bots['location_binary'] = (bots.location.isnull())
bots['verified_binary'] = (bots.verified == False)
print("Bots shape: {0}".format(bots.shape))
# Creating NonBots identifying condition
condition = (nonbots.screen_name.str.contains("bot", case=False) == False) | (
nonbots.description.str.contains("bot", case=False) == False) | (
nonbots.location.isnull() == False) | (
nonbots.verified == True)
nonbots['screen_name_binary'] = (nonbots.screen_name.str.contains("bot", case=False) == False)
nonbots['description_binary'] = (nonbots.description.str.contains("bot", case=False) == False)
nonbots['location_binary'] = (nonbots.location.isnull() == False)
nonbots['verified_binary'] = (nonbots.verified == True)
print("Nonbots shape: {0}".format(nonbots.shape))
# Joining Bots and NonBots dataframes
df = pd.concat([bots, nonbots])
print("DataFrames created...")
# Splitting data randombly into train_df and test_df
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2)
print("Randomly splitting the dataset into training and test, and training classifiers...\n")
# Using Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
clf = DecisionTreeClassifier(criterion='entropy')
# 80%
X_train = train_df[
['screen_name_binary', 'description_binary', 'location_binary', 'verified_binary']] # train_data
y_train = train_df['bot'] # train_target
# 20%
X_test = test_df[
['screen_name_binary', 'description_binary', 'location_binary', 'verified_binary']] # test_Data
y_test = test_df['bot'] # test_target
# Training on decision tree classifier
model = clf.fit(X_train, y_train)
# Predicting on test data
predicted = model.predict(X_test)
# Checking accuracy
data_df = pd.read_csv('data/sonuc.csv', encoding=('ISO-8859-1'))
dataset = data_df[['screen_name_binary', 'description_binary', 'location_binary', 'verified_binary']]
print("DECİSİON TREE tahmin", model.predict(dataset))
pred = model.predict(dataset)
if pred == 1:
pred = "Trol"
else:
pred = "Not Trol"
print("decision", pred)
follower = tweet.user.followers_count
following = tweet.user.friends_count
url = tweet.user.url
name = tweet.user.name
img = tweet.user.profile_image_url
bg = tweet.user.profile_image_url
print("Decision Tree Classifier Accuracy: {0}".format(accuracy_score(y_test, predicted)))
return pred, keyword, follower, following, url, name, img, bg
""" print("MultinomialNB Classifier", pred)
print("MultinomialNB Classifier Accuary: {0}".format(accuracy_score(y_test, predicted)))"""