-
Notifications
You must be signed in to change notification settings - Fork 2
/
elmo_tweets_SVM_model_building_evaluation.py
48 lines (31 loc) · 1.28 KB
/
elmo_tweets_SVM_model_building_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import f1_score
import logging
# Script-wide logging: INFO level, each record prefixed with time and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)
# Raise pandas' per-column display width to 200 characters.
pd.set_option('display.max_colwidth', 200)
# Read the raw train and test tables from CSV.
train = pd.read_csv("train_2kmZucJ.csv")
test = pd.read_csv("test_oJQbWVk.csv")

# Load elmo_train_new (presumably precomputed ELMo features for the training
# rows -- TODO confirm against the script that wrote the pickle).
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
# The `with` block guarantees the file handle is closed once loading finishes.
with open("elmo_train_03032019.pickle", "rb") as pickle_in:
    elmo_train_new = pickle.load(pickle_in)

# Load elmo_test_new (the matching features for the test rows, same caveats).
with open("elmo_test_03032019.pickle", "rb") as pickle_in:
    elmo_test_new = pickle.load(pickle_in)
# Hold out 20% of the training rows for validation; the seed is fixed so the
# split is the same on every run.
labels = train['label']
xtrain, xvalid, ytrain, yvalid = train_test_split(
    elmo_train_new,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit a support vector classifier on the training split (fit returns the
# estimator itself, so construction and fitting can be chained).
clf = svm.SVC(gamma='scale').fit(xtrain, ytrain)

# Print the F1 score on the held-out split.
preds_valid = clf.predict(xvalid)
validation_f1 = f1_score(yvalid, preds_valid)
print(validation_f1)
# Run the fitted classifier over the test-set features.
preds_test = clf.predict(elmo_test_new)

# Build the submission table: one row per test id with its predicted label.
submission_columns = {'id': test['id'], 'label': preds_test}
sub = pd.DataFrame(submission_columns)

# Save the submission as CSV, leaving out the DataFrame index.
sub.to_csv("sub_svm.csv", index=False)