-
Notifications
You must be signed in to change notification settings - Fork 0
/
FINAL-predict.py
39 lines (32 loc) · 1.04 KB
/
FINAL-predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
nltk.download('wordnet')
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = set(stopwords.words("english"))
from train import TrainData
import sys
import warnings
# Silence library warnings unless the user explicitly configured warning
# handling (e.g. via ``-W`` on the command line or ``PYTHONWARNINGS``).
if not sys.warnoptions:
    warnings.simplefilter("ignore")

# Load the raw feedback sheet and discard the two columns this script does
# not use. ``drop`` raises ``KeyError`` if either column is missing.
df = pd.read_excel("Data.xlsx", sheet_name="Sheet1")
df = df.drop(columns=["Label", "Date of review"])

# Remove every row that has a missing value in any remaining column.
df.dropna(axis=0, inplace=True)

# Encode the sentiment labels as integers: positive -> 1, negative -> 0.
# Matching is done on a stripped, lower-cased copy of the column so that
# spelling variants such as "Positive" / "negative " are encoded as well
# instead of being left behind as raw strings mixed in with the integers.
# Values that are neither label are left untouched.
sentiment_key = df["SENTIMENT"].astype(str).str.strip().str.lower()
df.loc[sentiment_key == "positive", "SENTIMENT"] = 1
df.loc[sentiment_key == "negative", "SENTIMENT"] = 0

feedback = list(df["Actual Feedback"])
sentiment = list(df["SENTIMENT"])

# NOTE(review): ``vectoriser`` is not referenced again in this script;
# presumably TrainData builds its own features - confirm before removing.
vectoriser = TfidfVectorizer(ngram_range=(1, 2), max_features=10000)

t = TrainData()
processedtext = t.preprocess(feedback)

# Class-balance plot of the encoded labels. The column must be passed as the
# ``x`` keyword: recent seaborn releases no longer accept it positionally.
# The figure is only created here; nothing in this script shows or saves it.
sns.countplot(x="SENTIMENT", data=df)

# Fit on the preprocessed text and the encoded labels, then pick the model.
t.train(p=processedtext, s=sentiment)
model = t.bnb()
# Other model builders available on TrainData (uncomment one to switch):
# model = t.svc()
# model = t.lr()