-
Notifications
You must be signed in to change notification settings - Fork 0
/
VaderSentiment.py
55 lines (46 loc) · 1.35 KB
/
VaderSentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# encoding: utf-8
'''
Created on Sep 19, 2018
@author: g.werner
'''
from langdetect import detect
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize
import nltk
import os
# Import-time setup: request the 'punkt' and 'vader_lexicon' NLTK resources
# before the analyzer below is constructed.
nltk.download('punkt')
nltk.download('vader_lexicon')
# Module-level analyzer instance; evaluate_single_document() scores text with it.
sid = SentimentIntensityAnalyzer()
def load_documents(directory='C:\\Users\\g.werner\\eclipse-workspace\\GenreDeciderPython\\input\\test'):
    '''
    Read every English-language text file found directly inside a directory.

    Each regular file is read as UTF-8 and stripped of surrounding
    whitespace. Files that are empty after stripping, or whose detected
    language is not 'en', are skipped. Subdirectories and other non-file
    entries are ignored.

    :param directory: directory to scan (str, bytes or path-like). Defaults
        to the path this script was originally written against.
    :return: list of document texts, ordered by file name.
    :raises OSError: if the directory cannot be listed or a file cannot be
        opened.
    :raises UnicodeDecodeError: if a file is not valid UTF-8.

    NOTE(review): langdetect's detect() is documented to raise
    LangDetectException for text with no usable features (e.g. digits
    only); such a file still aborts the run here -- confirm whether it
    should be skipped instead.
    '''
    documents = []
    print('Getting data')
    # Sort the names so the returned order does not depend on the
    # arbitrary order in which the OS lists directory entries.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        # Only regular files can be opened for reading; skip folders etc.
        if not os.path.isfile(path):
            continue
        with open(path, mode='r', encoding='utf-8') as handle:
            text = handle.read().strip()
        if not text:
            continue
        if detect(text) != 'en':
            continue
        documents.append(text)
    return documents
def evaluate_single_document(document):
    '''
    Score one document with the module-level analyzer.

    :param document: text to score.
    :return: the 'compound' entry of the analyzer's polarity scores.
    '''
    return sid.polarity_scores(document)['compound']
def main():
    '''
    Load the documents, bucket each one by its compound score and print
    the number of positive, neutral and negative documents.

    A score of 0.5 or more counts as positive, -0.5 or less as negative,
    and anything in between as neutral.
    '''
    tally = {'pos': 0, 'neu': 0, 'neg': 0}
    for text in load_documents():
        score = evaluate_single_document(text)
        if score >= 0.5:
            label = 'pos'
        elif score <= -0.5:
            label = 'neg'
        else:
            label = 'neu'
        tally[label] += 1
    print(str(tally['pos']) + ' pos ' + str(tally['neu']) + ' neu ' + str(tally['neg']) + ' neg')


# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()