-
Notifications
You must be signed in to change notification settings - Fork 7
/
constants.py
62 lines (47 loc) · 2.26 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Constants shared across files.
"""
import re
# Special vocabulary tokens and the pattern used to recognise numbers.
UNK = '_UNK'        # placeholder for unknown / out-of-vocabulary words and chars
WORD_START = '<w>'  # marks the start of a word
WORD_END = '</w>'   # marks the end of a word
NUM = 'NUM'         # string that numbers are normalized to
# Alternatives, in order: a run of digits, a decimal such as "3.14", or digits
# mixed with commas such as "1,000".
# NOTE(review): the bare-integer alternative is listed first, so match(),
# search() and sub() stop after the leading digits of "3.14" or "1,000"; only
# fullmatch() reaches the later alternatives. Confirm callers expect this.
NUMBERREGEX = re.compile(r"[0-9]+|[0-9]+\.[0-9]+|[0-9]+[0-9,]+")
# Names of the prediction tasks, one entry per task.
TASK_NAMES = [
    'group',                # target group of the tweet
    'annotator_sentiment',  # annotator's sentiment with respect to the tweet
    'directness',           # whether the tweet is direct or indirect hate speech
    'target',               # characteristic the tweet discriminates on (e.g., race)
    'sentiment',            # sentiment expressed by the tweet
]
# Word embeddings: the accepted embedding names (None is also listed as a
# valid choice) and the path associated with each named embedding.
EMBEDS = [
    'babylon',
    'muse',
    'umwe',
    None,
]
EMBEDS_FILES = {
    'babylon': '../data/bi-embedding-babylon78/transformed_embeds/',
    'muse': '../data/bi-embedding-muse/',
    # NOTE(review): 'umwe' maps to a single space rather than a path; this
    # looks like a placeholder -- confirm before relying on it.
    'umwe': ' ',
}
# Maps each task name to the list of labels defined for that task.
LABELS = {
    'group': [
        'arabs', 'other', 'african_descent', 'left_wing_people',
        'asians', 'hispanics', 'muslims', 'individual',
        'special_needs', 'christian', 'immigrants', 'jews',
        'women', 'indian/hindu', 'gay', 'refugees',
    ],
    'annotator_sentiment': [
        'indifference', 'sadness', 'disgust', 'shock',
        'confusion', 'anger', 'fear',
    ],
    'directness': ['direct', 'indirect'],
    'target': [
        'origin', 'religion', 'disability', 'gender',
        'sexual_orientation', 'other',
    ],
    'sentiment': [
        'disrespectful', 'fearful', 'offensive', 'abusive',
        'hateful', 'normal',
    ],
}
# Alternative label set keyed by task name. Compared with LABELS it omits
# 'disgust' from 'annotator_sentiment' and uses a four-level 'sentiment'
# list; the 'group', 'directness' and 'target' lists are the same.
MODIFIED_LABELS = {
    'group': [
        'arabs', 'other', 'african_descent', 'left_wing_people',
        'asians', 'hispanics', 'muslims', 'individual',
        'special_needs', 'christian', 'immigrants', 'jews',
        'women', 'indian/hindu', 'gay', 'refugees',
    ],
    'annotator_sentiment': [
        'indifference', 'sadness', 'shock', 'confusion', 'anger', 'fear',
    ],
    'directness': ['direct', 'indirect'],
    'target': [
        'origin', 'religion', 'disability', 'gender',
        'sexual_orientation', 'other',
    ],
    'sentiment': [
        'somewhatoffensive', 'offensive', 'veryoffensive', 'normal',
    ],
}
# TODO: 'directness': ['direct', 'indirect', 'none'] is to be added.
# Languages: two-letter codes mapped to their full English names.
FULL_LANG = {
    'ar': 'Arabic',
    'en': 'English',
    'fr': 'French',
}
# The language codes as a list, in the same order as the mapping above.
LANGUAGES = list(FULL_LANG)
# Optimizer names.
SGD, ADAM = 'sgd', 'adam'

# Names of the initialization schemes for cross-stitch and layer-stitch units.
BALANCED, IMBALANCED = 'balanced', 'imbalanced'