-
Notifications
You must be signed in to change notification settings - Fork 1
/
hypotheses.py
140 lines (134 loc) · 6.15 KB
/
hypotheses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import numpy as np
import pandas as pd
import copy
import pickle
import input_output as inout
def combine_hypotheses_per_phrase(ann_mel_dict, oc_dict):
""" takes a melody dictionary with annotated values for
information content, pitch proximity, and pitch reversal,
and a dictionary listing whether a given phrase occurs in a given melody.
Returns a dataframe with columns for all hypotheses and occurrence info"""
for oc in oc_dict:
melody = next((a['symbols'] for a in ann_mel_dict
if a['filename']==oc['query_filename']))
phrase = [s for s in melody
if s['phrase_id']==int(oc['query_segment_id'])]
oc['IC'] = get_segment_average([p['IC'] for p in phrase])
oc['pitch_proximity'] = get_segment_average([p['pitch_proximity']
for p in phrase])
oc['pitch_reversal'] = get_segment_average([p['pitch_reversal']
for p in phrase])
oc['phrase_length'] = len(phrase)
oc_df = pd.DataFrame(oc_dict)
return oc_df
def get_segment_average(values):
""" return phrase average for
Information Content, pitch proximity and pitch reversal """
okvalues = [v for v in values if v is not np.nan]
return np.nanmean(okvalues)
################ Rehearsal ###########################
def phrase_repetitions(mel_dict, oc_dict):
for mel in mel_dict:
print(mel['filename'])
phrase_ids = set([s['phrase_id'] for s in mel['symbols']])
phrase_start_onset = 0.0
pitch_onset = []
phrase_list = []
for i,s in enumerate(mel['symbols']):
if i>0 and s['phrase_id']>mel['symbols'][i-1]['phrase_id']:
phrase_start_onset = s['onset']
phrase_list.append(pitch_onset)
pitch_onset = []
pitch_onset.append((s['onset']-phrase_start_onset, s['pitch']))
phrase_list.append(pitch_onset)
for i,p in enumerate(phrase_list):
repetitions = len([phr for phr in phrase_list if phr==p])
oc_dict_entries = [oc for oc in oc_dict
if oc['query_filename']==mel['filename']
and oc['query_segment_id']==i]
for oc in oc_dict_entries:
oc['phrase_repetitions'] = repetitions
return oc_dict
################## Motif repetivity #####################
def return_entropy_FANTASTIC(csvpath, oc_dict):
""" takes a file for which ngram entropy has been generated
with the FANTASTIC toolbox, and a dictionary of occurrences
returns the occurrence dict with entropy values added """
phrase_dict = inout.csv_to_dict(csvpath)
for phr in phrase_dict:
oc_dict_entries = [oc for oc in oc_dict if
oc['query_filename']==phr['file.id'] and
oc['query_segment_id']==int(phr['phr.id'])-1]
for oc in oc_dict_entries:
oc['mean.entropy'] = phr['mean.entropy']
return oc_dict
################## Expectancy #############################
################## IDyOM
def return_expectancy_IDyOM(idyomfile,mel_dict) :
""" takes an idyom csv file and a melody dictionary
for each note in the melodies, pulls probability,
information content and probability and adds it to the dictionary
"""
idyomdf = pd.read_csv(idyomfile,sep=" ")
filenames = set([idyomdf.ix[i, 'melody.name'] for i in range(len(idyomdf))])
sub_dict = [m for m in mel_dict if m['filename'] in filenames]
for mel in sub_dict :
idyom_notes = [idyomdf.ix[i] for i in range(len(idyomdf)) if
idyomdf.ix[i,'melody.name']==mel['filename']]
if not idyom_notes :
print(mel['filename'])
continue
for i,s in enumerate(mel['symbols']) :
s['IC'] = idyom_notes[i]['information.content']
return mel_dict
################## Schellenberg
def return_expectancy_Schellenberg(mel_dict):
""" takes a dictionary of melodies with pitch interval representations,
the factors for pitch proximity and pitch reversal and
returns how well expectations are fulfilled according to the two-factor
model by Schellenberg (1997).
"""
for m in mel_dict :
pitchIntervals = [s['pitch_interval'] for s in m['symbols']]
for i,p in enumerate(pitchIntervals) :
regDirection = np.nan
regReturn = np.nan
if i==0:
pitchProximity = np.nan
pitchReversal = np.nan
# only from the second pitch interval (i.e. third note)
# I-R theory can make predictions
elif i>=2:
# pitch proximity and pitch reversal
# are only defined below octave
if abs(pitchIntervals[i-1]) <= 11 and abs(p) <= 12:
pitchProximity = abs(p)
if abs(pitchIntervals[i-1]) != 6:
# pitch reversal is not defined if
# implicative interval is a tritone
if abs(pitchIntervals[i-1]) < 6:
# small pitch interval - direction could go either way
regDirection = 0
elif abs(pitchIntervals[i-1]) > 6:
if pitchIntervals[i-1] + p > 0:
# large interval but same direction
# this is less expected
regDirection = -1
else:
regDirection = 1
if ((pitchIntervals[i-1] * p) < 0 and
abs(abs(pitchIntervals[i-1]) - abs(p)) <= 2):
# intervals have different direction
# and are similar in size
regReturn = 1.5
else:
regReturn = 0
pitchReversal = regDirection + regReturn
else:
pitchReversal = np.nan
else:
pitchReversal = np.nan
pitchProximity = np.nan
m['symbols'][i]['pitch_proximity'] = pitchProximity
m['symbols'][i]['pitch_reversal'] = pitchReversal
return mel_dict.copy()