-
Notifications
You must be signed in to change notification settings - Fork 3
/
manual_error.py
216 lines (198 loc) · 8.01 KB
/
manual_error.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python
u"""
manual_error.py
Yara Mohajerani
Calculate human error
"""
import os
import sys
import fiona
import getopt
import numpy as np
from scipy import stats
from shapely.geometry import LineString,Polygon
import matplotlib.pyplot as plt
from descartes import PolygonPatch
#-- main function
def main():
#-- Read the system arguments listed after the program
long_options=['DIR1=','DIR2=']
optlist,arglist = getopt.getopt(sys.argv[1:],'O:T:',long_options)
#-- Set default settings
dir1 = os.path.expanduser('~/Google Drive File Stream/Shared drives/GROUNDING_LINE_TEAM_DRIVE/ML_Yara/SOURCE_SHP')
dir2 = os.path.expanduser('~/GL_learning_data/Archive_Track007_Second_Draw_Bernd')
for opt, arg in optlist:
if opt in ("-O","--DIR1"):
dir1 = os.path.expanduser(arg)
elif opt in ("-T","--DIR2"):
dir2 = os.path.expanduser(arg)
#-- read list of files from both directories
fileList = os.listdir(dir1)
list1 = [f for f in fileList if (f.startswith('gl') and f.endswith('.shp'))]
fileList = os.listdir(dir2)
list2 = [f for f in fileList if (f.startswith('gl') and f.endswith('.shp'))]
#------------------------------------------------------------------------------------------
#-- go through list2 and get corresponding files in list1 and calculate pairwise errors
#------------------------------------------------------------------------------------------
#-- out file for saving average error for each file
outtxt = open(os.path.join(dir2,'error_summary.txt'),'w')
outtxt.write('Average\tError(m)\tMIN(m)\tMAX(m) \t File\n')
#-- initialize array for distances, minimums, and maxmimums
distances = np.zeros(len(list2))
minims = np.zeros(len(list2))
maxims = np.zeros(len(list2))
#-- go through files and get pairwise distances
for count,f in enumerate(list2):
#-- read file
fid1 = fiona.open(os.path.join(dir2,f),'r')
#-- loop over ML lines and save all test lines
ml_lines = []
for j in range(len(fid1)):
g = fid1.next()
if not g['geometry'] is None:
#-- read geometry and filter out pinning points for an equivalent
#-- error comparison to ML
temp_pol = Polygon(g['geometry']['coordinates'])
box_ll = temp_pol.length
box_ww = temp_pol.area/box_ll
#-- if the with is larger than 1/25 of the length, it's a pinning point
if box_ww <= box_ll/25:
ml_lines.append(LineString(g['geometry']['coordinates']))
fid1.close()
#-- read second file file
try:
ind = int(list1.index(f))
except:
print("Couldn't find file:", f)
continue
else:
fid2 = fiona.open(os.path.join(dir1,list1[ind]),'r')
#-- loop over the hand-written lines and save all coordinates
hd_lines = []
for j in range(len(fid2)):
g2 = fid2.next()
if not g2['geometry'] is None:
#-- read geometry and filter out pinning points for an equivalent
#-- error comparison to ML
temp_pol = Polygon(g2['geometry']['coordinates'])
box_ll = temp_pol.length
box_ww = temp_pol.area/box_ll
#-- if the with is larger than 1/25 of the length, it's a pinning point
if box_ww <= box_ll/25:
hd_lines.append(LineString(g2['geometry']['coordinates']))
fid2.close()
#-- initialize plot for the large errors
fig = plt.figure(1, figsize=(8,8))
ax = fig.add_subplot(111)
#-- plot distnaces if specified
#-- initialize array of all pairwise distances and used indices
dist = []
#-- loop over ML line segments and form bounding boxes
for ml_original in ml_lines:
#-- break the line into n_seg segments
lcoords = list(ml_original.coords)
if len(lcoords) < 15:
ml_broken = [ml_original]
else:
#-- start from segments of 10 coordiantes, and increase until
#-- you find a divisible number
n_seg = 15
while len(lcoords)%n_seg < 5:
n_seg += 1
ml_broken = []
bc = 0
while bc < len(lcoords):
ml_broken.append(LineString(lcoords[bc:bc+n_seg]))
bc += n_seg
for ml in ml_broken:
box = ml.minimum_rotated_rectangle
#-- get hd line segment that intersects the box
for hd in hd_lines:
overlap = hd.intersection(box)
#-- if more than 20% of length is within the box, consider the line
if overlap.length > hd.length/5:
if box.geom_type == 'Polygon':
ppatch = PolygonPatch(box,alpha=0.2,facecolor='skyblue')
ax.add_patch(ppatch)
#-- we have found the line pairning. Get mean distance
#-- lines intersect. Now Find the shorter line to use as reference
if ml.length <= hd.length:
#-- use ML line as reference
x1,y1 = ml.coords.xy
x2,y2 = hd.coords.xy
ax.plot(x1,y1,color='red')
ax.plot(x2,y2,color='blue')
else:
#-- use manual line as reference (set as x1,y1)
x1,y1 = hd.coords.xy
x2,y2 = ml.coords.xy
ax.plot(x1,y1,color='blue')
ax.plot(x2,y2,color='red')
#-- go along x1,y1 and find closest points on x2,y2
d = np.empty((len(x1),len(x2)),dtype=float)
ind_list = np.empty(len(x1),dtype=int)
for i in range(len(x1)):
#-- get list of distances
d[i,:] = np.sqrt((np.array(x2)-x1[i])**2 + (np.array(y2)-y1[i])**2)
#-- get index of shortest distanace
ind_list[i] = np.argmin(d[i,:])
#-- Now check check if multiple points of the reference line point to the same
#-- (x2,y2) point
#-- first get list of unique indices
unique_list = list(set(ind_list))
#-- sort in ascending order
unique_list.sort()
#-- get how many times each unique index is repeated
u_count = np.zeros(len(unique_list),dtype=int)
#-- loop through unique indices and find all corresponding indices
for k,u in enumerate(unique_list):
u_count[k] = np.count_nonzero(ind_list == u)
#-- for repeating indices that are side-by-side (for example many 4s and many 5s),
#-- the line is out of bounds of the other line, and the far-away points are
#-- alternating between a few points on the refernec line. Make them all the same index
remove_list = []
for k in range(len(unique_list)):
if u_count[k] > 1:
#-- compare with element after
if (unique_list[k]+1 in unique_list):
ii, = np.nonzero(ind_list == unique_list[k])
jj, = np.nonzero(ind_list == unique_list[k]+1)
if np.min(d[ii,unique_list[k]]) < np.min(d[jj,unique_list[k]]):
remove_list.append(unique_list[k]+1)
else:
remove_list.append(unique_list[k])
#-- remove duplicate elements
remove_list = list(set(remove_list))
for r in remove_list:
unique_list.remove(r)
#-- loop through unique indices and find all corresponding indices
#-- NOTE we make a list of the total indices, which allows us to also delete
#-- repeated indices (if not deleting, this is redundant. Can just use ':')
xlist = np.arange(len(x1))
for u in unique_list:
w = np.argmin(d[xlist,u])
dist.append(d[xlist[w],u])
ax.plot([x1[xlist[w]],x2[u]],[y1[xlist[w]],y2[u]],color='gray')
#-- since we used this index, take it out
# xlist = np.delete(xlist,w)
distances[count] = np.mean(dist)
if len(dist) != 0:
minims[count] = np.min(dist)
maxims[count] = np.max(dist)
else:
minims[count] = np.nan
maxims[count] = np.nan
outtxt.write('%.1f \t %.1f \t %.1f \t\t %s\n'%(distances[count],minims[count],maxims[count],f))
plt.savefig(os.path.join(dir2,f.replace('.shp','_dist.pdf')),format='PDF')
plt.close(fig)
#-- also save the overal average
outtxt.write('\nMEAN\t\t\t\t%.1f m\n'%(np.nanmean(distances)))
outtxt.write('MIN\t\t\t\t\t%.1f m\n'%(np.nanmin(minims)))
outtxt.write('MAX\t\t\t\t\t%.1f m\n'%(np.nanmax(maxims)))
outtxt.write('Interquartile Range\t%.1f m\n'%(stats.iqr(distances,nan_policy='omit')))
outtxt.write('MAD\t\t\t\t\t%.1f m\n'%(stats.median_absolute_deviation(distances,nan_policy='omit')))
outtxt.write('STD\t\t\t\t\t%.1f m\n'%(np.nanstd(distances)))
outtxt.close()
#-- run main program
if __name__ == '__main__':
main()