forked from Jamesyang2333/SAM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
182 lines (154 loc) · 6.26 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"""Utility functions."""
import ast
from collections import defaultdict
import csv
def _get_table_dict(tables):
    """Builds a lookup from table alias (or name) to full table name.

    Args:
      tables: iterable of strings, each either 'full_name alias' or just
        'full_name'. Entries are split on single spaces.

    Returns:
      dict keyed by the alias when one is given, otherwise by the full name;
      values are always the full table name (the first space-separated token).
    """
    alias_to_name = {}
    for entry in tables:
        parts = entry.split(' ')
        full_name = parts[0]
        # With no alias present the table is keyed by its own full name.
        key = parts[1] if len(parts) > 1 else full_name
        alias_to_name[key] = full_name
    return alias_to_name
def _get_join_dict(joins, table_dict, use_alias_keys):
    """Collects, per table, the set of columns that take part in joins.

    Args:
      joins: iterable of equi-join strings of the form 't1.k1=t2.k2'.
      table_dict: mapping from the table token used in `joins` to the full
        table name; only consulted when `use_alias_keys` is false.
      use_alias_keys: if true, keys of the result are the table tokens exactly
        as written in `joins`; otherwise they are translated via `table_dict`.

    Returns:
      defaultdict(set) mapping table -> set of join column names.
    """
    join_keys = defaultdict(set)
    for clause in joins:
        sides = clause.split('=')
        left = sides[0].split('.')
        right = sides[1].split('.')
        endpoints = [(left[0], left[1]), (right[0], right[1])]
        if not use_alias_keys:
            # Translate table tokens (aliases) into full table names.
            endpoints = [(table_dict[tbl], col) for tbl, col in endpoints]
        for tbl, col in endpoints:
            join_keys[tbl].add(col)
    return join_keys
def _try_parse_literal(s):
    """Parses `s` as a Python literal if it denotes a tuple or a string.

    Only tuple and str results are kept, because IN predicates need a tuple
    operand and string equality needs a string operand. Anything else (numbers,
    lists, text that is not a valid literal) is returned unchanged.

    Args:
      s: the raw predicate value, normally a string.

    Returns:
      The evaluated tuple or str, or `s` itself when it does not evaluate to
      one of those types or cannot be evaluated at all.
    """
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors ast.literal_eval is documented to raise on
        # malformed input. Catching only them (rather than a bare `except:`)
        # lets KeyboardInterrupt / SystemExit propagate.
        return s
    # IN needs a tuple operand; string equality needs a string operand.
    if isinstance(parsed, (tuple, str)):
        return parsed
    return s
def _get_predicate_dict(predicates, table_dict):
    """Groups a flat list of predicate triples by full table name.

    Args:
      predicates: flat list laid out as
        ['alias.col', op, value, 'alias.col', op, value, ...].
      table_dict: mapping from the table token used in the predicates to the
        full table name.

    Returns:
      dict mapping full table name -> {'cols': [...], 'ops': [...],
      'vals': [...]}, the three lists being index-aligned. Values are passed
      through _try_parse_literal.
    """
    grouped = {}
    # Walk the flat list three items at a time: (column, operator, value).
    for start in range(0, len(predicates), 3):
        triple = predicates[start:start + 3]
        qualified = triple[0].split('.')
        table_name = table_dict[qualified[0]]
        entry = grouped.setdefault(table_name,
                                   {'cols': [], 'ops': [], 'vals': []})
        entry['cols'].append(qualified[1])
        entry['ops'].append(triple[1])
        entry['vals'].append(_try_parse_literal(triple[2]))
    return grouped
def JobToQuery(csv_file, use_alias_keys=True):
    """Parses custom #-delimited query csv.

    Each non-blank line holds four '#'-separated fields, and every field is
    itself comma-separated: the tables, the joins, the predicates (a flat
    list of column/operator/value triples) and the true cardinality.

    `use_alias_keys` only applies to the 2nd return value.
    If use_alias_keys is true, join_dict will use aliases (t, mi) as keys;
    otherwise it uses real table names (title, movie_index).

    Converts into (tables, join dict, predicate dict, true cardinality). Only
    works for single equivalency class.

    Args:
      csv_file: path of the query file to read.
      use_alias_keys: see above.

    Returns:
      A list with one (tables, join_dict, predicate_dict, true_cardinality)
      tuple per non-blank line, where `tables` is the list of full table
      names.
    """
    queries = []
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files handed to csv.reader.
    with open(csv_file, newline='') as f:
        for row in csv.reader(f, delimiter='#'):
            if not row:
                # csv.reader yields [] for a blank line (e.g. a trailing
                # newline at the end of the file); without this guard the
                # next() calls below would leak a StopIteration.
                continue
            reader = csv.reader(row)  # each '#' field is comma-separated
            table_dict = _get_table_dict(next(reader))
            join_dict = _get_join_dict(next(reader), table_dict,
                                       use_alias_keys)
            predicate_dict = _get_predicate_dict(next(reader), table_dict)
            true_cardinality = int(next(reader)[0])
            queries.append((list(table_dict.values()), join_dict,
                            predicate_dict, true_cardinality))
    return queries
def DMVToQuery(csv_file, use_alias_keys=True):
    """Parses custom #-delimited query csv.

    The file format and the returned structure are exactly those of
    JobToQuery. This function used to be a verbatim copy of that parser; it
    now delegates to it so the two cannot drift apart.

    `use_alias_keys` only applies to the 2nd return value.
    If use_alias_keys is true, join_dict will use aliases as keys; otherwise
    it uses real table names.

    Converts into (tables, join dict, predicate dict, true cardinality). Only
    works for single equivalency class.

    Args:
      csv_file: path of the query file to read.
      use_alias_keys: see above.

    Returns:
      A list of (tables, join_dict, predicate_dict, true_cardinality) tuples,
      one per query in the file.
    """
    return JobToQuery(csv_file, use_alias_keys=use_alias_keys)
def UnpackQueries(concat_table, queries):
    """Converts custom query representation to (cols, ops, vals).

    Args:
      concat_table: object exposing `table_names`, `columns`,
        `ColumnIndex(name)` and `TableColumnIndex(table, col)`; presumably the
        joined-table wrapper defined elsewhere in the project.
      queries: iterable of (tables, join_dict, predicate_dict,
        true_cardinality) tuples. join_dict is not used here.

    Returns:
      (converted, true_cards): `converted` holds one (cols, ops, vals) triple
      of index-aligned lists per kept query and `true_cards` the matching true
      cardinalities. A query that references a table missing from
      `concat_table.table_names` is dropped from both lists.
    """
    unpacked, cardinalities = [], []
    for tables, _, predicate_dict, true_cardinality in queries:
        # All predicates in a query (an AND of these).
        columns, operators, values = [], [], []
        # A naive impl of "is join graph subset of another join" check.
        for name in tables:
            if name not in concat_table.table_names:
                print('skipping query')
                break
            # Add the indicator predicate for this table: __in_<table> = 1.
            indicator_idx = concat_table.ColumnIndex('__in_{}'.format(name))
            columns.append(concat_table.columns[indicator_idx])
            operators.append('=')
            values.append(1)
        else:
            # Reached only when every table was found (no break above).
            for name, preds in predicate_dict.items():
                pred_cols = preds['cols']
                pred_ops = preds['ops']
                pred_vals = preds['vals']
                assert len(pred_cols) == len(pred_ops) == len(pred_vals)
                for col_name, op, raw in zip(pred_cols, pred_ops, pred_vals):
                    col_idx = concat_table.TableColumnIndex(name, col_name)
                    column = concat_table.columns[col_idx]
                    # Cast the value into the column's dtype.
                    cast = column.all_distinct_values.dtype.type
                    if isinstance(raw, (list, set, tuple)):
                        # Collections are cast element-wise, keeping their
                        # container type.
                        value = type(raw)(map(cast, raw))
                    else:
                        value = cast(raw)
                    columns.append(column)
                    operators.append(op)
                    values.append(value)
            unpacked.append((columns, operators, values))
            cardinalities.append(true_cardinality)
    return unpacked, cardinalities
def InvertOrder(order):
    """Returns the inverse of the permutation `order`, or None if it is None.

    `order[i]` gives the position of natural index i; the result maps each
    position back to its natural index.
    """
    if order is None:
        return None
    # 'order'[i] maps nat_i -> position of nat_i
    # Inverse: position -> natural idx.  This is the "true" ordering -- it's
    # how heuristic orders are generated + (less crucially) how Transformer
    # works.
    inverse = [None] * len(order)
    for natural_idx, position in enumerate(order):
        inverse[position] = natural_idx
    return inverse
def HumanFormat(num):
    """Formats a number with two decimals and a K/M/G/T/P magnitude suffix.

    The value is divided by 1000 until its magnitude drops below 1000 or the
    largest suffix ('P') is reached, e.g. 1234 -> '1.23K'.

    Args:
      num: the number to format (int or float).

    Returns:
      The formatted string, e.g. '999.00', '1.50M', '1000.00P'.
    """
    suffixes = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Stop at the last suffix: without this bound, values >= 1e18 would index
    # past the end of `suffixes`, and infinity would loop forever.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '%.2f%s' % (num, suffixes[magnitude])