forked from moshesipper/tiny_gp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tiny_gp.py
165 lines (144 loc) · 6.86 KB
/
tiny_gp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# tiny genetic programming by © moshe sipper, www.moshesipper.com
from random import random, randint, seed
from statistics import mean
from copy import deepcopy
POP_SIZE = 60 # population size
MIN_DEPTH = 2 # minimal initial random tree depth
MAX_DEPTH = 5 # maximal initial random tree depth
GENERATIONS = 250 # maximal number of generations to run evolution
TOURNAMENT_SIZE = 5 # size of tournament for tournament selection
XO_RATE = 0.8 # crossover rate
PROB_MUTATION = 0.2 # per-node mutation probability
def add(x, y): return x + y
def sub(x, y): return x - y
def mul(x, y): return x * y
FUNCTIONS = [add, sub, mul]
TERMINALS = ['x', -2, -1, 0, 1, 2]
def target_func(x): # evolution's target
return x*x*x*x + x*x*x + x*x + x + 1
def generate_dataset(): # generate 101 data points from target_func
dataset = []
for x in range(-100,101,2):
x /= 100
dataset.append([x, target_func(x)])
return dataset
class GPTree:
def __init__(self, data = None, left = None, right = None):
self.data = data
self.left = left
self.right = right
def node_label(self): # string label
if (self.data in FUNCTIONS):
return self.data.__name__
else:
return str(self.data)
def print_tree(self, prefix = ""): # textual printout
print("%s%s" % (prefix, self.node_label()))
if self.left: self.left.print_tree (prefix + " ")
if self.right: self.right.print_tree(prefix + " ")
def compute_tree(self, x):
if (self.data in FUNCTIONS):
return self.data(self.left.compute_tree(x), self.right.compute_tree(x))
elif self.data == 'x': return x
else: return self.data
def random_tree(self, grow, max_depth, depth = 0): # create random tree using either grow or full method
if depth < MIN_DEPTH or (depth < max_depth and not grow):
self.data = FUNCTIONS[randint(0, len(FUNCTIONS)-1)]
elif depth >= max_depth:
self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
else: # intermediate depth, grow
if random () > 0.5:
self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
else:
self.data = FUNCTIONS[randint(0, len(FUNCTIONS)-1)]
if self.data in FUNCTIONS:
self.left = GPTree()
self.left.random_tree(grow, max_depth, depth = depth + 1)
self.right = GPTree()
self.right.random_tree(grow, max_depth, depth = depth + 1)
def mutation(self):
if random() < PROB_MUTATION: # mutate at this node
self.random_tree(grow = True, max_depth = 2)
elif self.left: self.left.mutation()
elif self.right: self.right.mutation()
def size(self): # tree size in nodes
if self.data in TERMINALS: return 1
l = self.left.size() if self.left else 0
r = self.right.size() if self.right else 0
return 1 + l + r
def build_subtree(self): # count is list in order to pass "by reference"
t = GPTree()
t.data = self.data
if self.left: t.left = self.left.build_subtree()
if self.right: t.right = self.right.build_subtree()
return t
def scan_tree(self, count, second): # note: count is list, so it's passed "by reference"
count[0] -= 1
if count[0] <= 1:
if not second: # return subtree rooted here
return self.build_subtree()
else: # glue subtree here
self.data = second.data
self.left = second.left
self.right = second.right
else:
ret = None
if self.left and count[0] > 1: ret = self.left.scan_tree(count, second)
if self.right and count[0] > 1: ret = self.right.scan_tree(count, second)
return ret
def crossover(self, other): # xo 2 trees at random nodes
if random() < XO_RATE:
second = other.scan_tree([randint(1, other.size())], None) # 2nd random subtree
self.scan_tree([randint(1, self.size())], second) # 2nd subtree "glued" inside 1st tree
# end class GPTree
def init_population(): # ramped half-and-half
pop = []
for md in range(3, MAX_DEPTH + 1):
for i in range(int(POP_SIZE/6)):
t = GPTree()
t.random_tree(grow = True, max_depth = md) # grow
pop.append(t)
for i in range(int(POP_SIZE/6)):
t = GPTree()
t.random_tree(grow = False, max_depth = md) # full
pop.append(t)
return pop
def fitness(individual, dataset): # inverse mean absolute error over dataset normalized to [0,1]
return 1 / (1 + mean([abs(individual.compute_tree(ds[0]) - ds[1]) for ds in dataset]))
def selection(population, fitnesses): # select one individual using tournament selection
tournament = [randint(0, len(population)-1) for i in range(TOURNAMENT_SIZE)] # select tournament contenders
tournament_fitnesses = [fitnesses[tournament[i]] for i in range(TOURNAMENT_SIZE)]
return deepcopy(population[tournament[tournament_fitnesses.index(max(tournament_fitnesses))]])
def main():
# init stuff
seed() # init internal state of random number generator
dataset = generate_dataset()
population= init_population()
best_of_run = None
best_of_run_f = 0
best_of_run_gen = 0
fitnesses = [fitness(population[i], dataset) for i in range(POP_SIZE)]
# go evolution!
for gen in range(GENERATIONS):
nextgen_population=[]
for i in range(POP_SIZE):
parent1 = selection(population, fitnesses)
parent2 = selection(population, fitnesses)
parent1.crossover(parent2)
parent1.mutation()
nextgen_population.append(parent1)
population=nextgen_population
fitnesses = [fitness(population[i], dataset) for i in range(POP_SIZE)]
if max(fitnesses) > best_of_run_f:
best_of_run_f = max(fitnesses)
best_of_run_gen = gen
best_of_run = deepcopy(population[fitnesses.index(max(fitnesses))])
print("________________________")
print("gen:", gen, ", best_of_run_f:", round(max(fitnesses),3), ", best_of_run:")
best_of_run.print_tree()
if best_of_run_f == 1: break
print("\n\n_________________________________________________\nEND OF RUN\nbest_of_run attained at gen " + str(best_of_run_gen) +\
" and has f=" + str(round(best_of_run_f,3)))
best_of_run.print_tree()
if __name__== "__main__":
main()