-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_ml.py
213 lines (176 loc) · 5.89 KB
/
train_ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from tetris.tetris import Tetris
import random, time, sys
import numpy as np
from ai.utils.utils import getActivePosition, findPositions, isOutOfBounds
from ai.utils.pathfinding import findPath
from ai.algorithms.mlAi import choosePosition
from ai.utils.display import display, clear
from ml.utils.heuristics import *
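# Use a genetic algorithm to find weights for the specified parameters; the
# weights are used to calculate which position to choose.
# Concept:
# We start with a batch of "players", each a collection of weights for the parameters.
# The players play the game, and the ones with higher scores (more fit) are the better ones.
# The better players are then bred and mutated.
# Parameters:
# height
# neighbors
# holes
# Represented as [height, neighbors, holes].
# NOTE(review): NUMWEIGHTS is 4 but only three parameters are listed here - confirm the fourth.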
# Tunable settings for the genetic algorithm.

# Probability that a player scoring at or below the median is kept (after
# being mutated) by evolve().
CHANCEOFSURVIVAL = 0.1
# NOTE(review): not referenced anywhere in the visible code; mutate() perturbs
# every weight unconditionally - confirm whether this was meant to gate it.
CHANCEOFMUTATE = 0.2
# Population size used by main() when no count is supplied.
INITIALPOP = 10
# Score a player is meant to reach for it to count as playing well enough.
DESIREDSCORE = 1000
# Number of weights each Player carries.
NUMWEIGHTS = 4
# When True, per-player diagnostics are printed; switched on by the -v flag.
VERBOSE = False
class Player:
    """One candidate solution: a vector of weights plus the logic to play with it.

    Attributes:
        weights: mutable sequence of weights passed to choosePosition() when
            ranking candidate placements.
    """

    def __init__(self, weights=None):
        """Create a player.

        Args:
            weights: optional sequence of weights, stored as-is (not copied).
                When omitted, the player starts with NUMWEIGHTS zeros.
        """
        # Identity test instead of `!= None`: array-like weights evaluate
        # `!= None` element-wise, which cannot be used as a condition.
        self.weights = weights if weights is not None else [0] * NUMWEIGHTS

    def giveRandomWeights(self):
        """Overwrite every weight in place with a random value in [-5, 5)."""
        for w in range(len(self.weights)):
            self.weights[w] = random.random() * 10 - 5

    # this is for comparing players to each other visually
    def name(self):
        """Return the weights rounded to one decimal, e.g. "( 1.0 -2.5)"."""
        return "(" + " ".join(f"{round(weight, 1): >4}" for weight in self.weights) + ")"

    def ai(self, game):  # mutator function
        """Play the active piece of `game` using this player's weights.

        Candidate positions are ranked with choosePosition(); the first one
        for which findPath() yields a path is played move by move. The method
        returns once game.numPieces changes (or the game is lost).

        Args:
            game: the game object to advance; it is modified in place.
        """
        board = game.getBoard()
        position = getActivePosition(board, game.pivot)
        positions = findPositions(board, position, game.rotatable)

        path = None
        while path is None:
            if not positions:
                # No candidate left: continue with no moves, so the loop
                # below only advances time.
                path = []
                break
            p = choosePosition(board, positions, self.weights)
            target = positions[p]
            del positions[p]  # remove from list of remaining positions
            path = findPath(board, position, target, game.rotatable)
        moves = path

        # Map each move character onto the game action it triggers.
        actions = {
            'd': game.incrementTime,
            'r': game.translateActiveRight,
            'l': game.translateActiveLeft,
            'u': game.rotateActiveClockwise,
        }

        # Execute move sequence until the piece count changes. The extra
        # `game.lost` check keeps a finished game from spinning here forever.
        numPieces = game.numPieces
        while numPieces == game.numPieces and not game.lost:
            if moves:
                # Always consume the move: an unrecognised character is
                # dropped instead of being retried endlessly.
                action = actions.get(moves.pop(0))
                if action is not None:
                    action()
            else:
                game.incrementTime()
def create_initial_population(count):
    """Return a list of `count` new players, each given random weights."""

    def spawn():
        # A fresh player whose weights have been randomised.
        newcomer = Player()
        newcomer.giveRandomWeights()
        return newcomer

    return [spawn() for _ in range(count)]
def breed(p1, p2):
    """Return a new Player whose weights are each taken from one parent at random.

    Args:
        p1: first parent.
        p2: second parent.
    """
    child = Player()
    for slot in range(len(child.weights)):
        # Each slot inherits independently from either parent.
        candidates = [p1.weights[slot], p2.weights[slot]]
        child.weights[slot] = random.choice(candidates)
    return child
def mutate(p):
    """Shift every weight of `p` in place by a random offset and return `p`.

    Each offset is drawn independently with random.uniform(-1, 1).
    """
    for idx, current in enumerate(p.weights):
        p.weights[idx] = current + random.uniform(-1, 1)
    return p
def evolve(pop, pop_scores, base):
    """Build the next generation from `pop` and the scores its players achieved.

    Players whose (fuzzed) score is strictly above the median survive as they
    are. Every other player survives with probability CHANCEOFSURVIVAL and is
    mutated in place when it does. The remaining slots are filled with mutated
    children bred from players already in the new generation, so earlier
    children can become parents of later ones.

    Args:
        pop: list of players, index-aligned with `pop_scores`.
        pop_scores: one numeric score per player. It is left unmodified.
        base: baseline score, used only for the printed comparison.

    Returns:
        A list of len(pop) players forming the next generation.
    """
    genSize = len(pop)
    # Add random fuzz in [0, 1) to a copy of the scores, so the caller's list
    # is not altered as a side effect.
    fuzzed = [pop_scores[i] + random.random() for i in range(genSize)]
    median = np.median(fuzzed)

    newGen = []
    for i in range(genSize):
        if fuzzed[i] > median:
            newGen.append(pop[i])
            if VERBOSE: print(pop[i].name(), int(fuzzed[i]), "SURVIVED")
        elif CHANCEOFSURVIVAL > random.random():
            newGen.append(mutate(pop[i]))
            if VERBOSE: print(pop[i].name(), int(fuzzed[i]), "SQUEAKED BY")
        else:
            if VERBOSE: print(pop[i].name(), int(fuzzed[i]), "DIED")
    print("Median score:", int(median), f"(relative to baseline: {round(int(median)/(base+10**(-10)) * 100)}%)")

    # A population of one has nobody above its own median; keep the top scorer
    # so that breeding below always has at least one parent to draw from.
    if not newGen and genSize:
        best = max(range(genSize), key=fuzzed.__getitem__)
        newGen.append(pop[best])

    while len(newGen) < genSize:
        last = len(newGen) - 1
        a, b = random.randint(0, last), random.randint(0, last)
        # Prefer two distinct parents, but with a single candidate there is no
        # alternative, so the child becomes a mutated copy of that one parent.
        while a == b and last > 0:
            b = random.randint(0, last)
        newGen.append(mutate(breed(newGen[a], newGen[b])))
    return newGen
from ai.algorithms.holyNeighborAi import choosePosition as baselineFunction
from ai.ai import AI
def baseline(seed):
    """Play one complete game with the baseline AI and return its line count.

    Args:
        seed: value passed to random.seed() before the game is created.

    Returns:
        game.numLines once the game is lost.
    """
    random.seed(seed)
    game = Tetris(numColumns=10, numRows=10)
    reference = AI(baselineFunction)
    while not game.lost:
        reference.aiSequence(game)
    random.seed()  # re-seed without a fixed value once the game is over
    lines = game.numLines
    if VERBOSE:
        print(f"Baseline AI played game with score {lines}.")
    return lines
def train(population, seed):
    """Let every player play one game to the end and collect the scores.

    The global RNG is seeded with the same `seed` before each player's game
    (for fairness between players) and re-seeded without a fixed value
    afterwards. The best player of the round is printed; no evolution happens
    here.

    Args:
        population: non-empty list of players.
        seed: value passed to random.seed() before each game.

    Returns:
        A list holding game.numLines for each player, in population order.
    """
    scores = []
    highestScore = 0
    bestPlayer = population[0]
    for contender in population:
        random.seed(seed)
        game = Tetris(numColumns=10, numRows=10)
        while not game.lost:
            contender.ai(game)  # play round of game
        random.seed()  # reset random seed
        result = game.numLines
        # Strict comparison: on a tie the earlier player stays the best.
        if result > highestScore:
            highestScore, bestPlayer = result, contender
        if VERBOSE:
            print(f"{contender.name()} played game with score {result}.", contender.weights)
        scores.append(result)
    print(f"{bestPlayer.name()} is the best player with a score of {highestScore}: {bestPlayer.weights}")
    return scores
def save(player):
    """Write the player's weights to the file "ml/model", one weight per line.

    The file is opened in "w+" mode, so existing content is replaced.

    Args:
        player: object whose `weights` are written out with str().
    """
    # The context manager closes the file even if a write raises.
    with open("ml/model", 'w+') as f:
        f.writelines(str(weight) + "\n" for weight in player.weights)
def main(popCount):
    """Run the genetic algorithm until a player reaches DESIREDSCORE, then save it.

    Each generation draws a fresh seed, plays the baseline AI and every player
    on that seed, and evolves the population from the resulting scores. As
    soon as a player scores at least DESIREDSCORE its weights are written out
    with save() and the function returns.

    Args:
        popCount: population size; a falsy value (None or 0) selects INITIALPOP.
    """
    pop = create_initial_population(popCount if popCount else INITIALPOP)
    if VERBOSE:
        print("\nInitial Population:")
        for player in pop: print(player.name())

    popnum = 1
    final_player = None
    while final_player is None:
        print(f"\nPopulation {popnum}:")
        seed = random.randint(1, 10**10)
        base = baseline(seed)
        scores = train(pop, seed)
        # NOTE: only a single seed is evaluated per generation.
        if VERBOSE: print("\n")

        # train() returns scores in population order, so the index of the
        # highest score identifies the best player of this generation.
        best = max(range(len(scores)), key=scores.__getitem__)
        if scores[best] >= DESIREDSCORE:
            # Copy the weights so the saved player is a stable snapshot.
            final_player = Player(list(pop[best].weights))
        else:
            pop = evolve(pop, scores, base)
            popnum += 1
    save(final_player)
if __name__ == "__main__":
    # Command line: python3 train_ml.py [-v] [count]
    count = None
    if "-v" in sys.argv:
        sys.argv.remove("-v")
        VERBOSE = True
    # isdecimal() rather than isnumeric(): the latter also accepts characters
    # such as "²" that int() cannot convert.
    if len(sys.argv) > 1 and sys.argv[1].isdecimal():
        count = int(sys.argv[1])
        del sys.argv[1]
    if len(sys.argv) != 1:
        # Anything still left on the command line was not understood; report
        # it as a failure with a non-zero exit status.
        print("Usage: python3 train_ml.py [-v] [count]")
        sys.exit(1)
    main(popCount=count)