-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_12_evaluating.py
More file actions
56 lines (46 loc) · 1.58 KB
/
_12_evaluating.py
File metadata and controls
56 lines (46 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from kaggle_environments import make,evaluate
from _11_submission import act,agent
from _12_DQN import DQNAgent
import os
# --- Evaluation/rollout script for the ConnectX agent ---------------------
# Agent under test: `act` imported from _11_submission (project-local).
decision_fct=act
# Opponent spec passed to kaggle-environments; 'random' is the built-in
# uniformly-random opponent. (NOTE(review): name has a typo, "oponent".)
oponent_fct='random'
#Environment settings
env = make("connectx", debug=True)
# env.train() yields a single-seat trainer; None marks the seat we control,
# the opponent fills the other seat.
trainer = env.train([None, oponent_fct])
#Board size — standard ConnectX dimensions (6 rows x 7 columns)
configuration={"rows": 6, "columns": 7}
rows=configuration['rows']
columns=configuration['columns']
#Training process settings
EPISODES=30
# DQN agent instance; presumably used by `act` via a saved model rather than
# directly below — TODO confirm (agent2 is never referenced in this script).
agent2=DQNAgent(rows=rows,columns=columns,action_size=columns)
verbose=False
# NOTE(review): this reset is redundant — the loop resets again before use —
# and `done` is re-assigned by trainer.step() before it is ever read.
obs = trainer.reset()
done=False
for e in range(EPISODES):
    # Fresh board for each episode.
    state = trainer.reset()
    if verbose: print("episode: ",e)
    # 100 is a safe upper bound: a 6x7 board fills in at most 42 moves.
    for i in range(100): #episode isnt finished
        action = decision_fct(state,configuration) # Action for the agent being trained.
        # step() advances both our move and the opponent's reply.
        new_state, reward, done, info = trainer.step(action)
        #print('new_state=', new_state)
        state=new_state
        if done:
            #print(states_converter(new_state,rows,columns))
            print("Episode: {}, reward: {} after {} moves".format(e,reward,i))
            break
def mean_reward(rewards):
    """Average first-seat reward over a list of episode results.

    Parameters
    ----------
    rewards : list of [reward, reward]
        Per-episode reward pairs as returned by
        ``kaggle_environments.evaluate``; element 0 is our agent's reward.
        A ``None`` reward (drawn or invalidated game) counts as 0.

    Returns
    -------
    float
        Mean reward for our agent, or ``0.0`` when ``rewards`` is empty.
    """
    # Guard: evaluate() with num_episodes=0 (or a failed run) would
    # otherwise raise ZeroDivisionError.
    if not rewards:
        return 0.0
    # Map None -> 0 without mutating the caller's list (the original
    # assigned r[0]=0 in place, silently altering the input).
    total = sum(0 if r[0] is None else r[0] for r in rewards)
    return total / float(len(rewards))
# Benchmark the agent: play 30 full games against the built-in random
# opponent and report the average reward from our seat.
episode_rewards = evaluate("connectx", [decision_fct, 'random'], num_episodes=30)
print("My Agent vs Random Agent:", mean_reward(episode_rewards))
#cleaning
# Remove the model checkpoint written during training. Catch only OSError
# (which includes FileNotFoundError) — the original bare `except:` would
# also swallow KeyboardInterrupt, NameError, and every other bug.
try:
    os.remove('./model_action_predictor.h5')
except OSError:
    pass