-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBasic_Model.py
More file actions
57 lines (40 loc) · 1.56 KB
/
Basic_Model.py
File metadata and controls
57 lines (40 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from utils import *
'''
This file runs the model simulations using the parameters set in utils.
It saves the simulation results in the SavedData folder in the root directory.
'''
# Simulation driver: run NumIter simulations of the Q-learning model and pickle
# the results.
#
# Every parameter and helper used below (NumIter, NumTr, AllCoh, DiVector,
# States, Numstates, NumActions, K, SigmaEv, lr, Gamma, epsilon, esp_decay,
# TakeAction, np, pickle) is not defined in this file; they are presumably
# provided by `from utils import *` -- verify against utils.
#
# Outputs
#   AllQ[iti, tr]     : copy of the Q-table taken right after trial `tr` of
#                       iteration `iti` has ended.
#   AllBound[iti, tr] : the state returned by TakeAction on the step that ended
#                       trial `tr` of iteration `iti`.

n_trials = len(AllCoh)

# Every trial starts from the middle index of States.
# NOTE(review): the original comment called this "set to zero", so the middle
# state presumably represents zero accumulated evidence -- confirm in utils.
start_state = len(States) // 2

AllQ = np.zeros((NumIter, n_trials, Numstates, NumActions))
# NOTE(review): AllBound has NumTr columns but each row is filled from an array
# of length len(AllCoh); this assumes NumTr == len(AllCoh) -- confirm.
AllBound = np.zeros((NumIter, NumTr))

for iti in range(NumIter):
    print(iti)
    Q = np.zeros((Numstates, NumActions))  # Q-table, restarted every iteration
    Bound = np.zeros(n_trials)

    for trCount in range(n_trials):
        # Per-trial setup: pick this trial's entries and decay epsilon once.
        Ctemp = AllCoh[trCount]
        Dtemp = DiVector[trCount]
        # NOTE(review): epsilon is never restored between iterations, so later
        # iterations start with an already-decayed epsilon even though Q is
        # reset. Confirm this is intended.
        epsilon = epsilon * esp_decay
        st = start_state

        # Step through the trial until TakeAction returns action 0 or 1,
        # which ends the trial.
        while True:
            ev = np.random.normal(K * Dtemp * Ctemp, SigmaEv)  # evidence sample
            NexState, NextAction, R = TakeAction(st, epsilon, ev, Dtemp, Q)

            if NextAction in (0, 1):
                # Trial-ending action: update towards the reward only, with no
                # bootstrapped next-state value.
                Q[st, NextAction] += lr * (R - Q[st, NextAction])
                break

            # Any other action: one-step update that bootstraps from the best
            # Q-value of the next state, then move to that state.
            Q[st, NextAction] += lr * (
                R + Gamma * np.max(Q[NexState, :]) - Q[st, NextAction]
            )
            st = NexState

        # Record the end-of-trial snapshot. Assigning an array slice copies the
        # values, so later updates to Q do not alter earlier snapshots.
        AllQ[iti, trCount] = Q
        Bound[trCount] = NexState

    # The original also executed `AllQ[iti] = Q` here, which broadcast the
    # final Q-table over every trial slot and erased the per-trial snapshots
    # stored above. The final Q-table is still available as AllQ[iti, -1].
    AllBound[iti] = Bound

# Persist the results as the list [AllBound, AllQ]. open() fails if the
# ./SavedData directory does not already exist.
with open('./SavedData/ModelSim.pkl', 'wb') as f:
    pickle.dump([AllBound, AllQ], f)