# 【深度强化学习】神经网络、爬山法优化控制倒立摆问题实战（附源码）

## 用神经网络来参数化策略

# Hill-climbing search with random restarts over the 5 policy parameters
# (connection weights plus threshold). Each restart begins at a random point
# and repeatedly moves to the best single-coordinate neighbour until no
# neighbour scores higher or the reward target is met. The best result over
# all restarts is left in `top_rewards` / `top_paras`.
delta = 0.01  # step size used when probing neighbours
top_rewards = 0
top_paras = None
for _ in range(100):  # many restarts; keep the best result overall
    paras = np.random.rand(5)  # random initial connection weights and threshold
    most_rewards = rewards_by_paras(env, paras)
    for i in range(200):
        best_paras = paras
        cur_rewards = most_rewards
        # Probe +delta, then -delta, along each coordinate in turn and
        # remember the strictly best neighbour seen so far.
        for axis in range(len(paras)):
            for step in (delta, -delta):
                offset = [0] * len(paras)
                offset[axis] = step
                candidate = paras + offset  # ndarray + list adds element-wise
                rewards = rewards_by_paras(env, candidate)
                if rewards > most_rewards:
                    most_rewards = rewards
                    best_paras = candidate
        # Adopt the best neighbour BEFORE the stop check so that `paras`
        # always corresponds to `most_rewards`, even when the target is hit
        # on this very step. (If nothing improved, best_paras is paras.)
        paras = best_paras
        # Stop at a local optimum (no improvement) or once the goal is met.
        if (cur_rewards == most_rewards) or (most_rewards >= 200):
            break
    if most_rewards > top_rewards:
        top_rewards = most_rewards
        top_paras = paras
print(top_rewards, top_paras)

