from google.colab import drive
drive.mount('/gdrive', force_remount=True)
# /gdrive/My Drive/ (폴더명)구글 드라이브 마운트
# 그림파일로 렌더링 하도록 패키지 설정
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet# 필요한 모듈 설치
import tensorflow as tf
import gym
from IPython import display
import cv2
from pyvirtualdisplay import Display
from IPython import display
import matplotlib.pyplot as plt
from collections import deque
import numpy as np
import random
%matplotlib inline
Display().start()<pyvirtualdisplay.display.Display at 0x7f34a075e1d0>
# Create the CartPole environment.
env = gym.make("CartPole-v1")

# Number of discrete actions (network output size): 2 (push left / push right).
action_num = env.action_space.n
# Observation vector length (network input size): 4
# (cart position, cart velocity, pole angle, pole angular velocity).
state_num = env.observation_space.shape[0]

# Policy-gradient model (REINFORCE-style actor-critic): one shared hidden
# layer feeding two heads -- a softmax policy `pi` over actions and a scalar
# state-value estimate `val` used as a baseline/critic.
i = tf.keras.Input(shape=(state_num,))
out = tf.keras.layers.Dense(128, activation='relu')(i)
pi = tf.keras.layers.Dense(action_num, activation='softmax', name='pi')(out)
val = tf.keras.layers.Dense(1, name='val')(out)
pg_model = tf.keras.Model(inputs=[i], outputs=[pi, val])

# Small learning rate plus gradient-norm clipping for training stability.
opt = tf.keras.optimizers.Adam(0.001, clipnorm=0.1)

pg_model.summary()
# Recorded output of pg_model.summary():
#
# Model: "model_1"
# __________________________________________________________________________________________________
# Layer (type)                    Output Shape         Param #     Connected to
# ==================================================================================================
# input_2 (InputLayer)            [(None, 4)]          0
# __________________________________________________________________________________________________
# dense_3 (Dense)                 (None, 128)          640         input_2[0][0]
# __________________________________________________________________________________________________
# pi (Dense)                      (None, 2)            258         dense_3[0][0]
# __________________________________________________________________________________________________
# val (Dense)                     (None, 1)            129         dense_3[0][0]
# ==================================================================================================
# Total params: 1,027
# Trainable params: 1,027
# Non-trainable params: 0
# __________________________________________________________________________________________________
# --- Training loop ------------------------------------------------------
episode_count = 1000    # number of episodes to train for
scores = []             # per-episode returns; flushed every 20 episodes
discount_rate = 0.99    # discount factor (gamma)
batch_size = 16         # apply averaged gradients every `batch_size` env steps
count = 0               # global environment-step counter
grad_t = []             # gradients accumulated since the last optimizer step

for episode in range(episode_count):
    state = env.reset()
    # Add the batch dimension the model expects: shape (1, state_num).
    state = np.reshape(state, [1, state_num])
    done = False
    total_reward = 0
    while not done:
        count += 1

        # Sample an action from the current stochastic policy.
        p, v = pg_model.predict(state)
        action = np.random.choice(range(action_num), p=p[0])
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_num])

        variable = pg_model.trainable_variables
        with tf.GradientTape() as tape:
            p, v = pg_model(state)
            p = p[0]
            _, n_v = pg_model(next_state)
            # One-step TD target; (1 - done) zeroes the bootstrap term at
            # terminal states.
            td = reward + (1 - done) * discount_rate * n_v[0]
            # Advantage (TD error); stop_gradient so the actor term does not
            # backpropagate into the critic.
            tde = tf.stop_gradient(td - v[0])
            # Critic loss: squared TD error with the target held constant.
            val_loss = tf.square(tf.stop_gradient(td) - v[0])
            # Actor loss: -log pi(a|s) * advantage, plus the critic loss.
            loss = -tf.math.log(p[action]) * tde + val_loss
        grad = tape.gradient(loss, variable)
        grad_t.append(grad)

        # Every `batch_size` steps, apply the element-wise mean of the
        # accumulated gradients and reset the accumulator.
        if count % batch_size == 0:
            opt.apply_gradients(zip(np.mean(grad_t, axis=0), variable))
            grad_t = []

        state = next_state
        total_reward += reward

    scores.append(total_reward)
    mean_score = np.mean(scores)
    print(episode + 1, total_reward)
    if (episode + 1) % 20 == 0:
        print("Episode %d: Mean survival = %0.2lf in %d episodes" % (episode + 1, mean_score, 20))
        scores = []

env.close()

# Reload a previously trained model from Drive. NOTE(review): the matching
# save() call is not in this file -- presumably done in another cell. The
# "not compiled" warning on load is harmless here since only predict() is used.
pg_model = tf.keras.models.load_model('/gdrive/My Drive/hjk_pg_reinforce_model.h5')
# --- Evaluation: run the trained policy greedily and record a video -----
env = gym.make('CartPole-v1')
state = env.reset()
state = np.reshape(state, [1, state_num])
done = False
total_reward = 0

# Write rendered 600x400 frames to an AVI on Drive at 10 fps.
fcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('/gdrive/My Drive/hjk_pg_reinforce.avi', fcc, 10.0, (600, 400))

while not done:
    img_avi = env.render('rgb_array')

    # Greedy action: argmax over the policy head's probabilities
    # (predict returns [pi, val]; [0] selects the policy output).
    action = np.argmax(pg_model.predict(state)[0])
    next_state, reward, done, _ = env.step(action)
    state = np.reshape(next_state, [1, state_num])
    total_reward += reward

    # env.render('rgb_array') returns RGB, but cv2.VideoWriter expects BGR;
    # convert so the recorded colors are correct.
    out.write(cv2.cvtColor(np.uint8(img_avi), cv2.COLOR_RGB2BGR))

print(total_reward)
out.release()
cv2.destroyAllWindows()