# REINFORCE (Monte-Carlo policy gradient) training loop.
# NOTE(review): indentation in the original paste was lost; the nesting below
# is reconstructed from the control-flow semantics — confirm against the source.
# Relies on names defined earlier in the file: `env` (Gym-style environment),
# `pg_model` (Keras policy network), `opt` (optimizer), `tf`, `np`,
# `state_num` (observation size), `action_num` (number of discrete actions).

# Train for this many episodes.
episode_count = 1000
# Rolling window of episode scores (reset every 20 episodes below).
scores = []
# Per-episode transition buffer: (state, action, scaled_reward, next_state, done).
memory = []
# Discount factor for the return G.
discount_rate = 0.99

for episode in range(episode_count):
    state = env.reset()
    # Reshape to a batch of one so the model accepts it.
    state = np.reshape(state, [1, state_num])
    done = False
    total_reward = 0

    # --- Roll out one full episode, storing every transition. ---
    while not done:
        # Sample an action from the policy's output distribution.
        p = pg_model.predict(state)[0]
        action = np.random.choice(range(action_num), p=p)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_num])
        # Reward scaled by 1/100 to keep the loss magnitude small
        # (presumably for optimization stability — TODO confirm).
        memory.append((state, action, reward / 100., next_state, done))
        # Advance to the next state and keep rolling.
        state = next_state
        total_reward += reward

    # --- Episode finished: run the REINFORCE update. ---
    # Walk the episode backwards, accumulating the discounted return G,
    # and apply one gradient step per transition.
    G = 0
    # trainable_variables is loop-invariant; hoist it out of the update loop.
    variable = pg_model.trainable_variables
    for s, a, r, n_s, d in memory[::-1]:
        G = r + discount_rate * G
        with tf.GradientTape() as tape:
            # Probability the policy assigns to the action actually taken.
            p = pg_model(s)[0][a]
            # Policy-gradient loss: -log pi(a|s) * G (gradient ascent on return).
            loss = -tf.math.log(p) * G
        grad = tape.gradient(loss, variable)
        opt.apply_gradients(zip(grad, variable))
    # Clear the buffer for the next episode.
    memory = []

    scores.append(total_reward)
    # Checkpoint whenever an episode exceeds 450 reward.
    if total_reward > 450:
        pg_model.save('/gdrive/My Drive/hjk_pg_reinforce_model.h5')

    mean_score = np.mean(scores)
    print(episode + 1, total_reward)
    # Every 20 episodes: report the window mean, stop early if solved,
    # otherwise reset the window.
    if (episode + 1) % 20 == 0:
        print("Episode %d: Mean survival = %0.2lf in %d episodes" % (episode + 1, mean_score, 20))
        if mean_score >= 400:
            break
        scores = []

env.close()