2717 lines
190 KiB
Plaintext
2717 lines
190 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import gymnasium as gym\n",
|
|
"from gymnasium.wrappers.jax_to_numpy import JaxToNumpy\n",
|
|
"from gymnasium.wrappers.vector import JaxToNumpy as VJaxToNumpy\n",
|
|
"from solarcarsim.simv1 import SolarRaceV1\n",
|
|
"from gymnasium.utils.env_checker import check_env as gym_check_env\n",
|
|
"from stable_baselines3 import TD3, PPO\n",
|
|
"from stable_baselines3.common.env_checker import check_env\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import jax.numpy as jnp\n",
|
|
"env = SolarRaceV1()\n",
|
|
"wrapped_env = JaxToNumpy(env)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/gymnasium/utils/env_checker.py:384: UserWarning: \u001b[33mWARN: The environment (<JaxToNumpy<SolarRaceV1 instance>>) is different from the unwrapped version (<SolarRaceV1 instance>). This could effect the environment checker as the environment most likely has a wrapper applied to it. We recommend using the raw environment for `check_env` using `env.unwrapped`.\u001b[0m\n",
|
|
" logger.warn(\n",
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/gymnasium/utils/env_checker.py:434: UserWarning: \u001b[33mWARN: Not able to test alternative render modes due to the environment not having a spec. Try instantiating the environment through `gymnasium.make`\u001b[0m\n",
|
|
" logger.warn(\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"gym_check_env(wrapped_env)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Using cuda device\n",
|
|
"Wrapping the env with a `Monitor` wrapper\n",
|
|
"Wrapping the env in a DummyVecEnv.\n",
|
|
"---------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -293 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 181 |\n",
|
|
"| iterations | 1 |\n",
|
|
"| time_elapsed | 11 |\n",
|
|
"| total_timesteps | 2048 |\n",
|
|
"---------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -273 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 174 |\n",
|
|
"| iterations | 2 |\n",
|
|
"| time_elapsed | 23 |\n",
|
|
"| total_timesteps | 4096 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0054363105 |\n",
|
|
"| clip_fraction | 0.036 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -0.000109 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.72e+03 |\n",
|
|
"| n_updates | 10 |\n",
|
|
"| policy_gradient_loss | 0.00132 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 3.03e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -269 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 174 |\n",
|
|
"| iterations | 3 |\n",
|
|
"| time_elapsed | 35 |\n",
|
|
"| total_timesteps | 6144 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.011383371 |\n",
|
|
"| clip_fraction | 0.119 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 8.55e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.63e+03 |\n",
|
|
"| n_updates | 20 |\n",
|
|
"| policy_gradient_loss | -0.00406 |\n",
|
|
"| std | 0.998 |\n",
|
|
"| value_loss | 3.05e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -267 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 173 |\n",
|
|
"| iterations | 4 |\n",
|
|
"| time_elapsed | 47 |\n",
|
|
"| total_timesteps | 8192 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0013208076 |\n",
|
|
"| clip_fraction | 0.002 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 0.000122 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 931 |\n",
|
|
"| n_updates | 30 |\n",
|
|
"| policy_gradient_loss | 8.3e-05 |\n",
|
|
"| std | 0.99 |\n",
|
|
"| value_loss | 4.03e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -272 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 172 |\n",
|
|
"| iterations | 5 |\n",
|
|
"| time_elapsed | 59 |\n",
|
|
"| total_timesteps | 10240 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.012045372 |\n",
|
|
"| clip_fraction | 0.0221 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 5.51e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.17e+03 |\n",
|
|
"| n_updates | 40 |\n",
|
|
"| policy_gradient_loss | 0.00043 |\n",
|
|
"| std | 0.973 |\n",
|
|
"| value_loss | 3.06e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -272 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 172 |\n",
|
|
"| iterations | 6 |\n",
|
|
"| time_elapsed | 71 |\n",
|
|
"| total_timesteps | 12288 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0049332893 |\n",
|
|
"| clip_fraction | 0.0111 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 0.000114 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 7.39e+03 |\n",
|
|
"| n_updates | 50 |\n",
|
|
"| policy_gradient_loss | -0.00083 |\n",
|
|
"| std | 0.973 |\n",
|
|
"| value_loss | 4.05e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -275 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 171 |\n",
|
|
"| iterations | 7 |\n",
|
|
"| time_elapsed | 83 |\n",
|
|
"| total_timesteps | 14336 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0038162381 |\n",
|
|
"| clip_fraction | 0.0192 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 7.37e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.5e+03 |\n",
|
|
"| n_updates | 60 |\n",
|
|
"| policy_gradient_loss | -0.000316 |\n",
|
|
"| std | 0.971 |\n",
|
|
"| value_loss | 3.06e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -274 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 8 |\n",
|
|
"| time_elapsed | 96 |\n",
|
|
"| total_timesteps | 16384 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0039417995 |\n",
|
|
"| clip_fraction | 0.0062 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 7.75e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.76e+03 |\n",
|
|
"| n_updates | 70 |\n",
|
|
"| policy_gradient_loss | -0.000468 |\n",
|
|
"| std | 0.973 |\n",
|
|
"| value_loss | 3.08e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -275 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 9 |\n",
|
|
"| time_elapsed | 108 |\n",
|
|
"| total_timesteps | 18432 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0017004285 |\n",
|
|
"| clip_fraction | 0.0129 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 0.000155 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 756 |\n",
|
|
"| n_updates | 80 |\n",
|
|
"| policy_gradient_loss | -0.000469 |\n",
|
|
"| std | 0.98 |\n",
|
|
"| value_loss | 4.04e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -277 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 10 |\n",
|
|
"| time_elapsed | 121 |\n",
|
|
"| total_timesteps | 20480 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0034604114 |\n",
|
|
"| clip_fraction | 0.0167 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 0.000104 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.09e+03 |\n",
|
|
"| n_updates | 90 |\n",
|
|
"| policy_gradient_loss | -0.00122 |\n",
|
|
"| std | 0.995 |\n",
|
|
"| value_loss | 3.07e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -276 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 11 |\n",
|
|
"| time_elapsed | 133 |\n",
|
|
"| total_timesteps | 22528 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005835003 |\n",
|
|
"| clip_fraction | 0.0289 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 0.000224 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.44e+03 |\n",
|
|
"| n_updates | 100 |\n",
|
|
"| policy_gradient_loss | -0.00135 |\n",
|
|
"| std | 0.985 |\n",
|
|
"| value_loss | 4.06e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -276 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 12 |\n",
|
|
"| time_elapsed | 145 |\n",
|
|
"| total_timesteps | 24576 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00068298285 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 0.000121 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.33e+03 |\n",
|
|
"| n_updates | 110 |\n",
|
|
"| policy_gradient_loss | 7.82e-05 |\n",
|
|
"| std | 0.982 |\n",
|
|
"| value_loss | 3.08e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -274 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 13 |\n",
|
|
"| time_elapsed | 158 |\n",
|
|
"| total_timesteps | 26624 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0048444057 |\n",
|
|
"| clip_fraction | 0.00918 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 0.000109 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.07e+03 |\n",
|
|
"| n_updates | 120 |\n",
|
|
"| policy_gradient_loss | 0.000241 |\n",
|
|
"| std | 0.973 |\n",
|
|
"| value_loss | 3.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -272 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 14 |\n",
|
|
"| time_elapsed | 170 |\n",
|
|
"| total_timesteps | 28672 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0024140258 |\n",
|
|
"| clip_fraction | 0.0194 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 0.000147 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.1e+03 |\n",
|
|
"| n_updates | 130 |\n",
|
|
"| policy_gradient_loss | -0.000116 |\n",
|
|
"| std | 0.974 |\n",
|
|
"| value_loss | 4.07e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -271 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 15 |\n",
|
|
"| time_elapsed | 182 |\n",
|
|
"| total_timesteps | 30720 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0012023712 |\n",
|
|
"| clip_fraction | 0.0306 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 1.91e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.61e+03 |\n",
|
|
"| n_updates | 140 |\n",
|
|
"| policy_gradient_loss | 0.000104 |\n",
|
|
"| std | 0.964 |\n",
|
|
"| value_loss | 3.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -270 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 16 |\n",
|
|
"| time_elapsed | 195 |\n",
|
|
"| total_timesteps | 32768 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005513249 |\n",
|
|
"| clip_fraction | 0.0216 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 6.94e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 647 |\n",
|
|
"| n_updates | 150 |\n",
|
|
"| policy_gradient_loss | -0.00111 |\n",
|
|
"| std | 0.948 |\n",
|
|
"| value_loss | 4.06e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -272 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 17 |\n",
|
|
"| time_elapsed | 207 |\n",
|
|
"| total_timesteps | 34816 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.008722976 |\n",
|
|
"| clip_fraction | 0.0229 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 4.42e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.42e+03 |\n",
|
|
"| n_updates | 160 |\n",
|
|
"| policy_gradient_loss | -0.000433 |\n",
|
|
"| std | 0.945 |\n",
|
|
"| value_loss | 3.08e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -276 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 18 |\n",
|
|
"| time_elapsed | 219 |\n",
|
|
"| total_timesteps | 36864 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0060544205 |\n",
|
|
"| clip_fraction | 0.0893 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 2.23e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 593 |\n",
|
|
"| n_updates | 170 |\n",
|
|
"| policy_gradient_loss | -0.00357 |\n",
|
|
"| std | 0.952 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -277 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 19 |\n",
|
|
"| time_elapsed | 231 |\n",
|
|
"| total_timesteps | 38912 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.006287749 |\n",
|
|
"| clip_fraction | 0.00957 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 9.32e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.89e+03 |\n",
|
|
"| n_updates | 180 |\n",
|
|
"| policy_gradient_loss | -0.000475 |\n",
|
|
"| std | 0.952 |\n",
|
|
"| value_loss | 4.06e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -278 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 20 |\n",
|
|
"| time_elapsed | 243 |\n",
|
|
"| total_timesteps | 40960 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0006410396 |\n",
|
|
"| clip_fraction | 0.00317 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 1.19e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 609 |\n",
|
|
"| n_updates | 190 |\n",
|
|
"| policy_gradient_loss | 0.000116 |\n",
|
|
"| std | 0.963 |\n",
|
|
"| value_loss | 3.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -279 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 21 |\n",
|
|
"| time_elapsed | 256 |\n",
|
|
"| total_timesteps | 43008 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00017864068 |\n",
|
|
"| clip_fraction | 0.0233 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 1.61e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.26e+03 |\n",
|
|
"| n_updates | 200 |\n",
|
|
"| policy_gradient_loss | 1.53e-05 |\n",
|
|
"| std | 0.961 |\n",
|
|
"| value_loss | 4.07e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -280 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 22 |\n",
|
|
"| time_elapsed | 268 |\n",
|
|
"| total_timesteps | 45056 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0052862475 |\n",
|
|
"| clip_fraction | 0.0678 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 4.77e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 154 |\n",
|
|
"| n_updates | 210 |\n",
|
|
"| policy_gradient_loss | 0.000592 |\n",
|
|
"| std | 0.962 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -280 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 23 |\n",
|
|
"| time_elapsed | 280 |\n",
|
|
"| total_timesteps | 47104 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0017830351 |\n",
|
|
"| clip_fraction | 0.0224 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 5.36e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 719 |\n",
|
|
"| n_updates | 220 |\n",
|
|
"| policy_gradient_loss | -0.000802 |\n",
|
|
"| std | 0.961 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -281 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 24 |\n",
|
|
"| time_elapsed | 292 |\n",
|
|
"| total_timesteps | 49152 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00015185933 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 1.01e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.93e+03 |\n",
|
|
"| n_updates | 230 |\n",
|
|
"| policy_gradient_loss | -3.24e-05 |\n",
|
|
"| std | 0.951 |\n",
|
|
"| value_loss | 4.01e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -282 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 25 |\n",
|
|
"| time_elapsed | 305 |\n",
|
|
"| total_timesteps | 51200 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00283485 |\n",
|
|
"| clip_fraction | 0.0281 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 4.17e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.26e+03 |\n",
|
|
"| n_updates | 240 |\n",
|
|
"| policy_gradient_loss | -0.00137 |\n",
|
|
"| std | 0.954 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -282 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 26 |\n",
|
|
"| time_elapsed | 317 |\n",
|
|
"| total_timesteps | 53248 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0021635226 |\n",
|
|
"| clip_fraction | 0.0132 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 8.34e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 321 |\n",
|
|
"| n_updates | 250 |\n",
|
|
"| policy_gradient_loss | -0.000334 |\n",
|
|
"| std | 0.955 |\n",
|
|
"| value_loss | 4.07e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -283 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 27 |\n",
|
|
"| time_elapsed | 330 |\n",
|
|
"| total_timesteps | 55296 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.012732552 |\n",
|
|
"| clip_fraction | 0.0278 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 2.98e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 250 |\n",
|
|
"| n_updates | 260 |\n",
|
|
"| policy_gradient_loss | -0.00149 |\n",
|
|
"| std | 0.963 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -284 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 28 |\n",
|
|
"| time_elapsed | 342 |\n",
|
|
"| total_timesteps | 57344 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0035805362 |\n",
|
|
"| clip_fraction | 0.0155 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 8.34e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.12e+03 |\n",
|
|
"| n_updates | 270 |\n",
|
|
"| policy_gradient_loss | -0.000792 |\n",
|
|
"| std | 0.966 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 29 |\n",
|
|
"| time_elapsed | 354 |\n",
|
|
"| total_timesteps | 59392 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0018168361 |\n",
|
|
"| clip_fraction | 0.000488 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 4.77e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.16e+03 |\n",
|
|
"| n_updates | 280 |\n",
|
|
"| policy_gradient_loss | 4.89e-05 |\n",
|
|
"| std | 0.971 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 30 |\n",
|
|
"| time_elapsed | 366 |\n",
|
|
"| total_timesteps | 61440 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00029722328 |\n",
|
|
"| clip_fraction | 0.000635 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 1.79e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.02e+03 |\n",
|
|
"| n_updates | 290 |\n",
|
|
"| policy_gradient_loss | -0.00093 |\n",
|
|
"| std | 0.957 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 31 |\n",
|
|
"| time_elapsed | 378 |\n",
|
|
"| total_timesteps | 63488 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0036160094 |\n",
|
|
"| clip_fraction | 0.00591 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.37 |\n",
|
|
"| explained_variance | 7.15e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.69e+03 |\n",
|
|
"| n_updates | 300 |\n",
|
|
"| policy_gradient_loss | -0.000226 |\n",
|
|
"| std | 0.953 |\n",
|
|
"| value_loss | 4.08e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -286 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 32 |\n",
|
|
"| time_elapsed | 390 |\n",
|
|
"| total_timesteps | 65536 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00017739431 |\n",
|
|
"| clip_fraction | 0.0329 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 1.79e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.15e+03 |\n",
|
|
"| n_updates | 310 |\n",
|
|
"| policy_gradient_loss | 0.000172 |\n",
|
|
"| std | 0.965 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -287 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 33 |\n",
|
|
"| time_elapsed | 402 |\n",
|
|
"| total_timesteps | 67584 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.004563484 |\n",
|
|
"| clip_fraction | 0.0295 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 9.54e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.55e+03 |\n",
|
|
"| n_updates | 320 |\n",
|
|
"| policy_gradient_loss | -0.00134 |\n",
|
|
"| std | 0.972 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -287 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 167 |\n",
|
|
"| iterations | 34 |\n",
|
|
"| time_elapsed | 414 |\n",
|
|
"| total_timesteps | 69632 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0049857013 |\n",
|
|
"| clip_fraction | 0.018 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 7.15e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.61e+03 |\n",
|
|
"| n_updates | 330 |\n",
|
|
"| policy_gradient_loss | -0.0015 |\n",
|
|
"| std | 0.966 |\n",
|
|
"| value_loss | 3.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -286 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 35 |\n",
|
|
"| time_elapsed | 426 |\n",
|
|
"| total_timesteps | 71680 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0012287534 |\n",
|
|
"| clip_fraction | 0.000879 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 3.46e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.31e+03 |\n",
|
|
"| n_updates | 340 |\n",
|
|
"| policy_gradient_loss | -0.00019 |\n",
|
|
"| std | 0.969 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -287 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 36 |\n",
|
|
"| time_elapsed | 438 |\n",
|
|
"| total_timesteps | 73728 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 3.8835948e-05 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.38 |\n",
|
|
"| explained_variance | 4.41e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.76e+03 |\n",
|
|
"| n_updates | 350 |\n",
|
|
"| policy_gradient_loss | 0.000106 |\n",
|
|
"| std | 0.967 |\n",
|
|
"| value_loss | 4.07e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 37 |\n",
|
|
"| time_elapsed | 450 |\n",
|
|
"| total_timesteps | 75776 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.004455052 |\n",
|
|
"| clip_fraction | 0.0113 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.39 |\n",
|
|
"| explained_variance | 5.01e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.84e+03 |\n",
|
|
"| n_updates | 360 |\n",
|
|
"| policy_gradient_loss | -0.00126 |\n",
|
|
"| std | 0.976 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 38 |\n",
|
|
"| time_elapsed | 462 |\n",
|
|
"| total_timesteps | 77824 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.004241547 |\n",
|
|
"| clip_fraction | 0.0108 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 7.57e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 565 |\n",
|
|
"| n_updates | 370 |\n",
|
|
"| policy_gradient_loss | -0.000582 |\n",
|
|
"| std | 0.98 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 39 |\n",
|
|
"| time_elapsed | 474 |\n",
|
|
"| total_timesteps | 79872 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0017373057 |\n",
|
|
"| clip_fraction | 0.00103 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 5.19e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.68e+03 |\n",
|
|
"| n_updates | 380 |\n",
|
|
"| policy_gradient_loss | 7.21e-05 |\n",
|
|
"| std | 0.981 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -285 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 40 |\n",
|
|
"| time_elapsed | 486 |\n",
|
|
"| total_timesteps | 81920 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00016679132 |\n",
|
|
"| clip_fraction | 0.0324 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 6.26e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.55e+03 |\n",
|
|
"| n_updates | 390 |\n",
|
|
"| policy_gradient_loss | 4.64e-06 |\n",
|
|
"| std | 0.991 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -286 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 41 |\n",
|
|
"| time_elapsed | 497 |\n",
|
|
"| total_timesteps | 83968 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 4.9029622e-05 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 1.26e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.1e+03 |\n",
|
|
"| n_updates | 400 |\n",
|
|
"| policy_gradient_loss | -0.000107 |\n",
|
|
"| std | 0.987 |\n",
|
|
"| value_loss | 4.08e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -288 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 42 |\n",
|
|
"| time_elapsed | 509 |\n",
|
|
"| total_timesteps | 86016 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.008285521 |\n",
|
|
"| clip_fraction | 0.0146 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 4.95e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 5.63e+03 |\n",
|
|
"| n_updates | 410 |\n",
|
|
"| policy_gradient_loss | -0.000514 |\n",
|
|
"| std | 0.983 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -290 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 43 |\n",
|
|
"| time_elapsed | 521 |\n",
|
|
"| total_timesteps | 88064 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0044103963 |\n",
|
|
"| clip_fraction | 0.0221 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 1.2e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.19e+03 |\n",
|
|
"| n_updates | 420 |\n",
|
|
"| policy_gradient_loss | 0.000946 |\n",
|
|
"| std | 0.989 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -292 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 44 |\n",
|
|
"| time_elapsed | 533 |\n",
|
|
"| total_timesteps | 90112 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005043611 |\n",
|
|
"| clip_fraction | 0.0923 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 4.23e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 983 |\n",
|
|
"| n_updates | 430 |\n",
|
|
"| policy_gradient_loss | -0.00239 |\n",
|
|
"| std | 0.989 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -295 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 45 |\n",
|
|
"| time_elapsed | 546 |\n",
|
|
"| total_timesteps | 92160 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0047482466 |\n",
|
|
"| clip_fraction | 0.0449 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 4.65e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.78e+03 |\n",
|
|
"| n_updates | 440 |\n",
|
|
"| policy_gradient_loss | -0.000126 |\n",
|
|
"| std | 0.98 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -296 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 46 |\n",
|
|
"| time_elapsed | 558 |\n",
|
|
"| total_timesteps | 94208 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00606206 |\n",
|
|
"| clip_fraction | 0.0219 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.4 |\n",
|
|
"| explained_variance | 9.48e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.98e+03 |\n",
|
|
"| n_updates | 450 |\n",
|
|
"| policy_gradient_loss | -0.000753 |\n",
|
|
"| std | 0.985 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -296 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 47 |\n",
|
|
"| time_elapsed | 570 |\n",
|
|
"| total_timesteps | 96256 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0008331981 |\n",
|
|
"| clip_fraction | 0.013 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 3.7e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.69e+03 |\n",
|
|
"| n_updates | 460 |\n",
|
|
"| policy_gradient_loss | 2.58e-05 |\n",
|
|
"| std | 0.997 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -295 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 48 |\n",
|
|
"| time_elapsed | 582 |\n",
|
|
"| total_timesteps | 98304 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0005460837 |\n",
|
|
"| clip_fraction | 0.00146 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 8.94e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 578 |\n",
|
|
"| n_updates | 470 |\n",
|
|
"| policy_gradient_loss | 0.00032 |\n",
|
|
"| std | 0.998 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -295 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 49 |\n",
|
|
"| time_elapsed | 595 |\n",
|
|
"| total_timesteps | 100352 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0009762709 |\n",
|
|
"| clip_fraction | 0.0345 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.41 |\n",
|
|
"| explained_variance | 2.98e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.52e+03 |\n",
|
|
"| n_updates | 480 |\n",
|
|
"| policy_gradient_loss | -0.00181 |\n",
|
|
"| std | 0.997 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -296 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 50 |\n",
|
|
"| time_elapsed | 607 |\n",
|
|
"| total_timesteps | 102400 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0003773085 |\n",
|
|
"| clip_fraction | 0.00215 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 4.05e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.89e+03 |\n",
|
|
"| n_updates | 490 |\n",
|
|
"| policy_gradient_loss | 0.000501 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -297 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 51 |\n",
|
|
"| time_elapsed | 619 |\n",
|
|
"| total_timesteps | 104448 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.007983657 |\n",
|
|
"| clip_fraction | 0.0524 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 1.47e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.23e+03 |\n",
|
|
"| n_updates | 500 |\n",
|
|
"| policy_gradient_loss | -0.000208 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.96e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -298 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 52 |\n",
|
|
"| time_elapsed | 631 |\n",
|
|
"| total_timesteps | 106496 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0004374912 |\n",
|
|
"| clip_fraction | 0.0302 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 9.12e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.55e+03 |\n",
|
|
"| n_updates | 510 |\n",
|
|
"| policy_gradient_loss | -3.42e-05 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -300 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 53 |\n",
|
|
"| time_elapsed | 643 |\n",
|
|
"| total_timesteps | 108544 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005380518 |\n",
|
|
"| clip_fraction | 0.0136 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 1.67e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.03e+03 |\n",
|
|
"| n_updates | 520 |\n",
|
|
"| policy_gradient_loss | 0.000128 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 4.09e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -303 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 54 |\n",
|
|
"| time_elapsed | 655 |\n",
|
|
"| total_timesteps | 110592 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00812779 |\n",
|
|
"| clip_fraction | 0.0241 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 4.89e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 563 |\n",
|
|
"| n_updates | 530 |\n",
|
|
"| policy_gradient_loss | -0.00053 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -305 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 55 |\n",
|
|
"| time_elapsed | 668 |\n",
|
|
"| total_timesteps | 112640 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0014875259 |\n",
|
|
"| clip_fraction | 0.00767 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 8.34e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.67e+03 |\n",
|
|
"| n_updates | 540 |\n",
|
|
"| policy_gradient_loss | -0.000391 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 4.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -308 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 56 |\n",
|
|
"| time_elapsed | 680 |\n",
|
|
"| total_timesteps | 114688 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00965928 |\n",
|
|
"| clip_fraction | 0.092 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 3.28e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.47e+03 |\n",
|
|
"| n_updates | 550 |\n",
|
|
"| policy_gradient_loss | -0.00266 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.11e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -311 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 57 |\n",
|
|
"| time_elapsed | 692 |\n",
|
|
"| total_timesteps | 116736 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0022691623 |\n",
|
|
"| clip_fraction | 0.0141 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 3.4e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 936 |\n",
|
|
"| n_updates | 560 |\n",
|
|
"| policy_gradient_loss | -0.000543 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -313 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 58 |\n",
|
|
"| time_elapsed | 704 |\n",
|
|
"| total_timesteps | 118784 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0005463155 |\n",
|
|
"| clip_fraction | 0.00444 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 5.13e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.02e+03 |\n",
|
|
"| n_updates | 570 |\n",
|
|
"| policy_gradient_loss | -0.000174 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.1e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -317 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 59 |\n",
|
|
"| time_elapsed | 716 |\n",
|
|
"| total_timesteps | 120832 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0037943618 |\n",
|
|
"| clip_fraction | 0.0239 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.09e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 372 |\n",
|
|
"| n_updates | 580 |\n",
|
|
"| policy_gradient_loss | -2.05e-05 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -321 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 60 |\n",
|
|
"| time_elapsed | 728 |\n",
|
|
"| total_timesteps | 122880 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0015846763 |\n",
|
|
"| clip_fraction | 0.0468 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 3.22e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.53e+03 |\n",
|
|
"| n_updates | 590 |\n",
|
|
"| policy_gradient_loss | -0.000768 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 4.11e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -325 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 61 |\n",
|
|
"| time_elapsed | 740 |\n",
|
|
"| total_timesteps | 124928 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0014858413 |\n",
|
|
"| clip_fraction | 0.0124 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 1.67e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.44e+03 |\n",
|
|
"| n_updates | 600 |\n",
|
|
"| policy_gradient_loss | 0.000545 |\n",
|
|
"| std | 1.04 |\n",
|
|
"| value_loss | 3.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -331 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 62 |\n",
|
|
"| time_elapsed | 752 |\n",
|
|
"| total_timesteps | 126976 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0038123443 |\n",
|
|
"| clip_fraction | 0.0339 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 1.97e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.05e+03 |\n",
|
|
"| n_updates | 610 |\n",
|
|
"| policy_gradient_loss | -0.00259 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.13e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -335 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 63 |\n",
|
|
"| time_elapsed | 764 |\n",
|
|
"| total_timesteps | 129024 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.003941954 |\n",
|
|
"| clip_fraction | 0.00273 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 3.7e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.89e+03 |\n",
|
|
"| n_updates | 620 |\n",
|
|
"| policy_gradient_loss | 0.0002 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.11e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -341 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 64 |\n",
|
|
"| time_elapsed | 776 |\n",
|
|
"| total_timesteps | 131072 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.007216826 |\n",
|
|
"| clip_fraction | 0.0402 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.61e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.17e+03 |\n",
|
|
"| n_updates | 630 |\n",
|
|
"| policy_gradient_loss | -0.000444 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.13e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -345 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 65 |\n",
|
|
"| time_elapsed | 788 |\n",
|
|
"| total_timesteps | 133120 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.001702552 |\n",
|
|
"| clip_fraction | 0.0259 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.15e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.62e+03 |\n",
|
|
"| n_updates | 640 |\n",
|
|
"| policy_gradient_loss | 3.53e-05 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.12e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -350 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 66 |\n",
|
|
"| time_elapsed | 800 |\n",
|
|
"| total_timesteps | 135168 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.01116517 |\n",
|
|
"| clip_fraction | 0.128 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.19e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 370 |\n",
|
|
"| n_updates | 650 |\n",
|
|
"| policy_gradient_loss | -0.000384 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.13e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -356 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 168 |\n",
|
|
"| iterations | 67 |\n",
|
|
"| time_elapsed | 812 |\n",
|
|
"| total_timesteps | 137216 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00059924903 |\n",
|
|
"| clip_fraction | 0.0156 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.8e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.21e+03 |\n",
|
|
"| n_updates | 660 |\n",
|
|
"| policy_gradient_loss | 0.000519 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -360 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 68 |\n",
|
|
"| time_elapsed | 823 |\n",
|
|
"| total_timesteps | 139264 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.008353274 |\n",
|
|
"| clip_fraction | 0.0397 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 8.05e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 6.21e+03 |\n",
|
|
"| n_updates | 670 |\n",
|
|
"| policy_gradient_loss | -0.00075 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 4.12e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -365 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 69 |\n",
|
|
"| time_elapsed | 835 |\n",
|
|
"| total_timesteps | 141312 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0058903834 |\n",
|
|
"| clip_fraction | 0.0345 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 3.28e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.21e+03 |\n",
|
|
"| n_updates | 680 |\n",
|
|
"| policy_gradient_loss | -0.000968 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -370 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 70 |\n",
|
|
"| time_elapsed | 847 |\n",
|
|
"| total_timesteps | 143360 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00016515396 |\n",
|
|
"| clip_fraction | 0.0125 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.1e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.36e+03 |\n",
|
|
"| n_updates | 690 |\n",
|
|
"| policy_gradient_loss | 7.53e-05 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.12e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -373 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 71 |\n",
|
|
"| time_elapsed | 859 |\n",
|
|
"| total_timesteps | 145408 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0013749554 |\n",
|
|
"| clip_fraction | 0.0172 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 3.99e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.63e+03 |\n",
|
|
"| n_updates | 700 |\n",
|
|
"| policy_gradient_loss | -0.00127 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -377 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 72 |\n",
|
|
"| time_elapsed | 870 |\n",
|
|
"| total_timesteps | 147456 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0012910418 |\n",
|
|
"| clip_fraction | 0.0167 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.08e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 882 |\n",
|
|
"| n_updates | 710 |\n",
|
|
"| policy_gradient_loss | -4.58e-05 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -380 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 73 |\n",
|
|
"| time_elapsed | 882 |\n",
|
|
"| total_timesteps | 149504 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0010234144 |\n",
|
|
"| clip_fraction | 0.000293 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.32e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 109 |\n",
|
|
"| n_updates | 720 |\n",
|
|
"| policy_gradient_loss | 0.000243 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -384 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 74 |\n",
|
|
"| time_elapsed | 894 |\n",
|
|
"| total_timesteps | 151552 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.000599641 |\n",
|
|
"| clip_fraction | 0.0156 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 8.64e-06 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.79e+03 |\n",
|
|
"| n_updates | 730 |\n",
|
|
"| policy_gradient_loss | 0.000339 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -387 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 75 |\n",
|
|
"| time_elapsed | 906 |\n",
|
|
"| total_timesteps | 153600 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0004998102 |\n",
|
|
"| clip_fraction | 0.0404 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 3.27e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 980 |\n",
|
|
"| n_updates | 740 |\n",
|
|
"| policy_gradient_loss | 0.00127 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 4.12e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -390 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 76 |\n",
|
|
"| time_elapsed | 918 |\n",
|
|
"| total_timesteps | 155648 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0055045467 |\n",
|
|
"| clip_fraction | 0.0171 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 1.28e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.81e+03 |\n",
|
|
"| n_updates | 750 |\n",
|
|
"| policy_gradient_loss | 0.000433 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -395 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 77 |\n",
|
|
"| time_elapsed | 930 |\n",
|
|
"| total_timesteps | 157696 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00067343883 |\n",
|
|
"| clip_fraction | 0.0165 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 1.79e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 888 |\n",
|
|
"| n_updates | 760 |\n",
|
|
"| policy_gradient_loss | 0.000467 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -400 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 78 |\n",
|
|
"| time_elapsed | 942 |\n",
|
|
"| total_timesteps | 159744 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005202517 |\n",
|
|
"| clip_fraction | 0.104 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 2.89e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 662 |\n",
|
|
"| n_updates | 770 |\n",
|
|
"| policy_gradient_loss | -0.00199 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.9e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -406 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 79 |\n",
|
|
"| time_elapsed | 954 |\n",
|
|
"| total_timesteps | 161792 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0017581655 |\n",
|
|
"| clip_fraction | 0.00181 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.59e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.07e+03 |\n",
|
|
"| n_updates | 780 |\n",
|
|
"| policy_gradient_loss | 0.000173 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -410 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 80 |\n",
|
|
"| time_elapsed | 966 |\n",
|
|
"| total_timesteps | 163840 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00069459836 |\n",
|
|
"| clip_fraction | 0.0629 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 2.23e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.34e+03 |\n",
|
|
"| n_updates | 790 |\n",
|
|
"| policy_gradient_loss | -2.99e-05 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -414 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 81 |\n",
|
|
"| time_elapsed | 978 |\n",
|
|
"| total_timesteps | 165888 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0035149038 |\n",
|
|
"| clip_fraction | 0.036 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 1.04e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.69e+03 |\n",
|
|
"| n_updates | 800 |\n",
|
|
"| policy_gradient_loss | 0.000948 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -418 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 82 |\n",
|
|
"| time_elapsed | 990 |\n",
|
|
"| total_timesteps | 167936 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0068787774 |\n",
|
|
"| clip_fraction | 0.0504 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.04e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+03 |\n",
|
|
"| n_updates | 810 |\n",
|
|
"| policy_gradient_loss | -0.000511 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -421 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 83 |\n",
|
|
"| time_elapsed | 1001 |\n",
|
|
"| total_timesteps | 169984 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0018102819 |\n",
|
|
"| clip_fraction | 0.00742 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.06e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 5.99e+03 |\n",
|
|
"| n_updates | 820 |\n",
|
|
"| policy_gradient_loss | 0.000792 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -424 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 84 |\n",
|
|
"| time_elapsed | 1013 |\n",
|
|
"| total_timesteps | 172032 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0044906293 |\n",
|
|
"| clip_fraction | 0.00903 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 2.19e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 536 |\n",
|
|
"| n_updates | 830 |\n",
|
|
"| policy_gradient_loss | 8.28e-05 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -427 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 85 |\n",
|
|
"| time_elapsed | 1025 |\n",
|
|
"| total_timesteps | 174080 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0013765441 |\n",
|
|
"| clip_fraction | 0.000635 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 3.98e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.56e+03 |\n",
|
|
"| n_updates | 840 |\n",
|
|
"| policy_gradient_loss | -1.69e-05 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -430 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 86 |\n",
|
|
"| time_elapsed | 1037 |\n",
|
|
"| total_timesteps | 176128 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00035555626 |\n",
|
|
"| clip_fraction | 0.00757 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.58e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.53e+03 |\n",
|
|
"| n_updates | 850 |\n",
|
|
"| policy_gradient_loss | -0.000221 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -432 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 87 |\n",
|
|
"| time_elapsed | 1049 |\n",
|
|
"| total_timesteps | 178176 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0026123272 |\n",
|
|
"| clip_fraction | 0.0108 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 3.12e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.08e+03 |\n",
|
|
"| n_updates | 860 |\n",
|
|
"| policy_gradient_loss | 0.000388 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -434 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 88 |\n",
|
|
"| time_elapsed | 1061 |\n",
|
|
"| total_timesteps | 180224 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00018668428 |\n",
|
|
"| clip_fraction | 0.0435 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 1.5e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.38e+03 |\n",
|
|
"| n_updates | 870 |\n",
|
|
"| policy_gradient_loss | 0.000264 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -436 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 89 |\n",
|
|
"| time_elapsed | 1072 |\n",
|
|
"| total_timesteps | 182272 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0069202585 |\n",
|
|
"| clip_fraction | 0.0126 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 2.89e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 732 |\n",
|
|
"| n_updates | 880 |\n",
|
|
"| policy_gradient_loss | -0.000634 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -437 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 90 |\n",
|
|
"| time_elapsed | 1084 |\n",
|
|
"| total_timesteps | 184320 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0013296772 |\n",
|
|
"| clip_fraction | 0.0431 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.43 |\n",
|
|
"| explained_variance | 4.08e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.81e+03 |\n",
|
|
"| n_updates | 890 |\n",
|
|
"| policy_gradient_loss | 0.00076 |\n",
|
|
"| std | 1.01 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -439 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 169 |\n",
|
|
"| iterations | 91 |\n",
|
|
"| time_elapsed | 1096 |\n",
|
|
"| total_timesteps | 186368 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.005249043 |\n",
|
|
"| clip_fraction | 0.0232 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 1.92e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.29e+03 |\n",
|
|
"| n_updates | 900 |\n",
|
|
"| policy_gradient_loss | -0.000256 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -439 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 92 |\n",
|
|
"| time_elapsed | 1108 |\n",
|
|
"| total_timesteps | 188416 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 3.8214377e-05 |\n",
|
|
"| clip_fraction | 0.0542 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 4.1e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.05e+03 |\n",
|
|
"| n_updates | 910 |\n",
|
|
"| policy_gradient_loss | -9.36e-05 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -440 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 93 |\n",
|
|
"| time_elapsed | 1120 |\n",
|
|
"| total_timesteps | 190464 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0019262378 |\n",
|
|
"| clip_fraction | 0.0352 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.12e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.11e+03 |\n",
|
|
"| n_updates | 920 |\n",
|
|
"| policy_gradient_loss | -0.000944 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -441 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 94 |\n",
|
|
"| time_elapsed | 1132 |\n",
|
|
"| total_timesteps | 192512 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00047276722 |\n",
|
|
"| clip_fraction | 0.036 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 2.87e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 163 |\n",
|
|
"| n_updates | 930 |\n",
|
|
"| policy_gradient_loss | 0.00054 |\n",
|
|
"| std | 1.02 |\n",
|
|
"| value_loss | 3.15e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -442 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 95 |\n",
|
|
"| time_elapsed | 1144 |\n",
|
|
"| total_timesteps | 194560 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00018265905 |\n",
|
|
"| clip_fraction | 0.00132 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.44 |\n",
|
|
"| explained_variance | 4.35e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.17e+03 |\n",
|
|
"| n_updates | 940 |\n",
|
|
"| policy_gradient_loss | 0.000267 |\n",
|
|
"| std | 1.03 |\n",
|
|
"| value_loss | 4.15e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -444 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 96 |\n",
|
|
"| time_elapsed | 1156 |\n",
|
|
"| total_timesteps | 196608 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.014808972 |\n",
|
|
"| clip_fraction | 0.0734 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.45 |\n",
|
|
"| explained_variance | 2.44e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.85e+03 |\n",
|
|
"| n_updates | 950 |\n",
|
|
"| policy_gradient_loss | -0.000946 |\n",
|
|
"| std | 1.04 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -446 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 97 |\n",
|
|
"| time_elapsed | 1167 |\n",
|
|
"| total_timesteps | 198656 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.000299958 |\n",
|
|
"| clip_fraction | 0.0442 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.46 |\n",
|
|
"| explained_variance | 4.51e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.81e+03 |\n",
|
|
"| n_updates | 960 |\n",
|
|
"| policy_gradient_loss | -0.000387 |\n",
|
|
"| std | 1.04 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -447 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 98 |\n",
|
|
"| time_elapsed | 1179 |\n",
|
|
"| total_timesteps | 200704 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 6.6622015e-05 |\n",
|
|
"| clip_fraction | 0.0193 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.46 |\n",
|
|
"| explained_variance | 2.52e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.14e+03 |\n",
|
|
"| n_updates | 970 |\n",
|
|
"| policy_gradient_loss | 0.000193 |\n",
|
|
"| std | 1.04 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -448 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 99 |\n",
|
|
"| time_elapsed | 1191 |\n",
|
|
"| total_timesteps | 202752 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0016583821 |\n",
|
|
"| clip_fraction | 0.068 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.47 |\n",
|
|
"| explained_variance | 3.03e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 620 |\n",
|
|
"| n_updates | 980 |\n",
|
|
"| policy_gradient_loss | -0.0011 |\n",
|
|
"| std | 1.05 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -449 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 100 |\n",
|
|
"| time_elapsed | 1203 |\n",
|
|
"| total_timesteps | 204800 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.014906235 |\n",
|
|
"| clip_fraction | 0.0531 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.47 |\n",
|
|
"| explained_variance | 4.49e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 188 |\n",
|
|
"| n_updates | 990 |\n",
|
|
"| policy_gradient_loss | -0.00305 |\n",
|
|
"| std | 1.06 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -450 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 101 |\n",
|
|
"| time_elapsed | 1215 |\n",
|
|
"| total_timesteps | 206848 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00044346167 |\n",
|
|
"| clip_fraction | 0.00498 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.49 |\n",
|
|
"| explained_variance | 2.75e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 634 |\n",
|
|
"| n_updates | 1000 |\n",
|
|
"| policy_gradient_loss | -0.000378 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -451 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 102 |\n",
|
|
"| time_elapsed | 1227 |\n",
|
|
"| total_timesteps | 208896 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00400657 |\n",
|
|
"| clip_fraction | 0.074 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.49 |\n",
|
|
"| explained_variance | 4.66e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.04e+03 |\n",
|
|
"| n_updates | 1010 |\n",
|
|
"| policy_gradient_loss | -0.00208 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 4.15e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -452 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 103 |\n",
|
|
"| time_elapsed | 1238 |\n",
|
|
"| total_timesteps | 210944 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.000119188306 |\n",
|
|
"| clip_fraction | 0.0681 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.49 |\n",
|
|
"| explained_variance | 2.54e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 130 |\n",
|
|
"| n_updates | 1020 |\n",
|
|
"| policy_gradient_loss | 0.000329 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"--------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -454 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 104 |\n",
|
|
"| time_elapsed | 1251 |\n",
|
|
"| total_timesteps | 212992 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.02515565 |\n",
|
|
"| clip_fraction | 0.0928 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.49 |\n",
|
|
"| explained_variance | 3.2e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 507 |\n",
|
|
"| n_updates | 1030 |\n",
|
|
"| policy_gradient_loss | -0.00217 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -455 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 105 |\n",
|
|
"| time_elapsed | 1263 |\n",
|
|
"| total_timesteps | 215040 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00012812333 |\n",
|
|
"| clip_fraction | 0.00112 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 5.17e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.92e+03 |\n",
|
|
"| n_updates | 1040 |\n",
|
|
"| policy_gradient_loss | 0.000452 |\n",
|
|
"| std | 1.09 |\n",
|
|
"| value_loss | 3.75e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -457 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 106 |\n",
|
|
"| time_elapsed | 1275 |\n",
|
|
"| total_timesteps | 217088 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0017133974 |\n",
|
|
"| clip_fraction | 0.00947 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 4.11e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.63e+03 |\n",
|
|
"| n_updates | 1050 |\n",
|
|
"| policy_gradient_loss | 0.000314 |\n",
|
|
"| std | 1.09 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -457 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 107 |\n",
|
|
"| time_elapsed | 1288 |\n",
|
|
"| total_timesteps | 219136 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0031836042 |\n",
|
|
"| clip_fraction | 0.00962 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 6.03e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.78e+03 |\n",
|
|
"| n_updates | 1060 |\n",
|
|
"| policy_gradient_loss | -0.000246 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -458 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 108 |\n",
|
|
"| time_elapsed | 1300 |\n",
|
|
"| total_timesteps | 221184 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0026518258 |\n",
|
|
"| clip_fraction | 0.0826 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 3.83e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 895 |\n",
|
|
"| n_updates | 1070 |\n",
|
|
"| policy_gradient_loss | -0.000444 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -459 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 109 |\n",
|
|
"| time_elapsed | 1312 |\n",
|
|
"| total_timesteps | 223232 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0048480523 |\n",
|
|
"| clip_fraction | 0.0352 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 5.64e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.95e+03 |\n",
|
|
"| n_updates | 1080 |\n",
|
|
"| policy_gradient_loss | -0.000337 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 4.15e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -459 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 110 |\n",
|
|
"| time_elapsed | 1324 |\n",
|
|
"| total_timesteps | 225280 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0028444673 |\n",
|
|
"| clip_fraction | 0.00771 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 3.26e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.62e+03 |\n",
|
|
"| n_updates | 1090 |\n",
|
|
"| policy_gradient_loss | -1.15e-05 |\n",
|
|
"| std | 1.08 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -459 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 111 |\n",
|
|
"| time_elapsed | 1336 |\n",
|
|
"| total_timesteps | 227328 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00074651965 |\n",
|
|
"| clip_fraction | 0.000146 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 4.02e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 1.73e+03 |\n",
|
|
"| n_updates | 1100 |\n",
|
|
"| policy_gradient_loss | 7.33e-05 |\n",
|
|
"| std | 1.09 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"-------------------------------------------\n",
|
|
"----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -459 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 112 |\n",
|
|
"| time_elapsed | 1348 |\n",
|
|
"| total_timesteps | 229376 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.00920542 |\n",
|
|
"| clip_fraction | 0.0695 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.5 |\n",
|
|
"| explained_variance | 7.62e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 857 |\n",
|
|
"| n_updates | 1110 |\n",
|
|
"| policy_gradient_loss | -0.00293 |\n",
|
|
"| std | 1.09 |\n",
|
|
"| value_loss | 4.14e+03 |\n",
|
|
"----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -459 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 113 |\n",
|
|
"| time_elapsed | 1360 |\n",
|
|
"| total_timesteps | 231424 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.003430673 |\n",
|
|
"| clip_fraction | 0.0324 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.51 |\n",
|
|
"| explained_variance | 4e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.51e+03 |\n",
|
|
"| n_updates | 1120 |\n",
|
|
"| policy_gradient_loss | 0.000274 |\n",
|
|
"| std | 1.1 |\n",
|
|
"| value_loss | 3.16e+03 |\n",
|
|
"-----------------------------------------\n",
|
|
"-----------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | -458 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 170 |\n",
|
|
"| iterations | 114 |\n",
|
|
"| time_elapsed | 1372 |\n",
|
|
"| total_timesteps | 233472 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.010919753 |\n",
|
|
"| clip_fraction | 0.0383 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.52 |\n",
|
|
"| explained_variance | 7.24e-05 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.47e+03 |\n",
|
|
"| n_updates | 1130 |\n",
|
|
"| policy_gradient_loss | -0.0024 |\n",
|
|
"| std | 1.11 |\n",
|
|
"| value_loss | 4.15e+03 |\n",
|
|
"-----------------------------------------\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "KeyboardInterrupt",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# import a model and try it out!\u001b[39;00m\n\u001b[1;32m 2\u001b[0m model \u001b[38;5;241m=\u001b[39m PPO(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMultiInputPolicy\u001b[39m\u001b[38;5;124m\"\u001b[39m, wrapped_env, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1_000_000\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/ppo/ppo.py:311\u001b[0m, in \u001b[0;36mPPO.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28mself\u001b[39m: SelfPPO,\n\u001b[1;32m 304\u001b[0m total_timesteps: \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 309\u001b[0m progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 310\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SelfPPO:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_interval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[43m \u001b[49m\u001b[43mtb_log_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_log_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 316\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/on_policy_algorithm.py:323\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_timesteps \u001b[38;5;241m<\u001b[39m total_timesteps:\n\u001b[0;32m--> 323\u001b[0m continue_training \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_rollouts\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrollout_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_rollout_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_steps\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m continue_training:\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/on_policy_algorithm.py:218\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.collect_rollouts\u001b[0;34m(self, env, callback, rollout_buffer, n_rollout_steps)\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 214\u001b[0m \u001b[38;5;66;03m# Otherwise, clip the actions to avoid out of bound error\u001b[39;00m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;66;03m# as we are sampling from an unbounded Gaussian distribution\u001b[39;00m\n\u001b[1;32m 216\u001b[0m clipped_actions \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mclip(actions, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_space\u001b[38;5;241m.\u001b[39mlow, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_space\u001b[38;5;241m.\u001b[39mhigh)\n\u001b[0;32m--> 218\u001b[0m new_obs, rewards, dones, infos \u001b[38;5;241m=\u001b[39m \u001b[43menv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclipped_actions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_timesteps \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m env\u001b[38;5;241m.\u001b[39mnum_envs\n\u001b[1;32m 222\u001b[0m \u001b[38;5;66;03m# Give access to local variables\u001b[39;00m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/vec_env/base_vec_env.py:206\u001b[0m, in \u001b[0;36mVecEnv.step\u001b[0;34m(self, actions)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;124;03mStep the environments with the given action\u001b[39;00m\n\u001b[1;32m 201\u001b[0m \n\u001b[1;32m 202\u001b[0m \u001b[38;5;124;03m:param actions: the action\u001b[39;00m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;124;03m:return: observation, reward, done, information\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstep_async(actions)\n\u001b[0;32m--> 206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep_wait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/vec_env/dummy_vec_env.py:58\u001b[0m, in \u001b[0;36mDummyVecEnv.step_wait\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mstep_wait\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m VecEnvStepReturn:\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# Avoid circular imports\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m env_idx \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_envs):\n\u001b[0;32m---> 58\u001b[0m obs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuf_rews[env_idx], terminated, truncated, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuf_infos[env_idx] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menvs\u001b[49m\u001b[43m[\u001b[49m\u001b[43menv_idx\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 59\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mactions\u001b[49m\u001b[43m[\u001b[49m\u001b[43menv_idx\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;66;03m# convert to SB3 VecEnv api\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuf_dones[env_idx] \u001b[38;5;241m=\u001b[39m terminated \u001b[38;5;129;01mor\u001b[39;00m truncated\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/monitor.py:94\u001b[0m, in \u001b[0;36mMonitor.step\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneeds_reset:\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTried to step environment that needs reset\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 94\u001b[0m observation, reward, terminated, truncated, info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43maction\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrewards\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mfloat\u001b[39m(reward))\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m terminated \u001b[38;5;129;01mor\u001b[39;00m truncated:\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/gymnasium/wrappers/jax_to_numpy.py:166\u001b[0m, in \u001b[0;36mJaxToNumpy.step\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Transforms the action to a jax array .\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \n\u001b[1;32m 159\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124;03m A tuple containing numpy versions of the next observation, reward, termination, truncation, and extra info.\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 165\u001b[0m jax_action \u001b[38;5;241m=\u001b[39m numpy_to_jax(action)\n\u001b[0;32m--> 166\u001b[0m obs, reward, terminated, truncated, info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjax_action\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[1;32m 169\u001b[0m jax_to_numpy(obs),\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28mfloat\u001b[39m(reward),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 173\u001b[0m jax_to_numpy(info),\n\u001b[1;32m 174\u001b[0m )\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/src/solarcarsim/simv1.py:123\u001b[0m, in \u001b[0;36mSolarRaceV1.step\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 120\u001b[0m reward \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m500\u001b[39m\n\u001b[1;32m 121\u001b[0m truncated \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 123\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_obs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, reward, terminated, truncated, {}\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/src/solarcarsim/simv1.py:64\u001b[0m, in \u001b[0;36mSolarRaceV1._get_obs\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_obs\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 64\u001b[0m slope_view, wind_view \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_vision_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mposition\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state[\u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state[\u001b[38;5;241m1\u001b[39m],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwind\u001b[39m\u001b[38;5;124m\"\u001b[39m: wind_view,\n\u001b[1;32m 73\u001b[0m }\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/src/solarcarsim/simv1.py:59\u001b[0m, in \u001b[0;36mSolarRaceV1._vision_function\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 57\u001b[0m pos \u001b[38;5;241m=\u001b[39m jnp\u001b[38;5;241m.\u001b[39mastype(jnp\u001b[38;5;241m.\u001b[39mround(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state[\u001b[38;5;241m0\u001b[39m]), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mint32\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 58\u001b[0m time \u001b[38;5;241m=\u001b[39m jnp\u001b[38;5;241m.\u001b[39mastype(jnp\u001b[38;5;241m.\u001b[39mround(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state[\u001b[38;5;241m1\u001b[39m]), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mint32\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 59\u001b[0m wind_view \u001b[38;5;241m=\u001b[39m \u001b[43mslookup\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhstack\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mpos\u001b[49m\u001b[43m,\u001b[49m\u001b[43mtime\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 60\u001b[0m slope_view \u001b[38;5;241m=\u001b[39m jax\u001b[38;5;241m.\u001b[39mlax\u001b[38;5;241m.\u001b[39mdynamic_slice(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_environment[\u001b[38;5;241m2\u001b[39m], pos, (\u001b[38;5;241m100\u001b[39m,))\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m slope_view, wind_view\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/src/solarcarsim/simv1.py:56\u001b[0m, in \u001b[0;36mSolarRaceV1._vision_function.<locals>.slookup\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mslookup\u001b[39m(x):\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjax\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlax\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdynamic_slice\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_environment\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/jax/_src/lax/slicing.py:160\u001b[0m, in \u001b[0;36mdynamic_slice\u001b[0;34m(operand, start_indices, slice_sizes)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdynamic_slice\u001b[39m(\n\u001b[1;32m 113\u001b[0m operand: Array \u001b[38;5;241m|\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray,\n\u001b[1;32m 114\u001b[0m start_indices: Array \u001b[38;5;241m|\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray \u001b[38;5;241m|\u001b[39m Sequence[ArrayLike],\n\u001b[1;32m 115\u001b[0m slice_sizes: Shape,\n\u001b[1;32m 116\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Array:\n\u001b[1;32m 117\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Wraps XLA's `DynamicSlice\u001b[39;00m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124;03m <https://www.tensorflow.org/xla/operation_semantics#dynamicslice>`_\u001b[39;00m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;124;03m operator.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;124;03m - :func:`jax.lax.dynamic_index_in_dim`\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m start_indices \u001b[38;5;241m=\u001b[39m \u001b[43m_dynamic_slice_indices\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperand\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart_indices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m config\u001b[38;5;241m.\u001b[39mdynamic_shapes\u001b[38;5;241m.\u001b[39mvalue:\n\u001b[1;32m 162\u001b[0m dynamic_sizes, static_sizes \u001b[38;5;241m=\u001b[39m lax\u001b[38;5;241m.\u001b[39m_extract_tracers_dyn_shape(slice_sizes)\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/jax/_src/lax/slicing.py:3057\u001b[0m, in \u001b[0;36m_dynamic_slice_indices\u001b[0;34m(operand, start_indices)\u001b[0m\n\u001b[1;32m 3055\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 3056\u001b[0m d_arr \u001b[38;5;241m=\u001b[39m lax\u001b[38;5;241m.\u001b[39mconvert_element_type(d, _dtype(i))\n\u001b[0;32m-> 3057\u001b[0m result\u001b[38;5;241m.\u001b[39mappend(lax\u001b[38;5;241m.\u001b[39mselect(i \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m, \u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43md_arr\u001b[49m, i))\n\u001b[1;32m 3058\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/jax/_src/numpy/array_methods.py:573\u001b[0m, in \u001b[0;36m_defer_to_unrecognized_arg.<locals>.deferring_binary_op\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 571\u001b[0m args \u001b[38;5;241m=\u001b[39m (other, \u001b[38;5;28mself\u001b[39m) \u001b[38;5;28;01mif\u001b[39;00m swap \u001b[38;5;28;01melse\u001b[39;00m (\u001b[38;5;28mself\u001b[39m, other)\n\u001b[1;32m 572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(other, _accepted_binop_types):\n\u001b[0;32m--> 573\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbinary_op\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[38;5;66;03m# Note: don't use isinstance here, because we don't want to raise for\u001b[39;00m\n\u001b[1;32m 575\u001b[0m \u001b[38;5;66;03m# subclasses, e.g. NamedTuple objects that may override operators.\u001b[39;00m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(other) \u001b[38;5;129;01min\u001b[39;00m _rejected_binop_types:\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/jax/_src/numpy/ufunc_api.py:179\u001b[0m, in \u001b[0;36mufunc.__call__\u001b[0;34m(self, out, where, *args)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwhere argument of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 178\u001b[0m call \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__static_props[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcall\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_vectorized\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
|
]
|
|
},
|
|
{
|
|
"ename": "",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
|
|
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
|
|
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
|
|
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Create a PPO agent for the wrapped environment and train it.\n",
|
|
"model = PPO(\"MultiInputPolicy\", wrapped_env, verbose=1)\n",
|
|
"model.learn(total_timesteps=1_000_000)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vec_env = model.get_env()\n",
|
|
"obs = vec_env.reset()\n",
|
|
"actions = []\n",
|
|
"obs_list = []\n",
|
|
"rewards = []\n",
|
|
"for i in range(1000):\n",
|
|
" action, _state = model.predict(obs, deterministic=True)\n",
|
|
" actions.append(action)\n",
|
|
" obs, reward, done, info = vec_env.step(action)\n",
|
|
" obs_list.append(obs)\n",
|
|
" rewards.append(reward)\n",
|
|
"\n",
|
|
" \n",
|
|
" # VecEnv resets automatically\n",
|
|
" if done:\n",
|
|
" break\n",
|
|
" # obs = vec_env.reset()\n",
|
|
"\n",
|
|
"position = jnp.array([x['position'] for x in obs_list]).flatten()\n",
|
|
"energy = jnp.array([x['energy'] for x in obs_list]).flatten()\n",
|
|
"actions = jnp.array(actions).flatten()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x738df5c59040>]"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 1200x600 with 3 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"fig, (ax1, ax2, ax3) = plt.subplots(3,1, figsize=(12,6))\n",
|
|
"ax1.plot(position, label=\"position\")\n",
|
|
"ax2.plot(actions, label=\"energy\")\n",
|
|
"ax3.plot(rewards)\n",
|
|
"# plt.legend()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
|
|
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"actions"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|