diff --git a/notebooks/v1sim.ipynb b/notebooks/v1sim.ipynb index b259491..bb2d9fe 100644 --- a/notebooks/v1sim.ipynb +++ b/notebooks/v1sim.ipynb @@ -564,11 +564,11 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "%run ../src/solarcarsim/gym.py\n", + "%run ../src/solarcarsim/simv1.py\n", "import gymnasium as gym\n", "from gymnasium.wrappers.jax_to_numpy import JaxToNumpy\n", "from gymnasium.wrappers.vector import JaxToNumpy as VJaxToNumpy" @@ -604,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -613,100 +613,6130 @@ "text": [ "Using cuda device\n", "Wrapping the env with a `Monitor` wrapper\n", - "Wrapping the env in a DummyVecEnv.\n" + "Wrapping the env in a DummyVecEnv.\n", + "---------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.77e+11 |\n", + "| time/ | |\n", + "| fps | 335 |\n", + "| iterations | 1 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 2048 |\n", + "---------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.72e+11 |\n", + "| time/ | |\n", + "| fps | 313 |\n", + "| iterations | 2 |\n", + "| time_elapsed | 13 |\n", + "| total_timesteps | 4096 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.52e+20 |\n", + "| n_updates | 10 |\n", + "| policy_gradient_loss | 6.05e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.84e+20 |\n", + "--------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 3 |\n", + "| time_elapsed | 19 |\n", + "| total_timesteps | 6144 |\n", + "| train/ | |\n", + "| approx_kl | -2.0372681e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.5e+20 |\n", + "| n_updates | 20 |\n", + "| policy_gradient_loss | -2.82e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.52e+20 |\n", + "--------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 4 |\n", + "| time_elapsed | 26 |\n", + "| total_timesteps | 8192 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.23e+20 |\n", + "| n_updates | 30 |\n", + "| policy_gradient_loss | -6.43e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.91e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 5 |\n", + "| time_elapsed | 33 |\n", + "| total_timesteps | 10240 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.09e+20 |\n", + "| n_updates | 40 |\n", + "| policy_gradient_loss | -1.82e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.55e+20 |\n", + "---------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 6 |\n", + "| time_elapsed | 39 |\n", + "| total_timesteps | 12288 |\n", + "| train/ | |\n", + "| approx_kl | -1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.86e+20 |\n", + "| n_updates | 50 |\n", + "| policy_gradient_loss | 7.23e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.96e+20 |\n", + "--------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 7 |\n", + "| time_elapsed | 46 |\n", + "| total_timesteps | 14336 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.05e+20 |\n", + "| n_updates | 60 |\n", + "| policy_gradient_loss | -6.76e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.04e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 8 |\n", + "| time_elapsed | 53 |\n", + "| total_timesteps | 16384 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.27e+20 |\n", + "| n_updates | 70 |\n", + "| policy_gradient_loss | 6.42e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.66e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 9 |\n", + "| time_elapsed | 59 |\n", + "| total_timesteps | 18432 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.38e+20 |\n", + "| n_updates | 80 |\n", + "| policy_gradient_loss | -2.08e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.94e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 10 |\n", + "| time_elapsed | 66 |\n", + "| total_timesteps | 20480 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.77e+20 |\n", + "| n_updates | 90 |\n", + "| policy_gradient_loss | -4.44e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.87e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 11 |\n", + "| time_elapsed | 73 |\n", + "| total_timesteps | 22528 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.84e+20 |\n", + "| n_updates | 100 |\n", + "| policy_gradient_loss | -3.09e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.7e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 12 |\n", + "| time_elapsed | 79 |\n", + "| total_timesteps | 24576 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.47e+20 |\n", + "| n_updates | 110 |\n", + "| policy_gradient_loss | 5.43e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.17e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 13 |\n", + "| time_elapsed | 86 |\n", + "| total_timesteps | 26624 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.95e+20 |\n", + "| n_updates | 120 |\n", + "| policy_gradient_loss | -4.86e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.55e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 14 |\n", + "| time_elapsed | 92 |\n", + "| total_timesteps | 28672 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.6e+20 |\n", + "| n_updates | 130 |\n", + "| policy_gradient_loss | 4.9e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.01e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 15 |\n", + "| time_elapsed | 99 |\n", + "| total_timesteps | 30720 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.84e+20 |\n", + "| n_updates | 140 |\n", + "| policy_gradient_loss | -7.74e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.1e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 16 |\n", + "| time_elapsed | 106 |\n", + "| total_timesteps | 32768 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.22e+20 |\n", + "| n_updates | 150 |\n", + "| policy_gradient_loss | -2.27e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.73e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 17 |\n", + "| time_elapsed | 112 |\n", + "| total_timesteps | 34816 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.86e+20 |\n", + "| n_updates | 160 |\n", + "| policy_gradient_loss | -6.14e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.47e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 18 |\n", + "| time_elapsed | 119 |\n", + "| total_timesteps | 36864 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.05e+20 |\n", + "| n_updates | 170 |\n", + "| policy_gradient_loss | 2.55e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.59e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 19 |\n", + "| time_elapsed | 126 |\n", + "| total_timesteps | 38912 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 180 |\n", + "| policy_gradient_loss | 3.69e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.98e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 20 |\n", + "| time_elapsed | 132 |\n", + "| total_timesteps | 40960 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.71e+20 |\n", + "| n_updates | 190 |\n", + "| policy_gradient_loss | 1.04e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.52e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 21 |\n", + "| time_elapsed | 139 |\n", + "| total_timesteps | 43008 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.89e+20 |\n", + "| n_updates | 200 |\n", + "| policy_gradient_loss | -1.36e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.76e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 22 |\n", + "| time_elapsed | 146 |\n", + "| total_timesteps | 45056 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.69e+20 |\n", + "| n_updates | 210 |\n", + "| policy_gradient_loss | -4.9e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.35e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 23 |\n", + "| time_elapsed | 153 |\n", + "| total_timesteps | 47104 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.12e+20 |\n", + "| n_updates | 220 |\n", + "| policy_gradient_loss | 2.67e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 24 |\n", + "| time_elapsed | 159 |\n", + "| total_timesteps | 49152 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 230 |\n", + "| policy_gradient_loss | 4.05e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.71e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 25 |\n", + "| time_elapsed | 166 |\n", + "| total_timesteps | 51200 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.78e+20 |\n", + "| n_updates | 240 |\n", + "| policy_gradient_loss | 8.82e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.44e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 26 |\n", + "| time_elapsed | 173 |\n", + "| total_timesteps | 53248 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.82e+20 |\n", + "| n_updates | 250 |\n", + "| policy_gradient_loss | -6.17e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.43e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 27 |\n", + "| time_elapsed | 179 |\n", + "| total_timesteps | 55296 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.37e+20 |\n", + "| n_updates | 260 |\n", + "| policy_gradient_loss | -6.08e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.17e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 28 |\n", + "| time_elapsed | 186 |\n", + "| total_timesteps | 57344 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.01e+20 |\n", + "| n_updates | 270 |\n", + "| policy_gradient_loss | -3.07e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.19e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 29 |\n", + "| time_elapsed | 192 |\n", + "| total_timesteps | 59392 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.78e+20 |\n", + "| n_updates | 280 |\n", + "| policy_gradient_loss | -1.64e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.88e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 30 |\n", + "| time_elapsed | 199 |\n", + "| total_timesteps | 61440 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.14e+20 |\n", + "| n_updates | 290 |\n", + "| policy_gradient_loss | 1.87e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.11e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 31 |\n", + "| time_elapsed | 206 |\n", + "| total_timesteps | 63488 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.92e+20 |\n", + "| n_updates | 300 |\n", + "| policy_gradient_loss | 2.99e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.51e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 32 |\n", + "| time_elapsed | 212 |\n", + "| total_timesteps | 65536 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.27e+20 |\n", + "| n_updates | 310 |\n", + "| policy_gradient_loss | 4.27e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.65e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 33 |\n", + "| time_elapsed | 219 |\n", + "| total_timesteps | 67584 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.35e+20 |\n", + "| n_updates | 320 |\n", + "| policy_gradient_loss | -7.1e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.61e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 34 |\n", + "| time_elapsed | 225 |\n", + "| total_timesteps | 69632 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.07e+20 |\n", + "| n_updates | 330 |\n", + "| policy_gradient_loss | 4.04e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.28e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 35 |\n", + "| time_elapsed | 232 |\n", + "| total_timesteps | 71680 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.25e+20 |\n", + "| n_updates | 340 |\n", + "| policy_gradient_loss | -1.33e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.93e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 36 |\n", + "| time_elapsed | 239 |\n", + "| total_timesteps | 73728 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.7e+20 |\n", + "| n_updates | 350 |\n", + "| policy_gradient_loss | 5.44e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.46e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 37 |\n", + "| time_elapsed | 245 |\n", + "| total_timesteps | 75776 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.51e+20 |\n", + "| n_updates | 360 |\n", + "| policy_gradient_loss | 7.37e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.79e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 38 |\n", + "| time_elapsed | 252 |\n", + "| total_timesteps | 77824 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.31e+20 |\n", + "| n_updates | 370 |\n", + "| policy_gradient_loss | 7.54e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.89e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 39 |\n", + "| time_elapsed | 258 |\n", + "| total_timesteps | 79872 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.61e+20 |\n", + "| n_updates | 380 |\n", + "| policy_gradient_loss | -1.65e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.02e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 40 |\n", + "| time_elapsed | 265 |\n", + "| total_timesteps | 81920 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.13e+20 |\n", + "| n_updates | 390 |\n", + "| policy_gradient_loss | -1.05e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.72e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 41 |\n", + "| time_elapsed | 271 |\n", + "| total_timesteps | 83968 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.44e+20 |\n", + "| n_updates | 400 |\n", + "| policy_gradient_loss | 9.9e-11 |\n", + "| std | 1 |\n", + "| value_loss | 7.51e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 42 |\n", + "| time_elapsed | 278 |\n", + "| total_timesteps | 86016 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.82e+20 |\n", + "| n_updates | 410 |\n", + "| policy_gradient_loss | 3.73e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.65e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 43 |\n", + "| time_elapsed | 285 |\n", + "| total_timesteps | 88064 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.15e+20 |\n", + "| n_updates | 420 |\n", + "| policy_gradient_loss | 1.07e-08 |\n", + "| std | 1 |\n", + "| value_loss | 8.22e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 44 |\n", + "| time_elapsed | 291 |\n", + "| total_timesteps | 90112 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.26e+20 |\n", + "| n_updates | 430 |\n", + "| policy_gradient_loss | -6.7e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.38e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 45 |\n", + "| time_elapsed | 298 |\n", + "| total_timesteps | 92160 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.17e+20 |\n", + "| n_updates | 440 |\n", + "| policy_gradient_loss | -3.11e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.41e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 46 |\n", + "| time_elapsed | 304 |\n", + "| total_timesteps | 94208 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.36e+20 |\n", + "| n_updates | 450 |\n", + "| policy_gradient_loss | -1.6e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.61e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 47 |\n", + "| time_elapsed | 311 |\n", + "| total_timesteps | 96256 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.51e+20 |\n", + "| n_updates | 460 |\n", + "| policy_gradient_loss | -2.65e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.34e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 48 |\n", + "| time_elapsed | 318 |\n", + "| total_timesteps | 98304 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.17e+20 |\n", + "| n_updates | 470 |\n", + "| policy_gradient_loss | -2.4e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.91e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 49 |\n", + "| time_elapsed | 325 |\n", + "| total_timesteps | 100352 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.99e+20 |\n", + "| n_updates | 480 |\n", + "| policy_gradient_loss | -1.58e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.04e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 50 |\n", + "| time_elapsed | 331 |\n", + "| total_timesteps | 102400 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.47e+20 |\n", + "| n_updates | 490 |\n", + "| policy_gradient_loss | 1.78e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.01e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 51 |\n", + "| time_elapsed | 338 |\n", + "| total_timesteps | 104448 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.19e+20 |\n", + "| n_updates | 500 |\n", + "| policy_gradient_loss | -4e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.91e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 52 |\n", + "| time_elapsed | 345 |\n", + "| total_timesteps | 106496 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.02e+20 |\n", + "| n_updates | 510 |\n", + "| policy_gradient_loss | 7.63e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.35e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 53 |\n", + "| time_elapsed | 351 |\n", + "| total_timesteps | 108544 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.7e+20 |\n", + "| n_updates | 520 |\n", + "| policy_gradient_loss | -4.46e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.62e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 54 |\n", + "| time_elapsed | 358 |\n", + "| total_timesteps | 110592 |\n", + "| train/ | |\n", + "| approx_kl | 1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.5e+20 |\n", + "| n_updates | 530 |\n", + "| policy_gradient_loss | 2.7e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.65e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 55 |\n", + "| time_elapsed | 365 |\n", + "| total_timesteps | 112640 |\n", + "| train/ | |\n", + "| approx_kl | 2.3283064e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.57e+20 |\n", + "| n_updates | 540 |\n", + "| policy_gradient_loss | -4.66e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.58e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 56 |\n", + "| time_elapsed | 371 |\n", + "| total_timesteps | 114688 |\n", + "| train/ | |\n", + "| approx_kl | 1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.89e+20 |\n", + "| n_updates | 550 |\n", + "| policy_gradient_loss | -1.26e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.07e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 57 |\n", + "| time_elapsed | 378 |\n", + "| total_timesteps | 116736 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.73e+20 |\n", + "| n_updates | 560 |\n", + "| policy_gradient_loss | 3.36e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.93e+20 |\n", + "------------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 58 |\n", + "| time_elapsed | 385 |\n", + "| total_timesteps | 118784 |\n", + "| train/ | |\n", + "| approx_kl | -2.0372681e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 570 |\n", + "| policy_gradient_loss | 3.67e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.24e+20 |\n", + "--------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 59 |\n", + "| time_elapsed | 391 |\n", + "| total_timesteps | 120832 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.87e+20 |\n", + "| n_updates | 580 |\n", + "| policy_gradient_loss | -2.44e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.7e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 60 |\n", + "| time_elapsed | 398 |\n", + "| total_timesteps | 122880 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.35e+20 |\n", + "| n_updates | 590 |\n", + "| policy_gradient_loss | 9.02e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.98e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 61 |\n", + "| time_elapsed | 404 |\n", + "| total_timesteps | 124928 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.22e+20 |\n", + "| n_updates | 600 |\n", + "| policy_gradient_loss | -1.74e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.01e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 62 |\n", + "| time_elapsed | 411 |\n", + "| total_timesteps | 126976 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.65e+20 |\n", + "| n_updates | 610 |\n", + "| policy_gradient_loss | 4.34e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.57e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 63 |\n", + "| time_elapsed | 418 |\n", + "| total_timesteps | 129024 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.07e+20 |\n", + "| n_updates | 620 |\n", + "| policy_gradient_loss | 8.41e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.21e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 64 |\n", + "| time_elapsed | 424 |\n", + "| total_timesteps | 131072 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.94e+20 |\n", + "| n_updates | 630 |\n", + "| policy_gradient_loss | 4.04e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.44e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 65 |\n", + "| time_elapsed | 431 |\n", + "| total_timesteps | 133120 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.45e+20 |\n", + "| n_updates | 640 |\n", + "| policy_gradient_loss | -4.02e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.26e+20 |\n", + "---------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 66 |\n", + "| time_elapsed | 437 |\n", + "| total_timesteps | 135168 |\n", + "| train/ | |\n", + "| approx_kl | -1.4551915e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.24e+20 |\n", + "| n_updates | 650 |\n", + "| policy_gradient_loss | -7.53e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.25e+20 |\n", + "--------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 67 |\n", + "| time_elapsed | 444 |\n", + "| total_timesteps | 137216 |\n", + "| train/ | |\n", + "| approx_kl | 2.6193447e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 660 |\n", + "| policy_gradient_loss | -9.75e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.22e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 68 |\n", + "| time_elapsed | 451 |\n", + "| total_timesteps | 139264 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.36e+20 |\n", + "| n_updates | 670 |\n", + "| policy_gradient_loss | 4.05e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.64e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 69 |\n", + "| time_elapsed | 457 |\n", + "| total_timesteps | 141312 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.4e+20 |\n", + "| n_updates | 680 |\n", + "| policy_gradient_loss | 2.14e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.54e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 70 |\n", + "| time_elapsed | 464 |\n", + "| total_timesteps | 143360 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.52e+20 |\n", + "| n_updates | 690 |\n", + "| policy_gradient_loss | 4.44e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.43e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 71 |\n", + "| time_elapsed | 471 |\n", + "| total_timesteps | 145408 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.75e+20 |\n", + "| n_updates | 700 |\n", + "| policy_gradient_loss | 1.57e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.35e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 72 |\n", + "| time_elapsed | 478 |\n", + "| total_timesteps | 147456 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.54e+20 |\n", + "| n_updates | 710 |\n", + "| policy_gradient_loss | 3.18e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.9e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 73 |\n", + "| time_elapsed | 485 |\n", + "| total_timesteps | 149504 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.12e+20 |\n", + "| n_updates | 720 |\n", + "| policy_gradient_loss | -3.43e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 74 |\n", + "| time_elapsed | 491 |\n", + "| total_timesteps | 151552 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.43e+20 |\n", + "| n_updates | 730 |\n", + "| policy_gradient_loss | 3.68e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.32e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 75 |\n", + "| time_elapsed | 498 |\n", + "| total_timesteps | 153600 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.3e+20 |\n", + "| n_updates | 740 |\n", + "| policy_gradient_loss | 5.75e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.27e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 76 |\n", + "| time_elapsed | 505 |\n", + "| total_timesteps | 155648 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.25e+20 |\n", + "| n_updates | 750 |\n", + "| policy_gradient_loss | -7.98e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.22e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 77 |\n", + "| time_elapsed | 511 |\n", + "| total_timesteps | 157696 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.46e+20 |\n", + "| n_updates | 760 |\n", + "| policy_gradient_loss | -9.47e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.11e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 78 |\n", + "| time_elapsed | 518 |\n", + "| total_timesteps | 159744 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.8e+20 |\n", + "| n_updates | 770 |\n", + "| policy_gradient_loss | 7.7e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.45e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 79 |\n", + "| time_elapsed | 525 |\n", + "| total_timesteps | 161792 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.52e+20 |\n", + "| n_updates | 780 |\n", + "| policy_gradient_loss | -1.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.03e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 80 |\n", + "| time_elapsed | 531 |\n", + "| total_timesteps | 163840 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.44e+20 |\n", + "| n_updates | 790 |\n", + "| policy_gradient_loss | -1.34e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.34e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 81 |\n", + "| time_elapsed | 538 |\n", + "| total_timesteps | 165888 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.67e+20 |\n", + "| n_updates | 800 |\n", + "| policy_gradient_loss | -4.87e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.82e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 82 |\n", + "| time_elapsed | 545 |\n", + "| total_timesteps | 167936 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.25e+20 |\n", + "| n_updates | 810 |\n", + "| policy_gradient_loss | 6.9e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.02e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 83 |\n", + "| time_elapsed | 551 |\n", + "| total_timesteps | 169984 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.73e+20 |\n", + "| n_updates | 820 |\n", + "| policy_gradient_loss | 1.06e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.56e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 84 |\n", + "| time_elapsed | 558 |\n", + "| total_timesteps | 172032 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.39e+20 |\n", + "| n_updates | 830 |\n", + "| policy_gradient_loss | 6.23e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.77e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 85 |\n", + "| time_elapsed | 565 |\n", + "| total_timesteps | 174080 |\n", + "| train/ | |\n", + "| approx_kl | 2.0372681e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.28e+20 |\n", + "| n_updates | 840 |\n", + "| policy_gradient_loss | 2.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.28e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 86 |\n", + "| time_elapsed | 572 |\n", + "| total_timesteps | 176128 |\n", + "| train/ | |\n", + "| approx_kl | -1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.39e+20 |\n", + "| n_updates | 850 |\n", + "| policy_gradient_loss | 3.62e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.58e+20 |\n", + "--------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 87 |\n", + "| time_elapsed | 578 |\n", + "| total_timesteps | 178176 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.77e+20 |\n", + "| n_updates | 860 |\n", + "| policy_gradient_loss | -6e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.84e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 88 |\n", + "| time_elapsed | 585 |\n", + "| total_timesteps | 180224 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.77e+20 |\n", + "| n_updates | 870 |\n", + "| policy_gradient_loss | -1.66e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.08e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 89 |\n", + "| time_elapsed | 592 |\n", + "| total_timesteps | 182272 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.75e+20 |\n", + "| n_updates | 880 |\n", + "| policy_gradient_loss | -5.66e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.9e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 90 |\n", + "| time_elapsed | 598 |\n", + "| total_timesteps | 184320 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.41e+20 |\n", + "| n_updates | 890 |\n", + "| policy_gradient_loss | 1.07e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.39e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 91 |\n", + "| time_elapsed | 605 |\n", + "| total_timesteps | 186368 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.59e+20 |\n", + "| n_updates | 900 |\n", + "| policy_gradient_loss | -5.2e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.35e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 92 |\n", + "| time_elapsed | 612 |\n", + "| total_timesteps | 188416 |\n", + "| train/ | |\n", + "| approx_kl | 8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.91e+20 |\n", + "| n_updates | 910 |\n", + "| policy_gradient_loss | -1.26e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.09e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 93 |\n", + "| time_elapsed | 619 |\n", + "| total_timesteps | 190464 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.86e+20 |\n", + "| n_updates | 920 |\n", + "| policy_gradient_loss | -1.74e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.42e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 94 |\n", + "| time_elapsed | 626 |\n", + "| total_timesteps | 192512 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.13e+20 |\n", + "| n_updates | 930 |\n", + "| policy_gradient_loss | 7.95e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.04e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 95 |\n", + "| time_elapsed | 633 |\n", + "| total_timesteps | 194560 |\n", + "| train/ | |\n", + "| approx_kl | 1.4551915e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.92e+20 |\n", + "| n_updates | 940 |\n", + "| policy_gradient_loss | 1.34e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.42e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 96 |\n", + "| time_elapsed | 639 |\n", + "| total_timesteps | 196608 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.69e+20 |\n", + "| n_updates | 950 |\n", + "| policy_gradient_loss | 1.96e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.04e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 97 |\n", + "| time_elapsed | 646 |\n", + "| total_timesteps | 198656 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.81e+20 |\n", + "| n_updates | 960 |\n", + "| policy_gradient_loss | 5.85e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.1e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 98 |\n", + "| time_elapsed | 653 |\n", + "| total_timesteps | 200704 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.93e+20 |\n", + "| n_updates | 970 |\n", + "| policy_gradient_loss | 4.95e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.2e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 99 |\n", + "| time_elapsed | 660 |\n", + "| total_timesteps | 202752 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.75e+20 |\n", + "| n_updates | 980 |\n", + "| policy_gradient_loss | -3.31e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.1e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 666 |\n", + "| total_timesteps | 204800 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.16e+20 |\n", + "| n_updates | 990 |\n", + "| policy_gradient_loss | -4.21e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.06e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 101 |\n", + "| time_elapsed | 673 |\n", + "| total_timesteps | 206848 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.1e+20 |\n", + "| n_updates | 1000 |\n", + "| policy_gradient_loss | 3.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.6e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.7e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 102 |\n", + "| time_elapsed | 679 |\n", + "| total_timesteps | 208896 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.46e+20 |\n", + "| n_updates | 1010 |\n", + "| policy_gradient_loss | -4.23e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.04e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.7e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 103 |\n", + "| time_elapsed | 686 |\n", + "| total_timesteps | 210944 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.53e+20 |\n", + "| n_updates | 1020 |\n", + "| policy_gradient_loss | -2.27e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.27e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 104 |\n", + "| time_elapsed | 693 |\n", + "| total_timesteps | 212992 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.15e+20 |\n", + "| n_updates | 1030 |\n", + "| policy_gradient_loss | -6.14e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.04e+20 |\n", + "---------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.7e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 105 |\n", + "| time_elapsed | 699 |\n", + "| total_timesteps | 215040 |\n", + "| train/ | |\n", + "| approx_kl | -1.4551915e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.98e+20 |\n", + "| n_updates | 1040 |\n", + "| policy_gradient_loss | 4.92e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.48e+20 |\n", + "--------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 106 |\n", + "| time_elapsed | 706 |\n", + "| total_timesteps | 217088 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.4e+20 |\n", + "| n_updates | 1050 |\n", + "| policy_gradient_loss | 2.12e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.6e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 107 |\n", + "| time_elapsed | 713 |\n", + "| total_timesteps | 219136 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.93e+20 |\n", + "| n_updates | 1060 |\n", + "| policy_gradient_loss | 1.12e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.37e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 108 |\n", + "| time_elapsed | 719 |\n", + "| total_timesteps | 221184 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.51e+20 |\n", + "| n_updates | 1070 |\n", + "| policy_gradient_loss | -7.58e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.43e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 109 |\n", + "| time_elapsed | 726 |\n", + "| total_timesteps | 223232 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.42e+20 |\n", + "| n_updates | 1080 |\n", + "| policy_gradient_loss | -1.91e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.34e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 110 |\n", + "| time_elapsed | 733 |\n", + "| total_timesteps | 225280 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.01e+20 |\n", + "| n_updates | 1090 |\n", + "| policy_gradient_loss | -3.17e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.45e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 111 |\n", + "| time_elapsed | 739 |\n", + "| total_timesteps | 227328 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.57e+20 |\n", + "| n_updates | 1100 |\n", + "| policy_gradient_loss | -1.47e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.72e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 112 |\n", + "| time_elapsed | 746 |\n", + "| total_timesteps | 229376 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.44e+20 |\n", + "| n_updates | 1110 |\n", + "| policy_gradient_loss | -7.1e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.55e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 113 |\n", + "| time_elapsed | 753 |\n", + "| total_timesteps | 231424 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.91e+20 |\n", + "| n_updates | 1120 |\n", + "| policy_gradient_loss | 1.29e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.22e+20 |\n", + "--------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 114 |\n", + "| time_elapsed | 760 |\n", + "| total_timesteps | 233472 |\n", + "| train/ | |\n", + "| approx_kl | -1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.18e+20 |\n", + "| n_updates | 1130 |\n", + "| policy_gradient_loss | -3.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.99e+20 |\n", + "--------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.69e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 115 |\n", + "| time_elapsed | 766 |\n", + "| total_timesteps | 235520 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 1140 |\n", + "| policy_gradient_loss | -5.62e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.98e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.68e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 116 |\n", + "| time_elapsed | 773 |\n", + "| total_timesteps | 237568 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.13e+20 |\n", + "| n_updates | 1150 |\n", + "| policy_gradient_loss | 2.58e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.67e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 117 |\n", + "| time_elapsed | 779 |\n", + "| total_timesteps | 239616 |\n", + "| train/ | |\n", + "| approx_kl | 2.3283064e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.24e+20 |\n", + "| n_updates | 1160 |\n", + "| policy_gradient_loss | -4.64e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.08e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.67e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 118 |\n", + "| time_elapsed | 786 |\n", + "| total_timesteps | 241664 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.8e+20 |\n", + "| n_updates | 1170 |\n", + "| policy_gradient_loss | -1.2e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.3e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 119 |\n", + "| time_elapsed | 792 |\n", + "| total_timesteps | 243712 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.04e+20 |\n", + "| n_updates | 1180 |\n", + "| policy_gradient_loss | -3.09e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.8e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 120 |\n", + "| time_elapsed | 799 |\n", + "| total_timesteps | 245760 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.45e+20 |\n", + "| n_updates | 1190 |\n", + "| policy_gradient_loss | 4.03e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.61e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 121 |\n", + "| time_elapsed | 805 |\n", + "| total_timesteps | 247808 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.63e+20 |\n", + "| n_updates | 1200 |\n", + "| policy_gradient_loss | -5.43e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.45e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 122 |\n", + "| time_elapsed | 811 |\n", + "| total_timesteps | 249856 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.99e+20 |\n", + "| n_updates | 1210 |\n", + "| policy_gradient_loss | 1.86e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.59e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 123 |\n", + "| time_elapsed | 818 |\n", + "| total_timesteps | 251904 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.85e+20 |\n", + "| n_updates | 1220 |\n", + "| policy_gradient_loss | -1.14e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.44e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 124 |\n", + "| time_elapsed | 824 |\n", + "| total_timesteps | 253952 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.3e+20 |\n", + "| n_updates | 1230 |\n", + "| policy_gradient_loss | 2.21e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.75e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 125 |\n", + "| time_elapsed | 830 |\n", + "| total_timesteps | 256000 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.37e+20 |\n", + "| n_updates | 1240 |\n", + "| policy_gradient_loss | -4.63e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.79e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 126 |\n", + "| time_elapsed | 836 |\n", + "| total_timesteps | 258048 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.11e+20 |\n", + "| n_updates | 1250 |\n", + "| policy_gradient_loss | -5.74e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.72e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 127 |\n", + "| time_elapsed | 843 |\n", + "| total_timesteps | 260096 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.79e+20 |\n", + "| n_updates | 1260 |\n", + "| policy_gradient_loss | 3.38e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.82e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 128 |\n", + "| time_elapsed | 849 |\n", + "| total_timesteps | 262144 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.91e+20 |\n", + "| n_updates | 1270 |\n", + "| policy_gradient_loss | 3.41e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.62e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 129 |\n", + "| time_elapsed | 856 |\n", + "| total_timesteps | 264192 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.33e+20 |\n", + "| n_updates | 1280 |\n", + "| policy_gradient_loss | 4.79e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.22e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 130 |\n", + "| time_elapsed | 862 |\n", + "| total_timesteps | 266240 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.22e+20 |\n", + "| n_updates | 1290 |\n", + "| policy_gradient_loss | -1.57e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.75e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 131 |\n", + "| time_elapsed | 868 |\n", + "| total_timesteps | 268288 |\n", + "| train/ | |\n", + "| approx_kl | 1.4551915e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.14e+20 |\n", + "| n_updates | 1300 |\n", + "| policy_gradient_loss | -3.55e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.36e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 132 |\n", + "| time_elapsed | 875 |\n", + "| total_timesteps | 270336 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.27e+20 |\n", + "| n_updates | 1310 |\n", + "| policy_gradient_loss | 6.04e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.36e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 133 |\n", + "| time_elapsed | 881 |\n", + "| total_timesteps | 272384 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.09e+20 |\n", + "| n_updates | 1320 |\n", + "| policy_gradient_loss | -5.18e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.34e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 134 |\n", + "| time_elapsed | 887 |\n", + "| total_timesteps | 274432 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.58e+20 |\n", + "| n_updates | 1330 |\n", + "| policy_gradient_loss | -1.22e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.27e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 135 |\n", + "| time_elapsed | 894 |\n", + "| total_timesteps | 276480 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.35e+20 |\n", + "| n_updates | 1340 |\n", + "| policy_gradient_loss | -2.39e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.82e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 136 |\n", + "| time_elapsed | 900 |\n", + "| total_timesteps | 278528 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.12e+20 |\n", + "| n_updates | 1350 |\n", + "| policy_gradient_loss | -2.61e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.26e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 137 |\n", + "| time_elapsed | 907 |\n", + "| total_timesteps | 280576 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.59e+20 |\n", + "| n_updates | 1360 |\n", + "| policy_gradient_loss | -4.31e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.52e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 138 |\n", + "| time_elapsed | 913 |\n", + "| total_timesteps | 282624 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.44e+20 |\n", + "| n_updates | 1370 |\n", + "| policy_gradient_loss | 1.35e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.26e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 139 |\n", + "| time_elapsed | 919 |\n", + "| total_timesteps | 284672 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.68e+20 |\n", + "| n_updates | 1380 |\n", + "| policy_gradient_loss | 4.9e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.15e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 140 |\n", + "| time_elapsed | 926 |\n", + "| total_timesteps | 286720 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.53e+20 |\n", + "| n_updates | 1390 |\n", + "| policy_gradient_loss | -3.98e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.89e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 141 |\n", + "| time_elapsed | 932 |\n", + "| total_timesteps | 288768 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 5.04e+20 |\n", + "| n_updates | 1400 |\n", + "| policy_gradient_loss | 3.41e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.59e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 142 |\n", + "| time_elapsed | 938 |\n", + "| total_timesteps | 290816 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.64e+20 |\n", + "| n_updates | 1410 |\n", + "| policy_gradient_loss | -2.99e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.82e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 143 |\n", + "| time_elapsed | 944 |\n", + "| total_timesteps | 292864 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.05e+20 |\n", + "| n_updates | 1420 |\n", + "| policy_gradient_loss | -1.03e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.54e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 144 |\n", + "| time_elapsed | 951 |\n", + "| total_timesteps | 294912 |\n", + "| train/ | |\n", + "| approx_kl | 8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.29e+20 |\n", + "| n_updates | 1430 |\n", + "| policy_gradient_loss | -5.52e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.57e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 145 |\n", + "| time_elapsed | 957 |\n", + "| total_timesteps | 296960 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.79e+20 |\n", + "| n_updates | 1440 |\n", + "| policy_gradient_loss | 9.34e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.39e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 146 |\n", + "| time_elapsed | 963 |\n", + "| total_timesteps | 299008 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.6e+20 |\n", + "| n_updates | 1450 |\n", + "| policy_gradient_loss | -1.21e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.85e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 147 |\n", + "| time_elapsed | 970 |\n", + "| total_timesteps | 301056 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.8e+20 |\n", + "| n_updates | 1460 |\n", + "| policy_gradient_loss | -2.56e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.97e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 148 |\n", + "| time_elapsed | 976 |\n", + "| total_timesteps | 303104 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.77e+20 |\n", + "| n_updates | 1470 |\n", + "| policy_gradient_loss | -5.72e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.11e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 149 |\n", + "| time_elapsed | 983 |\n", + "| total_timesteps | 305152 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.35e+20 |\n", + "| n_updates | 1480 |\n", + "| policy_gradient_loss | -3.17e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.27e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 150 |\n", + "| time_elapsed | 989 |\n", + "| total_timesteps | 307200 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.86e+20 |\n", + "| n_updates | 1490 |\n", + "| policy_gradient_loss | -1.05e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.64e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 151 |\n", + "| time_elapsed | 995 |\n", + "| total_timesteps | 309248 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.36e+20 |\n", + "| n_updates | 1500 |\n", + "| policy_gradient_loss | -5.21e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.21e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 152 |\n", + "| time_elapsed | 1002 |\n", + "| total_timesteps | 311296 |\n", + "| train/ | |\n", + "| approx_kl | 1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4e+20 |\n", + "| n_updates | 1510 |\n", + "| policy_gradient_loss | 2.01e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.84e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 153 |\n", + "| time_elapsed | 1008 |\n", + "| total_timesteps | 313344 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.96e+20 |\n", + "| n_updates | 1520 |\n", + "| policy_gradient_loss | 1.63e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.87e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 154 |\n", + "| time_elapsed | 1014 |\n", + "| total_timesteps | 315392 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.64e+20 |\n", + "| n_updates | 1530 |\n", + "| policy_gradient_loss | -3.11e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.15e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 155 |\n", + "| time_elapsed | 1020 |\n", + "| total_timesteps | 317440 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.85e+20 |\n", + "| n_updates | 1540 |\n", + "| policy_gradient_loss | -8.58e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.28e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 156 |\n", + "| time_elapsed | 1027 |\n", + "| total_timesteps | 319488 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.16e+20 |\n", + "| n_updates | 1550 |\n", + "| policy_gradient_loss | -2.01e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.75e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 157 |\n", + "| time_elapsed | 1033 |\n", + "| total_timesteps | 321536 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.25e+20 |\n", + "| n_updates | 1560 |\n", + "| policy_gradient_loss | -1.96e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.13e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 158 |\n", + "| time_elapsed | 1040 |\n", + "| total_timesteps | 323584 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.72e+20 |\n", + "| n_updates | 1570 |\n", + "| policy_gradient_loss | -2.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.97e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 159 |\n", + "| time_elapsed | 1046 |\n", + "| total_timesteps | 325632 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.67e+20 |\n", + "| n_updates | 1580 |\n", + "| policy_gradient_loss | -1.06e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.72e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 160 |\n", + "| time_elapsed | 1052 |\n", + "| total_timesteps | 327680 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.86e+20 |\n", + "| n_updates | 1590 |\n", + "| policy_gradient_loss | -3.42e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.59e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 161 |\n", + "| time_elapsed | 1058 |\n", + "| total_timesteps | 329728 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.92e+20 |\n", + "| n_updates | 1600 |\n", + "| policy_gradient_loss | 4.93e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.36e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 162 |\n", + "| time_elapsed | 1065 |\n", + "| total_timesteps | 331776 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.62e+20 |\n", + "| n_updates | 1610 |\n", + "| policy_gradient_loss | -2.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.25e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 163 |\n", + "| time_elapsed | 1072 |\n", + "| total_timesteps | 333824 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.58e+20 |\n", + "| n_updates | 1620 |\n", + "| policy_gradient_loss | -5.06e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.1e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 164 |\n", + "| time_elapsed | 1078 |\n", + "| total_timesteps | 335872 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.62e+20 |\n", + "| n_updates | 1630 |\n", + "| policy_gradient_loss | 4.85e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.93e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 165 |\n", + "| time_elapsed | 1084 |\n", + "| total_timesteps | 337920 |\n", + "| train/ | |\n", + "| approx_kl | -1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.69e+20 |\n", + "| n_updates | 1640 |\n", + "| policy_gradient_loss | -3.42e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.06e+20 |\n", + "--------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 166 |\n", + "| time_elapsed | 1091 |\n", + "| total_timesteps | 339968 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.86e+20 |\n", + "| n_updates | 1650 |\n", + "| policy_gradient_loss | -1.39e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.25e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 167 |\n", + "| time_elapsed | 1097 |\n", + "| total_timesteps | 342016 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.36e+20 |\n", + "| n_updates | 1660 |\n", + "| policy_gradient_loss | 2.51e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.95e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 168 |\n", + "| time_elapsed | 1103 |\n", + "| total_timesteps | 344064 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.84e+20 |\n", + "| n_updates | 1670 |\n", + "| policy_gradient_loss | 1.31e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.74e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 169 |\n", + "| time_elapsed | 1110 |\n", + "| total_timesteps | 346112 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.85e+20 |\n", + "| n_updates | 1680 |\n", + "| policy_gradient_loss | 2.5e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.09e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 170 |\n", + "| time_elapsed | 1116 |\n", + "| total_timesteps | 348160 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.58e+20 |\n", + "| n_updates | 1690 |\n", + "| policy_gradient_loss | -1.57e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.12e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 171 |\n", + "| time_elapsed | 1123 |\n", + "| total_timesteps | 350208 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.88e+20 |\n", + "| n_updates | 1700 |\n", + "| policy_gradient_loss | -2.62e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.43e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 172 |\n", + "| time_elapsed | 1129 |\n", + "| total_timesteps | 352256 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.39e+20 |\n", + "| n_updates | 1710 |\n", + "| policy_gradient_loss | -5.16e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.11e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 173 |\n", + "| time_elapsed | 1136 |\n", + "| total_timesteps | 354304 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.92e+20 |\n", + "| n_updates | 1720 |\n", + "| policy_gradient_loss | 1.08e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.98e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 174 |\n", + "| time_elapsed | 1143 |\n", + "| total_timesteps | 356352 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.47e+20 |\n", + "| n_updates | 1730 |\n", + "| policy_gradient_loss | 3.02e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.93e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 175 |\n", + "| time_elapsed | 1149 |\n", + "| total_timesteps | 358400 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.88e+20 |\n", + "| n_updates | 1740 |\n", + "| policy_gradient_loss | 4.55e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.44e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 176 |\n", + "| time_elapsed | 1156 |\n", + "| total_timesteps | 360448 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.04e+20 |\n", + "| n_updates | 1750 |\n", + "| policy_gradient_loss | -1.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.06e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 177 |\n", + "| time_elapsed | 1163 |\n", + "| total_timesteps | 362496 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.63e+20 |\n", + "| n_updates | 1760 |\n", + "| policy_gradient_loss | 7.19e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.68e+20 |\n", + "---------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 178 |\n", + "| time_elapsed | 1170 |\n", + "| total_timesteps | 364544 |\n", + "| train/ | |\n", + "| approx_kl | -1.4551915e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.73e+20 |\n", + "| n_updates | 1770 |\n", + "| policy_gradient_loss | -4.8e-10 |\n", + "| std | 1 |\n", + "| value_loss | 8.09e+20 |\n", + "--------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 179 |\n", + "| time_elapsed | 1176 |\n", + "| total_timesteps | 366592 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.99e+20 |\n", + "| n_updates | 1780 |\n", + "| policy_gradient_loss | -5.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.05e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 180 |\n", + "| time_elapsed | 1183 |\n", + "| total_timesteps | 368640 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.6e+20 |\n", + "| n_updates | 1790 |\n", + "| policy_gradient_loss | -4.17e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.91e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 181 |\n", + "| time_elapsed | 1190 |\n", + "| total_timesteps | 370688 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.23e+20 |\n", + "| n_updates | 1800 |\n", + "| policy_gradient_loss | 3.55e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.3e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 182 |\n", + "| time_elapsed | 1197 |\n", + "| total_timesteps | 372736 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.88e+20 |\n", + "| n_updates | 1810 |\n", + "| policy_gradient_loss | 3.87e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.55e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 183 |\n", + "| time_elapsed | 1203 |\n", + "| total_timesteps | 374784 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.95e+20 |\n", + "| n_updates | 1820 |\n", + "| policy_gradient_loss | -5.22e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.24e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 184 |\n", + "| time_elapsed | 1210 |\n", + "| total_timesteps | 376832 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.49e+20 |\n", + "| n_updates | 1830 |\n", + "| policy_gradient_loss | 3.16e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.13e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 185 |\n", + "| time_elapsed | 1217 |\n", + "| total_timesteps | 378880 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.05e+20 |\n", + "| n_updates | 1840 |\n", + "| policy_gradient_loss | 8.54e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.17e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 186 |\n", + "| time_elapsed | 1223 |\n", + "| total_timesteps | 380928 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.91e+20 |\n", + "| n_updates | 1850 |\n", + "| policy_gradient_loss | 4.47e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.75e+20 |\n", + "--------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.66e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 187 |\n", + "| time_elapsed | 1230 |\n", + "| total_timesteps | 382976 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.65e+20 |\n", + "| n_updates | 1860 |\n", + "| policy_gradient_loss | 1.96e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.6e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 311 |\n", + "| iterations | 188 |\n", + "| time_elapsed | 1237 |\n", + "| total_timesteps | 385024 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.39e+20 |\n", + "| n_updates | 1870 |\n", + "| policy_gradient_loss | -4.21e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.77e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 189 |\n", + "| time_elapsed | 1244 |\n", + "| total_timesteps | 387072 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.16e+20 |\n", + "| n_updates | 1880 |\n", + "| policy_gradient_loss | -2.6e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.65e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 190 |\n", + "| time_elapsed | 1251 |\n", + "| total_timesteps | 389120 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.93e+20 |\n", + "| n_updates | 1890 |\n", + "| policy_gradient_loss | 1.39e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.18e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.65e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 191 |\n", + "| time_elapsed | 1258 |\n", + "| total_timesteps | 391168 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.31e+20 |\n", + "| n_updates | 1900 |\n", + "| policy_gradient_loss | -1.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.92e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.64e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 192 |\n", + "| time_elapsed | 1265 |\n", + "| total_timesteps | 393216 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.42e+20 |\n", + "| n_updates | 1910 |\n", + "| policy_gradient_loss | 3.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.98e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 193 |\n", + "| time_elapsed | 1273 |\n", + "| total_timesteps | 395264 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.98e+20 |\n", + "| n_updates | 1920 |\n", + "| policy_gradient_loss | -9.79e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.83e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.63e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 194 |\n", + "| time_elapsed | 1280 |\n", + "| total_timesteps | 397312 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.5e+20 |\n", + "| n_updates | 1930 |\n", + "| policy_gradient_loss | 1.64e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.09e+20 |\n", + "--------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 195 |\n", + "| time_elapsed | 1286 |\n", + "| total_timesteps | 399360 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.15e+20 |\n", + "| n_updates | 1940 |\n", + "| policy_gradient_loss | 1.91e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.95e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 196 |\n", + "| time_elapsed | 1293 |\n", + "| total_timesteps | 401408 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.39e+20 |\n", + "| n_updates | 1950 |\n", + "| policy_gradient_loss | -3.75e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.44e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 197 |\n", + "| time_elapsed | 1300 |\n", + "| total_timesteps | 403456 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.92e+20 |\n", + "| n_updates | 1960 |\n", + "| policy_gradient_loss | 2.71e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.42e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 198 |\n", + "| time_elapsed | 1307 |\n", + "| total_timesteps | 405504 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.41e+20 |\n", + "| n_updates | 1970 |\n", + "| policy_gradient_loss | 2.62e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.96e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 199 |\n", + "| time_elapsed | 1314 |\n", + "| total_timesteps | 407552 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.46e+20 |\n", + "| n_updates | 1980 |\n", + "| policy_gradient_loss | 6.41e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.64e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.62e+11 |\n", + "| time/ | |\n", + "| fps | 310 |\n", + "| iterations | 200 |\n", + "| time_elapsed | 1321 |\n", + "| total_timesteps | 409600 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.18e+20 |\n", + "| n_updates | 1990 |\n", + "| policy_gradient_loss | -2.67e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.2e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 201 |\n", + "| time_elapsed | 1328 |\n", + "| total_timesteps | 411648 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.13e+20 |\n", + "| n_updates | 2000 |\n", + "| policy_gradient_loss | 7.92e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.34e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 202 |\n", + "| time_elapsed | 1335 |\n", + "| total_timesteps | 413696 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.53e+20 |\n", + "| n_updates | 2010 |\n", + "| policy_gradient_loss | -3.51e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.15e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 203 |\n", + "| time_elapsed | 1341 |\n", + "| total_timesteps | 415744 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.48e+20 |\n", + "| n_updates | 2020 |\n", + "| policy_gradient_loss | 1.69e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.11e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.61e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 204 |\n", + "| time_elapsed | 1348 |\n", + "| total_timesteps | 417792 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.07e+20 |\n", + "| n_updates | 2030 |\n", + "| policy_gradient_loss | -4.74e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.08e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.6e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 205 |\n", + "| time_elapsed | 1355 |\n", + "| total_timesteps | 419840 |\n", + "| train/ | |\n", + "| approx_kl | 1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.17e+20 |\n", + "| n_updates | 2040 |\n", + "| policy_gradient_loss | -3.94e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.87e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 206 |\n", + "| time_elapsed | 1362 |\n", + "| total_timesteps | 421888 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.76e+20 |\n", + "| n_updates | 2050 |\n", + "| policy_gradient_loss | 3.55e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.46e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 207 |\n", + "| time_elapsed | 1369 |\n", + "| total_timesteps | 423936 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.56e+20 |\n", + "| n_updates | 2060 |\n", + "| policy_gradient_loss | -1.75e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.04e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 208 |\n", + "| time_elapsed | 1376 |\n", + "| total_timesteps | 425984 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.79e+20 |\n", + "| n_updates | 2070 |\n", + "| policy_gradient_loss | -1.17e-08 |\n", + "| std | 1 |\n", + "| value_loss | 6.1e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 209 |\n", + "| time_elapsed | 1383 |\n", + "| total_timesteps | 428032 |\n", + "| train/ | |\n", + "| approx_kl | 2.6193447e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.41e+20 |\n", + "| n_updates | 2080 |\n", + "| policy_gradient_loss | 9.74e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.75e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 210 |\n", + "| time_elapsed | 1390 |\n", + "| total_timesteps | 430080 |\n", + "| train/ | |\n", + "| approx_kl | 1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.14e+20 |\n", + "| n_updates | 2090 |\n", + "| policy_gradient_loss | -5.02e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.2e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 211 |\n", + "| time_elapsed | 1397 |\n", + "| total_timesteps | 432128 |\n", + "| train/ | |\n", + "| approx_kl | 1.7462298e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.71e+20 |\n", + "| n_updates | 2100 |\n", + "| policy_gradient_loss | 3.52e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.23e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 212 |\n", + "| time_elapsed | 1403 |\n", + "| total_timesteps | 434176 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.44e+20 |\n", + "| n_updates | 2110 |\n", + "| policy_gradient_loss | 2.95e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.7e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 213 |\n", + "| time_elapsed | 1410 |\n", + "| total_timesteps | 436224 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.98e+20 |\n", + "| n_updates | 2120 |\n", + "| policy_gradient_loss | -4.42e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.62e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 214 |\n", + "| time_elapsed | 1417 |\n", + "| total_timesteps | 438272 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.47e+20 |\n", + "| n_updates | 2130 |\n", + "| policy_gradient_loss | -1.04e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.43e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 309 |\n", + "| iterations | 215 |\n", + "| time_elapsed | 1424 |\n", + "| total_timesteps | 440320 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.59e+20 |\n", + "| n_updates | 2140 |\n", + "| policy_gradient_loss | -6.51e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.91e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 216 |\n", + "| time_elapsed | 1431 |\n", + "| total_timesteps | 442368 |\n", + "| train/ | |\n", + "| approx_kl | -1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.57e+20 |\n", + "| n_updates | 2150 |\n", + "| policy_gradient_loss | -2.45e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.32e+20 |\n", + "--------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 217 |\n", + "| time_elapsed | 1438 |\n", + "| total_timesteps | 444416 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.72e+20 |\n", + "| n_updates | 2160 |\n", + "| policy_gradient_loss | 8.11e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.65e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.56e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 218 |\n", + "| time_elapsed | 1445 |\n", + "| total_timesteps | 446464 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.21e+20 |\n", + "| n_updates | 2170 |\n", + "| policy_gradient_loss | -4.02e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.93e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 219 |\n", + "| time_elapsed | 1452 |\n", + "| total_timesteps | 448512 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 5.96e-08 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.26e+20 |\n", + "| n_updates | 2180 |\n", + "| policy_gradient_loss | 2.76e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.84e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 220 |\n", + "| time_elapsed | 1460 |\n", + "| total_timesteps | 450560 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.45e+20 |\n", + "| n_updates | 2190 |\n", + "| policy_gradient_loss | 4.91e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.09e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.56e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 221 |\n", + "| time_elapsed | 1467 |\n", + "| total_timesteps | 452608 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.4e+20 |\n", + "| n_updates | 2200 |\n", + "| policy_gradient_loss | 2.73e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.29e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 222 |\n", + "| time_elapsed | 1474 |\n", + "| total_timesteps | 454656 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.69e+20 |\n", + "| n_updates | 2210 |\n", + "| policy_gradient_loss | 3.09e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.16e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.56e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 223 |\n", + "| time_elapsed | 1481 |\n", + "| total_timesteps | 456704 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.4e+20 |\n", + "| n_updates | 2220 |\n", + "| policy_gradient_loss | -2.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.83e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 224 |\n", + "| time_elapsed | 1487 |\n", + "| total_timesteps | 458752 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.23e+20 |\n", + "| n_updates | 2230 |\n", + "| policy_gradient_loss | 6.27e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.98e+20 |\n", + "------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 225 |\n", + "| time_elapsed | 1494 |\n", + "| total_timesteps | 460800 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.31e+20 |\n", + "| n_updates | 2240 |\n", + "| policy_gradient_loss | 1.68e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.97e+20 |\n", + "--------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 226 |\n", + "| time_elapsed | 1501 |\n", + "| total_timesteps | 462848 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.34e+20 |\n", + "| n_updates | 2250 |\n", + "| policy_gradient_loss | 1.34e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.23e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.56e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 227 |\n", + "| time_elapsed | 1508 |\n", + "| total_timesteps | 464896 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.18e+20 |\n", + "| n_updates | 2260 |\n", + "| policy_gradient_loss | -1.05e-08 |\n", + "| std | 1 |\n", + "| value_loss | 7.11e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 228 |\n", + "| time_elapsed | 1514 |\n", + "| total_timesteps | 466944 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.67e+20 |\n", + "| n_updates | 2270 |\n", + "| policy_gradient_loss | 9.96e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.88e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 229 |\n", + "| time_elapsed | 1521 |\n", + "| total_timesteps | 468992 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.16e+20 |\n", + "| n_updates | 2280 |\n", + "| policy_gradient_loss | -1.16e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.4e+20 |\n", + "---------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.54e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 230 |\n", + "| time_elapsed | 1528 |\n", + "| total_timesteps | 471040 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.63e+20 |\n", + "| n_updates | 2290 |\n", + "| policy_gradient_loss | 1.45e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.63e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 231 |\n", + "| time_elapsed | 1534 |\n", + "| total_timesteps | 473088 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.48e+20 |\n", + "| n_updates | 2300 |\n", + "| policy_gradient_loss | -3.34e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.29e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 232 |\n", + "| time_elapsed | 1541 |\n", + "| total_timesteps | 475136 |\n", + "| train/ | |\n", + "| approx_kl | 8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.87e+20 |\n", + "| n_updates | 2310 |\n", + "| policy_gradient_loss | -4.51e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.1e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 233 |\n", + "| time_elapsed | 1548 |\n", + "| total_timesteps | 477184 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.61e+20 |\n", + "| n_updates | 2320 |\n", + "| policy_gradient_loss | 2.87e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.51e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 234 |\n", + "| time_elapsed | 1554 |\n", + "| total_timesteps | 479232 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.23e+20 |\n", + "| n_updates | 2330 |\n", + "| policy_gradient_loss | -5.88e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.24e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 235 |\n", + "| time_elapsed | 1562 |\n", + "| total_timesteps | 481280 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.22e+20 |\n", + "| n_updates | 2340 |\n", + "| policy_gradient_loss | -2.38e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.19e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 236 |\n", + "| time_elapsed | 1568 |\n", + "| total_timesteps | 483328 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.3e+20 |\n", + "| n_updates | 2350 |\n", + "| policy_gradient_loss | -1.01e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.09e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 237 |\n", + "| time_elapsed | 1575 |\n", + "| total_timesteps | 485376 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.9e+20 |\n", + "| n_updates | 2360 |\n", + "| policy_gradient_loss | -2.64e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.85e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 238 |\n", + "| time_elapsed | 1582 |\n", + "| total_timesteps | 487424 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.95e+20 |\n", + "| n_updates | 2370 |\n", + "| policy_gradient_loss | -3.15e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.31e+20 |\n", + "-------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 239 |\n", + "| time_elapsed | 1588 |\n", + "| total_timesteps | 489472 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.58e+20 |\n", + "| n_updates | 2380 |\n", + "| policy_gradient_loss | 5.26e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.31e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 240 |\n", + "| time_elapsed | 1595 |\n", + "| total_timesteps | 491520 |\n", + "| train/ | |\n", + "| approx_kl | -2.6193447e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.16e+20 |\n", + "| n_updates | 2390 |\n", + "| policy_gradient_loss | -2.83e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.82e+20 |\n", + "--------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 241 |\n", + "| time_elapsed | 1602 |\n", + "| total_timesteps | 493568 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.39e+20 |\n", + "| n_updates | 2400 |\n", + "| policy_gradient_loss | 2.51e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.5e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 242 |\n", + "| time_elapsed | 1608 |\n", + "| total_timesteps | 495616 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.06e+20 |\n", + "| n_updates | 2410 |\n", + "| policy_gradient_loss | -6.89e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.06e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 243 |\n", + "| time_elapsed | 1615 |\n", + "| total_timesteps | 497664 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.18e+20 |\n", + "| n_updates | 2420 |\n", + "| policy_gradient_loss | -3.41e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.76e+20 |\n", + "---------------------------------------\n", + "--------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.49e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 244 |\n", + "| time_elapsed | 1622 |\n", + "| total_timesteps | 499712 |\n", + "| train/ | |\n", + "| approx_kl | -1.1641532e-10 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.97e+20 |\n", + "| n_updates | 2430 |\n", + "| policy_gradient_loss | -7.16e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.5e+20 |\n", + "--------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 245 |\n", + "| time_elapsed | 1628 |\n", + "| total_timesteps | 501760 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.48e+20 |\n", + "| n_updates | 2440 |\n", + "| policy_gradient_loss | -2.81e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.7e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 246 |\n", + "| time_elapsed | 1635 |\n", + "| total_timesteps | 503808 |\n", + "| train/ | |\n", + "| approx_kl | 8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.45e+20 |\n", + "| n_updates | 2450 |\n", + "| policy_gradient_loss | 4.88e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.99e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 247 |\n", + "| time_elapsed | 1641 |\n", + "| total_timesteps | 505856 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4e+20 |\n", + "| n_updates | 2460 |\n", + "| policy_gradient_loss | -3.23e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.21e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 248 |\n", + "| time_elapsed | 1649 |\n", + "| total_timesteps | 507904 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.82e+20 |\n", + "| n_updates | 2470 |\n", + "| policy_gradient_loss | -6.64e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.33e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 249 |\n", + "| time_elapsed | 1655 |\n", + "| total_timesteps | 509952 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.87e+20 |\n", + "| n_updates | 2480 |\n", + "| policy_gradient_loss | -2e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.59e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 250 |\n", + "| time_elapsed | 1662 |\n", + "| total_timesteps | 512000 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.21e+20 |\n", + "| n_updates | 2490 |\n", + "| policy_gradient_loss | 1.76e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.45e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 251 |\n", + "| time_elapsed | 1669 |\n", + "| total_timesteps | 514048 |\n", + "| train/ | |\n", + "| approx_kl | -2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.16e+20 |\n", + "| n_updates | 2500 |\n", + "| policy_gradient_loss | 1.24e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.98e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 252 |\n", + "| time_elapsed | 1675 |\n", + "| total_timesteps | 516096 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.71e+20 |\n", + "| n_updates | 2510 |\n", + "| policy_gradient_loss | -3.89e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.88e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 253 |\n", + "| time_elapsed | 1682 |\n", + "| total_timesteps | 518144 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.46e+20 |\n", + "| n_updates | 2520 |\n", + "| policy_gradient_loss | 6.97e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.13e+20 |\n", + "-------------------------------------------\n", + "--------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 254 |\n", + "| time_elapsed | 1689 |\n", + "| total_timesteps | 520192 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.25e+20 |\n", + "| n_updates | 2530 |\n", + "| policy_gradient_loss | -3.5e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.1e+20 |\n", + "--------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.51e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 255 |\n", + "| time_elapsed | 1695 |\n", + "| total_timesteps | 522240 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.07e+20 |\n", + "| n_updates | 2540 |\n", + "| policy_gradient_loss | -3.22e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.55e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 256 |\n", + "| time_elapsed | 1702 |\n", + "| total_timesteps | 524288 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.85e+20 |\n", + "| n_updates | 2550 |\n", + "| policy_gradient_loss | 5.49e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.33e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 257 |\n", + "| time_elapsed | 1708 |\n", + "| total_timesteps | 526336 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3e+20 |\n", + "| n_updates | 2560 |\n", + "| policy_gradient_loss | 3.43e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.02e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.52e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 258 |\n", + "| time_elapsed | 1715 |\n", + "| total_timesteps | 528384 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.28e+20 |\n", + "| n_updates | 2570 |\n", + "| policy_gradient_loss | -1.44e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.71e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 259 |\n", + "| time_elapsed | 1722 |\n", + "| total_timesteps | 530432 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.34e+20 |\n", + "| n_updates | 2580 |\n", + "| policy_gradient_loss | -4.22e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.16e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.53e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 260 |\n", + "| time_elapsed | 1728 |\n", + "| total_timesteps | 532480 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.15e+20 |\n", + "| n_updates | 2590 |\n", + "| policy_gradient_loss | 6.02e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.2e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 261 |\n", + "| time_elapsed | 1735 |\n", + "| total_timesteps | 534528 |\n", + "| train/ | |\n", + "| approx_kl | -8.731149e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 5e+20 |\n", + "| n_updates | 2600 |\n", + "| policy_gradient_loss | 4.48e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.02e+20 |\n", + "-------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 262 |\n", + "| time_elapsed | 1741 |\n", + "| total_timesteps | 536576 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.15e+20 |\n", + "| n_updates | 2610 |\n", + "| policy_gradient_loss | -5.84e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.04e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.55e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 263 |\n", + "| time_elapsed | 1748 |\n", + "| total_timesteps | 538624 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.89e+20 |\n", + "| n_updates | 2620 |\n", + "| policy_gradient_loss | -3.63e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.62e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 264 |\n", + "| time_elapsed | 1754 |\n", + "| total_timesteps | 540672 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.73e+20 |\n", + "| n_updates | 2630 |\n", + "| policy_gradient_loss | -2.57e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.07e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 265 |\n", + "| time_elapsed | 1761 |\n", + "| total_timesteps | 542720 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.3e+20 |\n", + "| n_updates | 2640 |\n", + "| policy_gradient_loss | -5.97e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.42e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 266 |\n", + "| time_elapsed | 1768 |\n", + "| total_timesteps | 544768 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.58e+20 |\n", + "| n_updates | 2650 |\n", + "| policy_gradient_loss | -1.79e-09 |\n", + "| std | 1 |\n", + "| value_loss | 8.32e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 267 |\n", + "| time_elapsed | 1774 |\n", + "| total_timesteps | 546816 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.5e+20 |\n", + "| n_updates | 2660 |\n", + "| policy_gradient_loss | 6.34e-09 |\n", + "| std | 1 |\n", + "| value_loss | 6.9e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 268 |\n", + "| time_elapsed | 1781 |\n", + "| total_timesteps | 548864 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.39e+20 |\n", + "| n_updates | 2670 |\n", + "| policy_gradient_loss | 2.16e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.59e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 269 |\n", + "| time_elapsed | 1788 |\n", + "| total_timesteps | 550912 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.3e+20 |\n", + "| n_updates | 2680 |\n", + "| policy_gradient_loss | 6.27e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.17e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.57e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 270 |\n", + "| time_elapsed | 1795 |\n", + "| total_timesteps | 552960 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.02e+20 |\n", + "| n_updates | 2690 |\n", + "| policy_gradient_loss | -1.96e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.08e+20 |\n", + "------------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 271 |\n", + "| time_elapsed | 1801 |\n", + "| total_timesteps | 555008 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.98e+20 |\n", + "| n_updates | 2700 |\n", + "| policy_gradient_loss | 4.61e-10 |\n", + "| std | 1 |\n", + "| value_loss | 7.76e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 272 |\n", + "| time_elapsed | 1808 |\n", + "| total_timesteps | 557056 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 2.9e+20 |\n", + "| n_updates | 2710 |\n", + "| policy_gradient_loss | -6.43e-10 |\n", + "| std | 1 |\n", + "| value_loss | 6.67e+20 |\n", + "---------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 273 |\n", + "| time_elapsed | 1815 |\n", + "| total_timesteps | 559104 |\n", + "| train/ | |\n", + "| approx_kl | 2.910383e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.34e+20 |\n", + "| n_updates | 2720 |\n", + "| policy_gradient_loss | 7.83e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.53e+20 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 274 |\n", + "| time_elapsed | 1821 |\n", + "| total_timesteps | 561152 |\n", + "| train/ | |\n", + "| approx_kl | 5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.21e+20 |\n", + "| n_updates | 2730 |\n", + "| policy_gradient_loss | -2.04e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.01e+20 |\n", + "------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.58e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 275 |\n", + "| time_elapsed | 1828 |\n", + "| total_timesteps | 563200 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.16e+20 |\n", + "| n_updates | 2740 |\n", + "| policy_gradient_loss | -8.44e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.38e+20 |\n", + "---------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 308 |\n", + "| iterations | 276 |\n", + "| time_elapsed | 1835 |\n", + "| total_timesteps | 565248 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.1e+20 |\n", + "| n_updates | 2750 |\n", + "| policy_gradient_loss | 2.26e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.69e+20 |\n", + "---------------------------------------\n", + "-------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 277 |\n", + "| time_elapsed | 1841 |\n", + "| total_timesteps | 567296 |\n", + "| train/ | |\n", + "| approx_kl | -5.820766e-11 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -1.19e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 4.02e+20 |\n", + "| n_updates | 2760 |\n", + "| policy_gradient_loss | -1.79e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.44e+20 |\n", + "-------------------------------------------\n", + "---------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 601 |\n", + "| ep_rew_mean | 8.59e+11 |\n", + "| time/ | |\n", + "| fps | 307 |\n", + "| iterations | 278 |\n", + "| time_elapsed | 1848 |\n", + "| total_timesteps | 569344 |\n", + "| train/ | |\n", + "| approx_kl | 0.0 |\n", + "| clip_fraction | 0 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -2.38e-07 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 3.88e+20 |\n", + "| n_updates | 2770 |\n", + "| policy_gradient_loss | 3.25e-09 |\n", + "| std | 1 |\n", + "| value_loss | 7.91e+20 |\n", + "---------------------------------------\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/buffers.py:605: UserWarning: This system does not have apparently enough memory to store the complete replay buffer 80.85GB > 30.69GB\n", - " warnings.warn(\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[25], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mstable_baselines3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PPO\n\u001b[1;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m PPO(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMultiInputPolicy\u001b[39m\u001b[38;5;124m\"\u001b[39m, wrapped_env, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1_000_000\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/ppo/ppo.py:311\u001b[0m, in \u001b[0;36mPPO.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28mself\u001b[39m: SelfPPO,\n\u001b[1;32m 304\u001b[0m total_timesteps: \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 309\u001b[0m progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 310\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SelfPPO:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_interval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[43m \u001b[49m\u001b[43mtb_log_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_log_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 316\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/on_policy_algorithm.py:336\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mep_info_buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dump_logs(iteration)\n\u001b[0;32m--> 336\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m callback\u001b[38;5;241m.\u001b[39mon_training_end()\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/ppo/ppo.py:213\u001b[0m, in \u001b[0;36mPPO.train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_space, spaces\u001b[38;5;241m.\u001b[39mDiscrete):\n\u001b[1;32m 210\u001b[0m \u001b[38;5;66;03m# Convert discrete action from float to long\u001b[39;00m\n\u001b[1;32m 211\u001b[0m actions \u001b[38;5;241m=\u001b[39m rollout_data\u001b[38;5;241m.\u001b[39mactions\u001b[38;5;241m.\u001b[39mlong()\u001b[38;5;241m.\u001b[39mflatten()\n\u001b[0;32m--> 213\u001b[0m values, log_prob, entropy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpolicy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate_actions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrollout_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobservations\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mactions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 214\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mflatten()\n\u001b[1;32m 215\u001b[0m \u001b[38;5;66;03m# Normalize advantage\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/policies.py:739\u001b[0m, in \u001b[0;36mActorCriticPolicy.evaluate_actions\u001b[0;34m(self, obs, actions)\u001b[0m\n\u001b[1;32m 737\u001b[0m distribution \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_action_dist_from_latent(latent_pi)\n\u001b[1;32m 738\u001b[0m log_prob \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mlog_prob(actions)\n\u001b[0;32m--> 739\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue_net\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlatent_vf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 740\u001b[0m entropy \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mentropy()\n\u001b[1;32m 741\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m values, log_prob, entropy\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/linear.py:125\u001b[0m, in \u001b[0;36mLinear.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 601 |\n", - "| ep_rew_mean | 1.25e+12 |\n", - "| time/ | |\n", - "| episodes | 4 |\n", - "| fps | 77 |\n", - "| time_elapsed | 31 |\n", - "| total_timesteps | 2404 |\n", - "| train/ | |\n", - "| actor_loss | -2.13e+10 |\n", - "| critic_loss | 1.37e+15 |\n", - "| ent_coef | 1.92 |\n", - "| ent_coef_loss | -6.08 |\n", - "| learning_rate | 0.0003 |\n", - "| n_updates | 2303 |\n", - "----------------------------------\n", - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 601 |\n", - "| ep_rew_mean | 1.26e+12 |\n", - "| time/ | |\n", - "| episodes | 8 |\n", - "| fps | 75 |\n", - "| time_elapsed | 63 |\n", - "| total_timesteps | 4808 |\n", - "| train/ | |\n", - "| actor_loss | -4.31e+10 |\n", - "| critic_loss | 5e+15 |\n", - "| ent_coef | 3.95 |\n", - "| ent_coef_loss | -12.9 |\n", - "| learning_rate | 0.0003 |\n", - "| n_updates | 4707 |\n", - "----------------------------------\n", - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 601 |\n", - "| ep_rew_mean | 1.27e+12 |\n", - "| time/ | |\n", - "| episodes | 12 |\n", - "| fps | 75 |\n", - "| time_elapsed | 95 |\n", - "| total_timesteps | 7212 |\n", - "| train/ | |\n", - "| actor_loss | -5.97e+10 |\n", - "| critic_loss | 1.1e+16 |\n", - "| ent_coef | 8.13 |\n", - "| ent_coef_loss | -19.6 |\n", - "| learning_rate | 0.0003 |\n", - "| n_updates | 7111 |\n", - "----------------------------------\n", - "----------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 601 |\n", - "| ep_rew_mean | 1.27e+12 |\n", - "| time/ | |\n", - "| episodes | 16 |\n", - "| fps | 75 |\n", - "| time_elapsed | 126 |\n", - "| total_timesteps | 9616 |\n", - "| train/ | |\n", - "| actor_loss | -7.87e+10 |\n", - "| critic_loss | 1.94e+16 |\n", - "| ent_coef | 16.7 |\n", - "| ent_coef_loss | -26.4 |\n", - "| learning_rate | 0.0003 |\n", - "| n_updates | 9515 |\n", - "----------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/pdm.lock b/pdm.lock index 0c00cbd..cc7b888 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:a1edba805cc867a6316cea6c754bc112f0f79046b604ee515c541505f9c546f7" +content_hash = "sha256:81e26f71acf1a583b21280b235fa2ac16165ac824ae8483bd391b88406421aa4" [[metadata.targets]] requires_python = ">=3.12,<3.13" @@ -522,13 +522,13 @@ files = [ [[package]] name = "jax" -version = "0.4.35" +version = "0.4.37" requires_python = ">=3.10" summary = "Differentiate, compile, and transform Numpy code." groups = ["default"] marker = "python_version >= \"3.12\" and python_version < \"3.13\"" dependencies = [ - "jaxlib<=0.4.35,>=0.4.34", + "jaxlib<=0.4.37,>=0.4.36", "ml-dtypes>=0.4.0", "numpy>=1.24", "numpy>=1.26.0; python_version >= \"3.12\"", @@ -537,13 +537,83 @@ dependencies = [ "scipy>=1.11.1; python_version >= \"3.12\"", ] files = [ - {file = "jax-0.4.35-py3-none-any.whl", hash = "sha256:fa99e909a31424abfec750019a6dd36f6acc18a6e7d40e2c0086b932cc351325"}, - {file = "jax-0.4.35.tar.gz", hash = "sha256:c0c986993026b10bf6f607fecb7417377460254640766ce40f1fef3fd139c12e"}, + {file = "jax-0.4.37-py3-none-any.whl", hash = "sha256:bdc0686d7e5a944e2d38026eae632214d98dd2d91869cbcedbf1c11298ae3e3e"}, + {file = "jax-0.4.37.tar.gz", hash = "sha256:7774f3d9e23fe199c65589c680c5a5be87a183b89598421a632d8245222b637b"}, +] + +[[package]] +name = "jax-cuda12-pjrt" +version = "0.4.36" +summary = "JAX XLA PJRT Plugin for NVIDIA GPUs" +groups = ["default"] +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" +files = [ + {file = "jax_cuda12_pjrt-0.4.36-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1dfc0bec0820ba801b61e9421064b6e58238c430b4ad8f54043323d93c0217c6"}, + {file = "jax_cuda12_pjrt-0.4.36-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e3c3705d8db7d63da9abfaebf06f5cd0667f5acb0748a5c5eb00d80041e922ed"}, +] + +[[package]] +name = "jax-cuda12-plugin" +version = "0.4.36" +requires_python = ">=3.10" +summary = "JAX Plugin for NVIDIA GPUs" +groups = ["default"] +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" +dependencies = [ + "jax-cuda12-pjrt==0.4.36", +] +files = [ + {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:6a0b0c2bdc1da2eea2c20723a1e8f39b3cda67d24c665de936647e8091f5790d"}, + {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:5d4727fb519fedc06a9a984d5a0714804d81ef126a2cb60cefd5cbc4a3ea2627"}, +] + +[[package]] +name = "jax-cuda12-plugin" +version = "0.4.36" +extras = ["with_cuda"] +requires_python = ">=3.10" +summary = "JAX Plugin for NVIDIA GPUs" +groups = ["default"] +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" +dependencies = [ + "jax-cuda12-plugin==0.4.36", + "nvidia-cublas-cu12>=12.1.3.1", + "nvidia-cuda-cupti-cu12>=12.1.105", + "nvidia-cuda-nvcc-cu12>=12.6.85", + "nvidia-cuda-runtime-cu12>=12.1.105", + "nvidia-cudnn-cu12<10.0,>=9.1", + "nvidia-cufft-cu12>=11.0.2.54", + "nvidia-cusolver-cu12>=11.4.5.107", + "nvidia-cusparse-cu12>=12.1.0.106", + "nvidia-nccl-cu12>=2.18.1", + "nvidia-nvjitlink-cu12>=12.1.105", +] +files = [ + {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:6a0b0c2bdc1da2eea2c20723a1e8f39b3cda67d24c665de936647e8091f5790d"}, + {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:5d4727fb519fedc06a9a984d5a0714804d81ef126a2cb60cefd5cbc4a3ea2627"}, +] + +[[package]] +name = "jax" +version = "0.4.37" +extras = ["cuda12"] +requires_python = ">=3.10" +summary = "Differentiate, compile, and transform Numpy code." +groups = ["default"] +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" +dependencies = [ + "jax-cuda12-plugin[with_cuda]<=0.4.37,>=0.4.36", + "jax==0.4.37", + "jaxlib==0.4.36", +] +files = [ + {file = "jax-0.4.37-py3-none-any.whl", hash = "sha256:bdc0686d7e5a944e2d38026eae632214d98dd2d91869cbcedbf1c11298ae3e3e"}, + {file = "jax-0.4.37.tar.gz", hash = "sha256:7774f3d9e23fe199c65589c680c5a5be87a183b89598421a632d8245222b637b"}, ] [[package]] name = "jaxlib" -version = "0.4.35" +version = "0.4.36" requires_python = ">=3.10" summary = "XLA library for JAX" groups = ["default"] @@ -555,16 +625,11 @@ dependencies = [ "scipy>=1.11.1; python_version >= \"3.12\"", ] files = [ - {file = "jaxlib-0.4.35-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:b44f3e6e9fb748bb43df914356cf9d0d0c9a6e446a12c21fe843db25ed0df65f"}, - {file = "jaxlib-0.4.35-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:504d0a2e2117724359d99d7e3663022686dcdddd85aa14bdad02008d444481ad"}, - {file = "jaxlib-0.4.35-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:187cb6929dc139b75d952d67c33118473c1b4105525a3e5607f064e7b8efdc74"}, - {file = "jaxlib-0.4.35-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:04d1db3bf0050d120238bfb9b686b58fefcc4d9dd9e2d96aecd3f68a1f1f5e0a"}, - {file = "jaxlib-0.4.35-cp312-cp312-win_amd64.whl", hash = "sha256:dddffce48d7e6057008999aed2d8a9daecc57a48c45a4f8c475e00880eb2e41d"}, - {file = "jaxlib-0.4.35-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:14aeac3fea2ca1d5afb1878f72470b159cc89adb2633c5f0686f5d7c39f2ac18"}, - {file = "jaxlib-0.4.35-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e8c9579e20d5ecdc4f61336cdd032710cb8c38d5ae9c4fce0cf9ea031cef21cb"}, - {file = "jaxlib-0.4.35-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:7b11ad7c13f7f96f36efd303711ecac425f19ca2ddf65cf1be1541167a959ee5"}, - {file = "jaxlib-0.4.35-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:0be3cf9df879d9ae1b5b92fc281f77d21f522fcbae1a48a02661026bbd9b9309"}, - {file = "jaxlib-0.4.35-cp313-cp313-win_amd64.whl", hash = "sha256:330c090bb9af413f552d8a92d097e50baec6b75823430fb2966a49f5298d4c43"}, + {file = "jaxlib-0.4.36-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:5972aa85f6d771ecc8cc72148c1fa64250ca33cbdf2bf24407cdee8a5299d25d"}, + {file = "jaxlib-0.4.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5597908cd10418c0b42e9af807fc8112036703533cf501a5255a8fbf4011867e"}, + {file = "jaxlib-0.4.36-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:fbbabaa287378a78a3cf9cbe4de30a1f6f19a99116feb4bd687ff256415cd442"}, + {file = "jaxlib-0.4.36-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:be295abc209c980817db0488f21f1fbc0644f87326522895e2b9b64729106357"}, + {file = "jaxlib-0.4.36-cp312-cp312-win_amd64.whl", hash = "sha256:d4bbb5d2970628dcd3dabc28a5b97a1125ad3e06a1be822d340fd9f06f7449b3"}, ] [[package]] @@ -861,7 +926,7 @@ version = "12.4.5.8" requires_python = ">=3" summary = "CUBLAS native runtime libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" files = [ {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"}, {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"}, @@ -874,13 +939,26 @@ version = "12.4.127" requires_python = ">=3" summary = "CUDA profiling tools runtime libs." groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"}, {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"}, {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922"}, ] +[[package]] +name = "nvidia-cuda-nvcc-cu12" +version = "12.6.85" +requires_python = ">=3" +summary = "CUDA nvcc" +groups = ["default"] +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" +files = [ + {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d75d9d74599f4d7c0865df19ed21b739e6cb77a6497a3f73d6f61e8038a765e4"}, + {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d2edd5531b13e3daac8ffee9fc2b70a147e6088b2af2565924773d63d36d294"}, + {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:aa04742337973dcb5bcccabb590edc8834c60ebfaf971847888d24ffef6c46b5"}, +] + [[package]] name = "nvidia-cuda-nvrtc-cu12" version = "12.4.127" @@ -900,7 +978,7 @@ version = "12.4.127" requires_python = ">=3" summary = "CUDA Runtime native Libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"}, {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"}, @@ -913,7 +991,7 @@ version = "9.1.0.70" requires_python = ">=3" summary = "cuDNN runtime libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" dependencies = [ "nvidia-cublas-cu12", ] @@ -928,7 +1006,7 @@ version = "11.2.1.3" requires_python = ">=3" summary = "CUFFT native runtime libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" dependencies = [ "nvidia-nvjitlink-cu12", ] @@ -957,7 +1035,7 @@ version = "11.6.1.9" requires_python = ">=3" summary = "CUDA solver native runtime libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" dependencies = [ "nvidia-cublas-cu12", "nvidia-cusparse-cu12", @@ -975,7 +1053,7 @@ version = "12.3.1.170" requires_python = ">=3" summary = "CUSPARSE native runtime libraries" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" dependencies = [ "nvidia-nvjitlink-cu12", ] @@ -991,7 +1069,7 @@ version = "2.21.5" requires_python = ">=3" summary = "NVIDIA Collective Communication Library (NCCL) Runtime" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" files = [ {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"}, ] @@ -1002,7 +1080,7 @@ version = "12.4.127" requires_python = ">=3" summary = "Nvidia JIT LTO Library" groups = ["default"] -marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\"" +marker = "python_version >= \"3.12\" and python_version < \"3.13\"" files = [ {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"}, {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"}, diff --git a/pyproject.toml b/pyproject.toml index 157f8bd..3ac6c98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "A solar car racing simulation library and GUI tool" authors = [ {name = "saji", email = "saji@saji.dev"}, ] -dependencies = ["pyqtgraph>=0.13.7", "jax>=0.4.35", "pytest>=8.3.3", "pyside6>=6.8.0.2", "matplotlib>=3.9.2", "gymnasium[jax]>=1.0.0", "pyvista>=0.44.2", "pyvistaqt>=0.11.1", "stable-baselines3>=2.4.0"] +dependencies = ["pyqtgraph>=0.13.7", "jax[cuda12]>=0.4.37", "pytest>=8.3.3", "pyside6>=6.8.0.2", "matplotlib>=3.9.2", "gymnasium[jax]>=1.0.0", "pyvista>=0.44.2", "pyvistaqt>=0.11.1", "stable-baselines3>=2.4.0"] requires-python = ">=3.10,<3.13" readme = "README.md" license = {text = "MIT"} diff --git a/src/solarcarsim/physsim.py b/src/solarcarsim/physsim.py index 94bc090..368f6db 100644 --- a/src/solarcarsim/physsim.py +++ b/src/solarcarsim/physsim.py @@ -1,3 +1,5 @@ +"""Physical equations and models for building a simulation environment""" + import jax.numpy as jnp import jax from jax import grad, jit, vmap, lax @@ -5,7 +7,11 @@ from functools import partial from typing import NamedTuple, Tuple -from solarcarsim.noise import fractal_noise_1d, generate_elevation_profile, generate_wind_field +from solarcarsim.noise import ( + fractal_noise_1d, + generate_wind_field, +) + class MotorParams(NamedTuple): kv: float @@ -16,33 +22,37 @@ class MotorParams(NamedTuple): class BatteryParams(NamedTuple): - shape: Tuple[int, int] # (series,parallel) array of batteries - resistance: float # ohms - initial_energy: float # joules + shape: Tuple[int, int] # (series,parallel) array of batteries + resistance: float # ohms + initial_energy: float # joules + class CarParams(NamedTuple): - """ Physical Data for Solar Car Parameters """ - mass: float = 800 # kg - frontal_area: float = 1.3 # m^2 - drag_coeff: float = 0.18 # drag coefficient, dimensionless - rolling_coeff: float = 0.002 # rolling resistance. - moter_eff: float = 0.93 # 0 < x < 1 scaling factor - wheel_radius: float = 0.23 # wheel radius in meters - max_speed: float = 30.0 # m/s top speed - solar_area: float = 5.0 # m^2, typically 5.0 - solar_eff: float = 0.20 # 0 < x < 1, typically ~.25 - n_motors: int = 2 # how many motors we have. - motor: MotorParams = MotorParams(8.43, 1.1, 100.0, 0.001, 0.001) # mitsuba m2090 estimate - battery: BatteryParams = BatteryParams((36,19), 0.0126, 66.6e3) # freebasing 50s pack. + """Physical Data for Solar Car Parameters""" + mass: float = 800 # kg + frontal_area: float = 1.3 # m^2 + drag_coeff: float = 0.18 # drag coefficient, dimensionless + rolling_coeff: float = 0.002 # rolling resistance. + moter_eff: float = 0.93 # 0 < x < 1 scaling factor + wheel_radius: float = 0.23 # wheel radius in meters + max_speed: float = 30.0 # m/s top speed + solar_area: float = 5.0 # m^2, typically 5.0 + solar_eff: float = 0.20 # 0 < x < 1, typically ~.25 + n_motors: int = 2 # how many motors we have. + motor: MotorParams = MotorParams( + 8.43, 1.1, 100.0, 0.001, 0.001 + ) # mitsuba m2090 estimate + battery: BatteryParams = BatteryParams( + (36, 19), 0.0126, 66.6e3 + ) # freebasing 50s pack. def DefaultCar() -> CarParams: - """ Creates a basic car """ + """Creates a basic car""" return CarParams(1000, 1.3, 0.18, 0.002, 0.85, 5.0, 0.23) - # some physics equations using jax @@ -50,26 +60,31 @@ def DefaultCar() -> CarParams: def normal_force(mass, theta): return mass * 9.8 * jnp.cos(theta) + @jit def downslope_force(mass, theta): return mass * 9.8 * jnp.sin(theta) -@partial(jit, static_argnames=['crr']) + +@partial(jit, static_argnames=["crr"]) def rolling_force(mass, theta, crr): return normal_force(mass, theta) * crr -@partial(jit, static_argnames=['area', 'cd', 'rho']) + +@partial(jit, static_argnames=["area", "cd", "rho"]) def drag_force(u, area, cd, rho): return 0.5 * rho * jnp.pow(u, 2) * cd * area + # we can use those forces above to determine what forces we have to overcome. Sum(F)=0 + # @partial(jit, static_argnums=(2,)) @jit def bldc_power_draw(torque, velocity, params: MotorParams): """ Approximates power draw of a BLDC motor outputting a torque at a given velocity - + Args: torq: Applied force in Newton/meters velocity: Angular velocity in rad/s @@ -77,32 +92,32 @@ def bldc_power_draw(torque, velocity, params: MotorParams): kt: Torque constant (N⋅m/A) friction_coeff: Mechanical friction coefficient iron_loss_coeff: Iron loss coefficient (core losses) - + Returns: Total electrical power draw in Watts """ - + # Current required for torque (simplified relationship) current = torque / params.kt - + # Copper losses (I²R) - copper_losses = params.resistance * current**2 + copper_losses = params.resistance * current**2 # Mechanical friction losses - friction_losses = params.friction_coeff * velocity**2 + friction_losses = params.friction_coeff * velocity**2 # Iron losses (simplified model - primarily dependent on speed) - iron_losses = params.iron_coeff * velocity**2 + iron_losses = params.iron_coeff * velocity**2 # Mechanical power output mechanical_power = torque * velocity - + # Total electrical power input total_power = mechanical_power + copper_losses + friction_losses + iron_losses - + return total_power + # @partial(jit, static_argnames=['resistance', 'kt', 'kv', 'vmax', 'Cf']) @jit def bldc_torque(velocity, current_limit, resistance, kt, kv, vmax, Cf): - bemf = velocity / kv v_avail = jnp.clip(vmax - bemf, 0.0, vmax) current = jnp.clip(v_avail / resistance, 0.0, current_limit) @@ -113,8 +128,15 @@ def bldc_torque(velocity, current_limit, resistance, kt, kv, vmax, Cf): stall_torque = kt * current_limit return jnp.where(velocity < 0.01, stall_torque, net_torque) -@partial(jit, static_argnums=(1,2,)) -def battery_powerloss(current,cell_r, battery_shape: Tuple[int,int]): + +@partial( + jit, + static_argnums=( + 1, + 2, + ), +) +def battery_powerloss(current, cell_r, battery_shape: Tuple[int, int]): r_array = jnp.full(battery_shape, cell_r) branch_current = current / battery_shape[1] I_array = jnp.full(battery_shape, branch_current) @@ -122,7 +144,6 @@ def battery_powerloss(current,cell_r, battery_shape: Tuple[int,int]): return jnp.sum(cell_Ploss) - def forward(state, timestep, control, params: CarParams): # state is (position, time, energy) # control is velocity @@ -130,7 +151,7 @@ def forward(state, timestep, control, params: CarParams): # params is the params dictionary. # returns the next state with (position', time + timestep, energy') # TODO: terrain, weather, solar - + # determine the forces acting on the car. dragf = drag_force(control, params.frontal_area, params.drag_coeff, 1.184) rollf = rolling_force(params.mass, 0, params.rolling_coeff) @@ -139,18 +160,24 @@ def forward(state, timestep, control, params: CarParams): # determine the power needed to make this force tau = params.wheel_radius * totalf pdraw = bldc_power_draw(tau, control, params.motor) - net_power = 0 - pdraw # watts aka j/s - + net_power = 0 - pdraw # watts aka j/s + # TODO: calculate battery-based power losses. # TODO: support regenerative braking when going downhill # TODO: delta x = cos(theta) * velocity * timestep - new_state = jnp.array([state[0] + control * timestep, state[1] + timestep, state[2] + net_power * timestep]) + new_state = jnp.array( + [ + state[0] + control * timestep, + state[1] + timestep, + state[2] + net_power * timestep, + ] + ) return new_state def make_environment(seed): - """ Generate a race environment: terrain function, wind function, wrapped forward function.""" + """Generate a race environment: terrain function, wind function, wrapped forward function.""" key, subkey = jax.random.split(seed) wind = generate_wind_field(subkey, 10000, 600, spatial_scale=1000) key, subkey = jax.random.split(key) @@ -161,46 +188,3 @@ def make_environment(seed): return wind, elevation, slope -@partial(jit, static_argnames=['params']) -def forwardv2(state, control, delta_time, wind, elevation, slope, params): - pos = jnp.astype(jnp.round(state[0]), "int32") - time = jnp.astype(jnp.round(state[1]), "int32") - theta = slope[pos] - - velocity = control * params.max_speed - - # sum up the forces acting on the car - dragf = drag_force(velocity, params.frontal_area, params.drag_coeff, 1.184) - rollf = rolling_force(params.mass, theta, params.rolling_coeff) - hillforce = downslope_force(params.mass, theta) - windf = wind[pos, time] - totalf = dragf + rollf + hillforce + windf - # with the sum of forces, determine the needed torque at the wheels, and then power - tau = params.wheel_radius * totalf - pdraw = bldc_power_draw(tau, velocity, params.motor) - # determine the energy needed to do this power for the time step - net_power = state[2] - delta_time * pdraw # joules - - dpos = jnp.cos(theta) * velocity * delta_time - dist_remaining = 10000.0 - dpos - time_remaining = 600 - (state[1] + delta_time) - return jnp.array([dpos, state[1] + delta_time, net_power, dist_remaining, time_remaining]) - -def reward(state): - progress = state[0] / 10000 * 100 - energy_usage = -10 * state[2] - time_factor = (1.0 - (state[1] / 600)) * 50 - reward = progress + energy_usage + time_factor - return reward -# now we have an environment tuned in. -# we want to take an environment, and bind it to the forward function -def make_simulator(params: CarParams, wind, elevation, slope): - def reward(state): - progress = state[0] / 10000 * 100 - energy_usage = -10 * state[2] - time_factor = (1.0 - (state[1] / 600)) * 50 - reward = progress + energy_usage + time_factor - return reward - return forwardv2, reward - - diff --git a/src/solarcarsim/gym.py b/src/solarcarsim/simv1.py similarity index 63% rename from src/solarcarsim/gym.py rename to src/solarcarsim/simv1.py index ebfe343..dee29b8 100644 --- a/src/solarcarsim/gym.py +++ b/src/solarcarsim/simv1.py @@ -2,10 +2,45 @@ import gymnasium as gym import solarcarsim.physsim as sim import jax import jax.numpy as jnp -import numpy as np -from typing import Any +from jax import jit from functools import partial -from jax import vmap +from solarcarsim.physsim import drag_force, rolling_force, downslope_force, bldc_power_draw + + +@partial(jit, static_argnames=["params"]) +def forwardv2(state, control, delta_time, wind, elevation, slope, params): + pos = jnp.astype(jnp.round(state[0]), "int32") + time = jnp.astype(jnp.round(state[1]), "int32") + theta = slope[pos] + + velocity = control * params.max_speed + + # sum up the forces acting on the car + windspeed = wind[pos, time] + dragf = sim.drag_force(velocity + windspeed, params.frontal_area, params.drag_coeff, 1.184) + rollf = sim.rolling_force(params.mass, theta, params.rolling_coeff) + hillforce = sim.downslope_force(params.mass, theta) + totalf = dragf + rollf + hillforce + # with the sum of forces, determine the needed torque at the wheels, and then power + tau = params.wheel_radius * totalf + pdraw = bldc_power_draw(tau, velocity, params.motor) + # determine the energy needed to do this power for the time step + net_power = state[2] - delta_time * pdraw # joules + + dpos = jnp.cos(theta) * velocity * delta_time + dist_remaining = 10000.0 - dpos + time_remaining = 600 - (state[1] + delta_time) + return jnp.array( + [dpos, state[1] + delta_time, net_power, dist_remaining, time_remaining] + ) + + +def reward(state, prev_energy): + progress = state[0] / 10000 * 100 + energy_usage = 10 * (state[2] - prev_energy) # current energy < previous energy. + time_factor = (1.0 - (state[1] / 600)) * 50 + reward = progress + energy_usage + time_factor + return reward class SolarRaceV1(gym.Env): """A primitive hill climber. Aims to solve the given route optimizing @@ -46,8 +81,8 @@ class SolarRaceV1(gym.Env): self._reset_sim(jax.random.key(seed)) self._timestep = timestep self._car = car - self._simstep = sim.forwardv2 - self._simreward = sim.reward + self._simstep = forwardv2 + self._simreward = reward self.observation_space = gym.spaces.Dict( { @@ -73,8 +108,10 @@ class SolarRaceV1(gym.Env): def step(self, action): wind, elevation, slope = self._environment + old_energy = self._state[2] + self._state = self._simstep(self._state, action, self._timestep,wind, elevation, slope, self._car) - reward = self._simreward(self._state)[0] + reward = self._simreward(self._state, old_energy)[0] terminated = False truncated = False if jnp.all(self._state[0] > 10000): @@ -82,4 +119,6 @@ class SolarRaceV1(gym.Env): if self._state[1] > 600: truncated = True - return self._get_obs(), reward, terminated, truncated, {} \ No newline at end of file + return self._get_obs(), reward, terminated, truncated, {} + + diff --git a/src/solarcarsim/simv2.py b/src/solarcarsim/simv2.py new file mode 100644 index 0000000..60a66d2 --- /dev/null +++ b/src/solarcarsim/simv2.py @@ -0,0 +1,14 @@ +""" Second-generation simulator. More functional, cleaner code, faster """ + +from typing import NamedTuple +import jax +import jax.numpy as jnp + + +class SimState(NamedTuple): + position: float + time: float + energy: float + distance_remaining: float + time_remaining: float +